# Project - Local Food Wastage management system

## Name - Nilesh Bahirgaonkar 

# Data Preparation

## Importing Libraries 

In [1]:
import pandas as pd 

## For Claim Data 

In [2]:
# Read the raw claims export into a DataFrame
claim_data = pd.read_csv(filepath_or_buffer='claims_data.csv')

In [3]:
claim_data.head(n=5)      # peek at the first five claims

Unnamed: 0,Claim_ID,Food_ID,Receiver_ID,Status,Timestamp
0,1,164,908,Pending,3/5/2025 5:26
1,2,353,391,Cancelled,3/11/2025 10:24
2,3,626,492,Completed,3/21/2025 0:59
3,4,61,933,Cancelled,3/4/2025 9:08
4,5,345,229,Pending,3/14/2025 15:17


In [4]:
claim_data.info()     # Per-column dtype and non-null count, plus memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Claim_ID     1000 non-null   int64 
 1   Food_ID      1000 non-null   int64 
 2   Receiver_ID  1000 non-null   int64 
 3   Status       1000 non-null   object
 4   Timestamp    1000 non-null   object
dtypes: int64(3), object(2)
memory usage: 39.2+ KB


In [5]:
# The Timestamp column is stored as object (string) dtype but should be datetime, so convert it.

In [6]:
from datetime import datetime

In [7]:
# The timestamps are written month/day/year with a 24-hour clock (e.g. "3/21/2025 0:59"),
# so state the format explicitly instead of letting pandas guess the day/month order.
# A value that does not match this format raises rather than being silently misparsed.
claim_data['Timestamp'] = pd.to_datetime(claim_data['Timestamp'], format='%m/%d/%Y %H:%M')

In [8]:
# Now let's check for missing values and duplicate IDs

In [9]:
claim_data.isna().sum()     # number of missing values in each column

Claim_ID       0
Food_ID        0
Receiver_ID    0
Status         0
Timestamp      0
dtype: int64

In [10]:
# No missing values 

In [11]:
# Number of rows whose Claim_ID already appeared in an earlier row
claim_data.duplicated(subset='Claim_ID').sum()

0

In [12]:
# Number of rows whose Food_ID already appeared in an earlier row
claim_data.duplicated(subset='Food_ID').sum()

353

In [13]:
# Number of rows whose Receiver_ID already appeared in an earlier row
claim_data.duplicated(subset='Receiver_ID').sum()

376

In [14]:
# Food_ID has 353 repeated values, which is expected: the same food item can appear in more than one claim.
# Receiver_ID has 376 repeated values, which is also expected: one receiver can make several claims.
# Claim_ID is the identifier that must be unique, and it has no duplicates.

In [15]:
# Saving it in csv format 

In [16]:
# Write the prepared claims table to disk without the RangeIndex column.
# NOTE(review): CSV does not store dtypes, so Timestamp will need to be parsed again on reload.
claim_data.to_csv(path_or_buf='Claim_data_Pre.csv', index=False)

## For Food Listings data 

In [17]:
# Read the raw food listings export. index_col=False tells pandas not to use the
# first column as the index.
fld = pd.read_csv(filepath_or_buffer='food_listings_data.csv', index_col=False)

In [18]:
fld.head(n=5)     # peek at the first five food listings

Unnamed: 0,Food_ID,Food_Name,Quantity,Expiry_Date,Provider_ID,Provider_Type,Location,Food_Type,Meal_Type
0,1,Bread,43,3/17/2025,110,Grocery Store,South Kellyville,Non-Vegetarian,Breakfast
1,2,Soup,22,3/24/2025,791,Grocery Store,West James,Non-Vegetarian,Dinner
2,3,Fruits,46,3/28/2025,478,Catering Service,Lake Regina,Vegan,Breakfast
3,4,Fruits,15,3/16/2025,930,Restaurant,Kellytown,Vegan,Lunch
4,5,Soup,14,3/19/2025,279,Restaurant,Garciaport,Vegan,Dinner


In [19]:
fld.info()    # Per-column dtype and non-null count, plus memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Food_ID        1000 non-null   int64 
 1   Food_Name      1000 non-null   object
 2   Quantity       1000 non-null   int64 
 3   Expiry_Date    1000 non-null   object
 4   Provider_ID    1000 non-null   int64 
 5   Provider_Type  1000 non-null   object
 6   Location       1000 non-null   object
 7   Food_Type      1000 non-null   object
 8   Meal_Type      1000 non-null   object
dtypes: int64(3), object(6)
memory usage: 70.4+ KB


In [20]:
# Converting Expiry_Date to datetime

In [21]:
# Expiry dates are written month/day/year (e.g. "3/17/2025"), so state the format
# explicitly instead of letting pandas guess the day/month order. A value that does
# not match this format raises rather than being silently misparsed.
fld['Expiry_Date'] = pd.to_datetime(fld['Expiry_Date'], format='%m/%d/%Y')

In [22]:
# Now let's check for missing values and duplicate IDs

In [23]:
fld.isna().sum()     # number of missing values in each column

Food_ID          0
Food_Name        0
Quantity         0
Expiry_Date      0
Provider_ID      0
Provider_Type    0
Location         0
Food_Type        0
Meal_Type        0
dtype: int64

In [24]:
# No Null values

In [25]:
# Checking for duplicate values 

In [26]:
# Count repeated Food_ID values rather than only fully identical rows: two listings
# that share a Food_ID but differ in any other column would otherwise go unnoticed.
# Any exact duplicate row also repeats its Food_ID, so this check covers both cases.
fld.duplicated(subset=['Food_ID']).sum()

0

In [27]:
# No Duplicates 

In [28]:
# Saving it in csv format

In [29]:
# Write the prepared food listings table to disk without the RangeIndex column.
# NOTE(review): CSV does not store dtypes, so Expiry_Date will need to be parsed again on reload.
fld.to_csv(path_or_buf='food_listing_pre.csv', index=False)

## For Providers data

In [30]:
# Read the raw providers export into a DataFrame
psd = pd.read_csv(filepath_or_buffer='providers_data.csv')

In [31]:
psd.head(n=5)     # peek at the first five providers

Unnamed: 0,Provider_ID,Name,Type,Address,City,Contact
0,1,Gonzales-Cochran,Supermarket,"74347 Christopher Extensions\nAndreamouth, OK ...",New Jessica,+1-600-220-0480
1,2,"Nielsen, Johnson and Fuller",Grocery Store,"91228 Hanson Stream\nWelchtown, OR 27136",East Sheena,+1-925-283-8901x6297
2,3,Miller-Black,Supermarket,"561 Martinez Point Suite 507\nGuzmanchester, W...",Lake Jesusview,001-517-295-2206
3,4,"Clark, Prince and Williams",Grocery Store,"467 Bell Trail Suite 409\nPort Jesus, IA 61188",Mendezmouth,556.944.8935x401
4,5,Coleman-Farley,Grocery Store,"078 Matthew Creek Apt. 319\nSaraborough, MA 53978",Valentineside,193.714.6577


In [32]:
psd.info()    # Per-column dtype and non-null count, plus memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Provider_ID  1000 non-null   int64 
 1   Name         1000 non-null   object
 2   Type         1000 non-null   object
 3   Address      1000 non-null   object
 4   City         1000 non-null   object
 5   Contact      1000 non-null   object
dtypes: int64(1), object(5)
memory usage: 47.0+ KB


In [33]:
# No need to change datatype 

In [34]:
# Checking for null values and duplicates 

In [35]:
psd.isna().sum()     # number of missing values in each column

Provider_ID    0
Name           0
Type           0
Address        0
City           0
Contact        0
dtype: int64

In [36]:
# no null values

In [37]:
# Count repeated Provider_ID values rather than only fully identical rows: two providers
# that share a Provider_ID but differ in any other column would otherwise go unnoticed.
# Any exact duplicate row also repeats its Provider_ID, so this check covers both cases.
psd.duplicated(subset=['Provider_ID']).sum()

0

In [38]:
# No duplicates

In [39]:
# Saving it in csv format 

In [40]:
# Write the prepared providers table to disk without the RangeIndex column
psd.to_csv(path_or_buf='Providers_data_Pre.csv', index=False)

## For Receiver's data

In [41]:
# Read the raw receivers export into a DataFrame
rd = pd.read_csv(filepath_or_buffer='receivers_data.csv')

In [42]:
rd.head(n=5)     # peek at the first five receivers

Unnamed: 0,Receiver_ID,Name,Type,City,Contact
0,1,Donald Gomez,Shelter,Port Carlburgh,(955)922-5295
1,2,Laurie Ramos,Individual,Lewisburgh,761.042.1570
2,3,Ashley Mckee,NGO,South Randalltown,691-023-0094x856
3,4,Erika Rose,NGO,South Shaneville,8296491111
4,5,John Romero,Individual,Bakerport,067.491.0154


In [43]:
rd.info()    # Per-column dtype and non-null count, plus memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Receiver_ID  1000 non-null   int64 
 1   Name         1000 non-null   object
 2   Type         1000 non-null   object
 3   City         1000 non-null   object
 4   Contact      1000 non-null   object
dtypes: int64(1), object(4)
memory usage: 39.2+ KB


In [44]:
# Checking for null values 

In [45]:
rd.isna().sum()     # number of missing values in each column

Receiver_ID    0
Name           0
Type           0
City           0
Contact        0
dtype: int64

In [46]:
# No null values 

In [47]:
# Checking for duplicates 

In [48]:
# Count repeated Receiver_ID values rather than only fully identical rows: two receivers
# that share a Receiver_ID but differ in any other column would otherwise go unnoticed.
# Any exact duplicate row also repeats its Receiver_ID, so this check covers both cases.
rd.duplicated(subset=['Receiver_ID']).sum()

0

In [49]:
# No duplicates

In [50]:
# Saving it in csv format 

In [51]:
# Write the prepared receivers table to disk without the RangeIndex column
rd.to_csv(path_or_buf='Receivers_data_Pre.csv', index=False)