# **Loading the Dataset**

In [24]:
import pandas as pd

# Path of the CSV file to load.
DATA_PATH = '/content/food_coded.csv'

# Read the CSV into the DataFrame used by the cells that follow.
df = pd.read_csv(DATA_PATH)

# **Understanding the Data**

In [25]:
# DataFrame.info() writes its summary (index range, columns, non-null counts,
# dtypes) straight to stdout and returns None, so it is called on its own:
# wrapping it in print() would append a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125 entries, 0 to 124
Data columns (total 61 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   GPA                           123 non-null    object 
 1   Gender                        125 non-null    int64  
 2   breakfast                     125 non-null    int64  
 3   calories_chicken              125 non-null    int64  
 4   calories_day                  106 non-null    float64
 5   calories_scone                124 non-null    float64
 6   coffee                        125 non-null    int64  
 7   comfort_food                  124 non-null    object 
 8   comfort_food_reasons          123 non-null    object 
 9   comfort_food_reasons_coded    106 non-null    float64
 10  cook                          122 non-null    float64
 11  comfort_food_reasons_coded.1  125 non-null    int64  
 12  cuisine                       108 non-null    float64
 13  diet_

In [26]:
# Preview the first five rows. Leaving the frame as the cell's last expression
# lets Jupyter render it as a table, instead of the line-wrapped plain text
# that print() produces for a frame this wide.
df.head()

     GPA  Gender  breakfast  calories_chicken  calories_day  calories_scone  \
0    2.4       2          1               430           NaN           315.0   
1  3.654       1          1               610           3.0           420.0   
2    3.3       1          1               720           4.0           420.0   
3    3.2       1          1               430           3.0           420.0   
4    3.5       1          1               720           2.0           420.0   

   coffee                      comfort_food        comfort_food_reasons  \
0       1                              none       we dont have comfort    
1       2       chocolate, chips, ice cream        Stress, bored, anger   
2       2   frozen yogurt, pizza, fast food             stress, sadness   
3       2  Pizza, Mac and cheese, ice cream                     Boredom   
4       2      Ice cream, chocolate, chips   Stress, boredom, cravings    

   comfort_food_reasons_coded  ...  soup  sports  thai_food tortilla_calor

In [27]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns. Returned as the cell's last expression so Jupyter renders it as a
# table rather than as line-wrapped text from print().
df.describe()

           Gender   breakfast  calories_chicken  calories_day  calories_scone  \
count  125.000000  125.000000        125.000000    106.000000      124.000000   
mean     1.392000    1.112000        577.320000      3.028302      505.241935   
std      0.490161    0.316636        131.214156      0.639308      230.840506   
min      1.000000    1.000000        265.000000      2.000000      315.000000   
25%      1.000000    1.000000        430.000000      3.000000      420.000000   
50%      1.000000    1.000000        610.000000      3.000000      420.000000   
75%      2.000000    1.000000        720.000000      3.000000      420.000000   
max      2.000000    2.000000        720.000000      4.000000      980.000000   

          coffee  comfort_food_reasons_coded        cook  \
count  125.00000                  106.000000  122.000000   
mean     1.75200                    2.698113    2.786885   
std      0.43359                    1.972042    1.038351   
min      1.00000              

# **Handling Missing Values**



*   Identifying Missing Values


In [28]:
# Count the missing (NaN) entries in every column and print the tally.
missing_per_column = df.isna().sum()
print(missing_per_column)

GPA                  2
Gender               0
breakfast            0
calories_chicken     0
calories_day        19
                    ..
type_sports         26
veggies_day          0
vitamins             0
waffle_calories      0
weight               2
Length: 61, dtype: int64




# **Strategy for handling missing data**




1.   Drop rows/columns


In [29]:
# Show the data with every row that contains at least one missing value removed.
# dropna() returns a new DataFrame and the result is not assigned here, so `df`
# itself is left unchanged; this cell only displays what dropping would yield.
df.dropna()

Unnamed: 0,GPA,Gender,breakfast,calories_chicken,calories_day,calories_scone,coffee,comfort_food,comfort_food_reasons,comfort_food_reasons_coded,...,soup,sports,thai_food,tortilla_calories,turkey_calories,type_sports,veggies_day,vitamins,waffle_calories,weight
1,3.654,1,1,610,3.0,420.0,2,"chocolate, chips, ice cream","Stress, bored, anger",1.0,...,1.0,1.0,2,725.0,690,Basketball,4,2,900,155
2,3.3,1,1,720,4.0,420.0,2,"frozen yogurt, pizza, fast food","stress, sadness",1.0,...,1.0,2.0,5,1165.0,500,none,5,1,900,I'm not answering this.
4,3.5,1,1,720,2.0,420.0,2,"Ice cream, chocolate, chips","Stress, boredom, cravings",1.0,...,1.0,1.0,4,940.0,500,Softball,4,2,760,190
6,3.8,2,1,610,3.0,420.0,2,"Chocolate, ice cream, french fries, pretzels","stress, boredom",1.0,...,1.0,1.0,5,940.0,690,soccer,4,1,1315,180
7,3.3,1,1,720,3.0,420.0,1,"Ice cream, cheeseburgers, chips.",I eat comfort food when im stressed out from s...,1.0,...,1.0,2.0,1,725.0,500,none,4,2,1315,137
9,3.3,1,1,430,3.0,315.0,2,"Mac and cheese, chocolate, and pasta","Stress, anger and sadness",1.0,...,1.0,1.0,4,580.0,345,field hockey,5,1,900,125
10,3.5,1,1,610,3.0,980.0,2,"Pasta, grandma homemade chocolate cake anythin...",Boredom,2.0,...,1.0,1.0,2,940.0,345,soccer,5,2,900,116
11,3.904,1,1,720,4.0,420.0,2,"chocolate, pasta, soup, chips, popcorn","sadness, stress, cold weather",3.0,...,1.0,1.0,5,940.0,500,Running,5,1,900,110
12,3.4,2,1,430,3.0,420.0,2,"Cookies, popcorn, and chips","Sadness, boredom, late night snack",3.0,...,2.0,1.0,3,940.0,500,Soccer and basketball,3,2,575,264
13,3.6,1,1,610,3.0,420.0,2,"ice cream, cake, chocolate","stress, boredom, special occasions",1.0,...,1.0,1.0,5,1165.0,850,intramural volleyball,5,2,1315,123


In [31]:
# Drop the 'type_sports' column (26 missing values in the null-count output).
# errors='ignore' keeps the cell idempotent: re-running it after the column is
# already gone is a no-op instead of raising KeyError.
df = df.drop(columns=['type_sports'], errors='ignore')



2.   Impute Missing Values



In [36]:
# Parse 'weight' as numbers; with errors='coerce', any entry that cannot be
# parsed (e.g. free-text answers) becomes NaN instead of raising.
weight_numeric = pd.to_numeric(df['weight'], errors='coerce')
df['weight'] = weight_numeric

In [38]:
# Mean imputation: replace missing weights with the column average.
# The filled Series is assigned back to the column rather than calling
# fillna(..., inplace=True) on df['weight']: that chained in-place form is
# deprecated in pandas 2.x and does not modify `df` under Copy-on-Write.
weight_mean = df['weight'].mean()
df['weight'] = df['weight'].fillna(weight_mean)

# Mode imputation is an alternative to the mean, not a follow-up step: once the
# mean fill above has run there are no NaNs left, so a second fillna would do
# nothing. To use the mode instead, fill with df['weight'].mode()[0] in place
# of weight_mean.

In [39]:
# Display the DataFrame to inspect the result of the steps above
# (the rendering is truncated with "..." for the omitted rows and columns).
df

Unnamed: 0,GPA,Gender,breakfast,calories_chicken,calories_day,calories_scone,coffee,comfort_food,comfort_food_reasons,comfort_food_reasons_coded,...,self_perception_weight,soup,sports,thai_food,tortilla_calories,turkey_calories,veggies_day,vitamins,waffle_calories,weight
0,2.4,2,1,430,,315.0,1,none,we dont have comfort,9.0,...,3.0,1.0,1.0,1,1165.0,345,5,1,1315,187.0
1,3.654,1,1,610,3.0,420.0,2,"chocolate, chips, ice cream","Stress, bored, anger",1.0,...,3.0,1.0,1.0,2,725.0,690,4,2,900,155.0
2,3.3,1,1,720,4.0,420.0,2,"frozen yogurt, pizza, fast food","stress, sadness",1.0,...,6.0,1.0,2.0,5,1165.0,500,5,1,900,158.5
3,3.2,1,1,430,3.0,420.0,2,"Pizza, Mac and cheese, ice cream",Boredom,2.0,...,5.0,1.0,2.0,5,725.0,690,3,1,1315,158.5
4,3.5,1,1,720,2.0,420.0,2,"Ice cream, chocolate, chips","Stress, boredom, cravings",1.0,...,4.0,1.0,1.0,4,940.0,500,4,2,760,190.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,3.5,1,1,610,4.0,420.0,2,"wine. mac and cheese, pizza, ice cream",boredom and sadness,,...,4.0,1.0,1.0,5,940.0,500,5,1,1315,156.0
121,3,1,1,265,2.0,315.0,2,Pizza / Wings / Cheesecake,Loneliness / Homesick / Sadness,,...,4.0,1.0,,4,940.0,500,5,2,1315,180.0
122,3.882,1,1,720,,420.0,1,"rice, potato, seaweed soup",sadness,,...,4.0,1.0,2.0,5,580.0,690,4,2,1315,120.0
123,3,2,1,720,4.0,420.0,1,"Mac n Cheese, Lasagna, Pizza","happiness, they are some of my favorite foods",,...,2.0,2.0,2.0,1,940.0,500,3,1,1315,135.0
