# **Preprocessing - Normalizing**

## Normalization is used to rescale the values of numeric features to a standard range, often between 0 and 1, to ensure that each feature contributes equally to the model.


# **Step 1: Loading the Dataset**

In [2]:
import pandas as pd

# Read the survey CSV into the working DataFrame used by every later cell.
df = pd.read_csv(filepath_or_buffer='./content/food_coded.csv')

# **Step 2: Exploring the Dataset**

Let's take a first look at the dataset: preview the first rows, summarize the numeric columns, and count the missing values in each column before cleaning.

In [3]:
# Preview the first five rows (5 is the default row count for head()).
df.head(5)

Unnamed: 0,GPA,Gender,breakfast,calories_chicken,calories_day,calories_scone,coffee,comfort_food,comfort_food_reasons,comfort_food_reasons_coded,...,soup,sports,thai_food,tortilla_calories,turkey_calories,type_sports,veggies_day,vitamins,waffle_calories,weight
0,2.4,2,1,430,,315.0,1,none,we dont have comfort,9.0,...,1.0,1.0,1,1165.0,345,car racing,5,1,1315,187
1,3.654,1,1,610,3.0,420.0,2,"chocolate, chips, ice cream","Stress, bored, anger",1.0,...,1.0,1.0,2,725.0,690,Basketball,4,2,900,155
2,3.3,1,1,720,4.0,420.0,2,"frozen yogurt, pizza, fast food","stress, sadness",1.0,...,1.0,2.0,5,1165.0,500,none,5,1,900,I'm not answering this.
3,3.2,1,1,430,3.0,420.0,2,"Pizza, Mac and cheese, ice cream",Boredom,2.0,...,1.0,2.0,5,725.0,690,,3,1,1315,"Not sure, 240"
4,3.5,1,1,720,2.0,420.0,2,"Ice cream, chocolate, chips","Stress, boredom, cravings",1.0,...,1.0,1.0,4,940.0,500,Softball,4,2,760,190


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe(include=None)

Unnamed: 0,Gender,breakfast,calories_chicken,calories_day,calories_scone,coffee,comfort_food_reasons_coded,cook,comfort_food_reasons_coded.1,cuisine,...,persian_food,self_perception_weight,soup,sports,thai_food,tortilla_calories,turkey_calories,veggies_day,vitamins,waffle_calories
count,125.0,125.0,125.0,106.0,124.0,125.0,106.0,122.0,125.0,108.0,...,124.0,124.0,124.0,123.0,125.0,124.0,125.0,125.0,125.0,125.0
mean,1.392,1.112,577.32,3.028302,505.241935,1.752,2.698113,2.786885,2.688,1.388889,...,2.806452,3.120968,1.217742,1.390244,3.336,947.580645,555.04,4.008,1.512,1073.4
std,0.490161,0.316636,131.214156,0.639308,230.840506,0.43359,1.972042,1.038351,1.910987,0.974759,...,1.423824,1.11598,0.414385,0.4898,1.436528,202.090179,152.370379,1.081337,0.501867,248.667092
min,1.0,1.0,265.0,2.0,315.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,580.0,345.0,1.0,1.0,575.0
25%,1.0,1.0,430.0,3.0,420.0,2.0,2.0,2.0,2.0,1.0,...,2.0,2.0,1.0,1.0,2.0,725.0,500.0,3.0,1.0,900.0
50%,1.0,1.0,610.0,3.0,420.0,2.0,2.0,3.0,2.0,1.0,...,3.0,3.0,1.0,1.0,3.0,940.0,500.0,4.0,2.0,900.0
75%,2.0,1.0,720.0,3.0,420.0,2.0,3.0,3.0,3.0,1.0,...,4.0,4.0,1.0,2.0,5.0,1165.0,690.0,5.0,2.0,1315.0
max,2.0,2.0,720.0,4.0,980.0,2.0,9.0,5.0,9.0,6.0,...,5.0,6.0,2.0,2.0,5.0,1165.0,850.0,5.0,2.0,1315.0


In [6]:
# Number of missing values in each column, before any imputation.
df.isna().sum()

Unnamed: 0,0
GPA,2
Gender,0
breakfast,0
calories_chicken,0
calories_day,19
...,...
type_sports,26
veggies_day,0
vitamins,0
waffle_calories,0


# **Step 3: Cleaning the Dataset**

In [8]:
# Fill missing values in text (object) columns with each column's most
# frequent value.
for column in df.select_dtypes(include=['object']).columns:
    modes = df[column].mode()
    # mode() returns an empty Series when the column has no non-null values;
    # skip such a column instead of raising IndexError on the first element.
    if modes.empty:
        continue
    # Assign the filled Series back to the frame. Calling
    # fillna(..., inplace=True) on df[column] mutates an intermediate object:
    # pandas 2.x warns about it, and under Copy-on-Write df is left unchanged.
    df[column] = df[column].fillna(modes.iloc[0])

In [11]:
# Fill missing values in numeric columns with each column's median.
for column in df.select_dtypes(include=['number']).columns:
    # Assign the filled Series back to the frame. Calling
    # fillna(..., inplace=True) on df[column] mutates an intermediate object:
    # pandas 2.x warns about it, and under Copy-on-Write df is left unchanged.
    df[column] = df[column].fillna(df[column].median())

# **Step 4: Encode Categorical Variables**

In [12]:
# Label Encoding: replace each distinct Gender value with an integer code.
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
# Learn the set of Gender values first, then map the column onto the codes.
label_encoder.fit(df['Gender'])
df['Gender'] = label_encoder.transform(df['Gender'])

In [13]:
# One Hot Encoding: expand 'cuisine' into one indicator column per level,
# dropping the first level (drop_first=True).
df = pd.get_dummies(data=df, columns=['cuisine'], drop_first=True)

# **Step 5: Normalize the Dataset**

In [22]:
from sklearn.preprocessing import MinMaxScaler

# Calorie columns to rescale with min-max normalization.
numerical_columns = ['calories_chicken', 'calories_day', 'calories_scone']

scaler = MinMaxScaler()
# Learn each column's min and max, then overwrite the columns in df with the
# rescaled values.
scaler.fit(df[numerical_columns])
df[numerical_columns] = scaler.transform(df[numerical_columns])

# **Step 6: Scale the Dataset (Alternative to Normalization)**

In [24]:
from sklearn.preprocessing import StandardScaler

# NOTE(review): this runs on the columns that Step 5 already min-max scaled
# and overwrites them in df, so the normalized values are no longer available
# afterwards. Confirm whether the "alternative" was meant to be kept in a
# separate frame instead.
scaler = StandardScaler()
scaler.fit(df[numerical_columns])
df[numerical_columns] = scaler.transform(df[numerical_columns])

# **Step 7: Standardize the Dataset Using Z-Score**

In [29]:
from sklearn.preprocessing import StandardScaler

# Rebind numerical_columns to every numeric column currently in df.
# NOTE(review): that selection also covers the label-encoded Gender column and
# the integer-coded survey answers, and this cell overwrites the output of the
# previous scaling steps. The frame saved in Step 8 therefore holds z-scores
# rather than 0-1 normalized values; confirm this is intended.
numerical_columns = df.select_dtypes(include='number').columns

scaler = StandardScaler()
scaler.fit(df[numerical_columns])
df[numerical_columns] = scaler.transform(df[numerical_columns])

# **Step 8: Save the Dataset for Further Use**

In [30]:
# Write the processed frame to disk without the row index.
df.to_csv(path_or_buf='Normalized_food_score.csv', index=False)

In [31]:
# Re-count missing values per column to check the result of the cleaning steps.
df.isna().sum()

Unnamed: 0,0
GPA,0
Gender,0
breakfast,0
calories_chicken,0
calories_day,0
...,...
cuisine_2.0,0
cuisine_3.0,0
cuisine_4.0,0
cuisine_5.0,0


# **In this experiment we learned the steps for normalizing a dataset: loading, exploring, cleaning, encoding, and scaling.**