# Dataset Processing For Food Recommendation

## Install Dependencies

In [None]:
# Install the third-party packages that this notebook imports below.
# The %pip magic (rather than !pip) installs into the environment of the running kernel.
%pip install numpy
%pip install pandas

## Import Dependencies

In [1]:
import os
import numpy as np
import pandas as pd

## Load Dataset

In [2]:
# Variable: name of the CSV file to load from the 'data' folder
fileName = 'nutritions.csv'

# Do not change
# The path is built as <current working directory>/data/<fileName>, so the
# notebook has to be run from the folder that contains 'data'.
filePath = os.path.join(os.getcwd(), 'data', fileName)
data = pd.read_csv(filePath)
# Preview the first rows to confirm the file was read as expected.
data.head()

Unnamed: 0,Food,Category,Serving Size (g),Calories,Protein (g),Fats (g),Vitamin A (IU),Vitamin C (mg),Vitamin D (IU),Vitamin E (mg),Vitamin B1 (mg),Vitamin B2 (mg)
0,Chicken Breast,Meat,100,165,31.0,3.6,0,0,0.0,0.0,7.0,6.0
1,Salmon,Fish,100,206,22.0,12.0,4,0,91.0,1.0,5.0,22.0
2,Brocoli,Vegetables,100,55,4.0,0.6,12,135,0.0,4.0,8.0,7.0
3,Carrots,Vegetables,100,41,0.9,0.2,334,9,0.0,2.0,4.0,3.0
4,Apples,Fruits,100,52,0.3,0.2,1,8,0.0,1.0,1.0,1.0


## Dataset Count

In [4]:
# Do not change
# Dataset dimensions as a (rows, columns) tuple.
data.shape

(41, 12)

## Check For Missing Values

In [3]:
# Do not change
# Count the missing (null/NaN) entries in each column; all zeros means nothing is missing.
data.isnull().sum()

Food                0
Category            0
Serving Size (g)    0
Calories            0
Protein (g)         0
Fats (g)            0
Vitamin A (IU)      0
Vitamin C (mg)      0
Vitamin D (IU)      0
Vitamin E (mg)      0
Vitamin B1 (mg)     0
Vitamin B2 (mg)     0
dtype: int64

## Add A New Column

Each target group has a different dietary focus:
- Severely Stunting: High-calorie, protein-rich, and micronutrient-dense foods.
- Stunting: Balanced nutrition with emphasis on growth-promoting nutrients.
- Normal: Maintenance-focused diet.
- High: Energy-dense foods if needed.

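We group the foods using the rules below. The rules are checked in order and the first match wins; the number in parentheses is the value stored in the new `Classification` column:
- Food suitable for Severely Stunting (0): Foods with Calories ≥ 200 and Protein ≥ 20g.
- Food suitable for Stunting (1): Foods with Calories ≥ 150 and Protein ≥ 10g.
- Food suitable for Normal (2): Foods with 100 ≤ Calories < 150.
- Food suitable for High (3): Foods that do not meet the above criteria (for example, foods with fewer than 100 calories, or foods with at least 150 calories but less than 10g of protein).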

In [5]:
# Do not change
def classify_nutrition(row):
    """Return the nutrition class code for a single food record.

    The rules are tried in order and the first one that matches wins:

        0 -- Calories >= 200 and Protein (g) >= 20
        1 -- Calories >= 150 and Protein (g) >= 10
        2 -- 100 <= Calories < 150
        3 -- anything else

    Parameters
    ----------
    row : mapping-like
        Any object indexable by the keys 'Calories' and 'Protein (g)'.

    Returns
    -------
    int
        One of 0, 1, 2 or 3.
    """
    calories = row['Calories']

    # The protein value is only looked up once the calorie threshold is met.
    if calories >= 200 and row['Protein (g)'] >= 20:
        return 0
    if calories >= 150 and row['Protein (g)'] >= 10:
        return 1
    return 2 if 100 <= calories < 150 else 3

# Run classify_nutrition on every row (axis=1) and store the returned code
# in a new 'Classification' column; this modifies `data` in place.
data['Classification'] = data.apply(classify_nutrition, axis=1)

## Check The New Dataset

In [6]:
# Do not change
# Preview the first rows again to verify the 'Classification' column.
data.head()

Unnamed: 0,Food,Category,Serving Size (g),Calories,Protein (g),Fats (g),Vitamin A (IU),Vitamin C (mg),Vitamin D (IU),Vitamin E (mg),Vitamin B1 (mg),Vitamin B2 (mg),Classification
0,Chicken Breast,Meat,100,165,31.0,3.6,0,0,0.0,0.0,7.0,6.0,1
1,Salmon,Fish,100,206,22.0,12.0,4,0,91.0,1.0,5.0,22.0,0
2,Brocoli,Vegetables,100,55,4.0,0.6,12,135,0.0,4.0,8.0,7.0,3
3,Carrots,Vegetables,100,41,0.9,0.2,334,9,0.0,2.0,4.0,3.0,3
4,Apples,Fruits,100,52,0.3,0.2,1,8,0.0,1.0,1.0,1.0,3


## Save The Dataset

In [7]:
# Do not change
# The output goes to <current working directory>/data, the same folder the input was read from.
outputDir = os.path.join(os.getcwd(), 'data')
# exist_ok=True: do not fail when the folder already exists.
os.makedirs(outputDir, exist_ok=True)

# index=False leaves the DataFrame's row index out of the written CSV.
data.to_csv(os.path.join(outputDir, 'final.csv'), index=False)