In [1]:
import pandas as pd
import numpy as np

In [2]:
def BMI_calculator(person):
    """Calculate the person's bmi"""
    BMI = person.Weight / ((person.Height/100)**2)
    return round(BMI, 2)

In [3]:
# Load dataset from csv file
csv_file_path = "../input/user_profile_dataset.csv"
user_profile_dataset = pd.read_csv(csv_file_path)

In [4]:
# Define columns to remove
wanted_columns = ["Ages", "Gender", "Height", "Weight", 
                    "Activity Level", "Dietary Preference"]

existing_columns = [col for col in wanted_columns if col in user_profile_dataset.columns]

user_profile_dataset = user_profile_dataset.loc[:, existing_columns]

user_profile_dataset.head()
user_profile_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1698 entries, 0 to 1697
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Ages                1698 non-null   int64 
 1   Gender              1698 non-null   object
 2   Height              1698 non-null   int64 
 3   Weight              1698 non-null   int64 
 4   Activity Level      1698 non-null   object
 5   Dietary Preference  1698 non-null   object
dtypes: int64(3), object(3)
memory usage: 79.7+ KB


In [5]:
# Make the cleaned dataframe into a csv file
user_profile_dataset.to_csv("../row/row_user_profile.csv", index=False)

In [6]:
user_profile_dataset["BMI"] = user_profile_dataset.apply(BMI_calculator, axis=1)
print(user_profile_dataset)

      Ages  Gender  Height  Weight     Activity Level Dietary Preference  \
0       25    Male     180      80  Moderately Active           Omnivore   
1       32  Female     165      65     Lightly Active         Vegetarian   
2       48    Male     175      95          Sedentary              Vegan   
3       55  Female     160      70        Very Active           Omnivore   
4       62    Male     170      85          Sedentary         Vegetarian   
...    ...     ...     ...     ...                ...                ...   
1693    53  Female     182      76          Sedentary              Vegan   
1694    38    Male     150      76        Very Active              Vegan   
1695    57    Male     165      73        Very Active              Vegan   
1696    40    Male     166      91   Extremely Active              Vegan   
1697    54  Female     198      75          Sedentary         Vegetarian   

        BMI  
0     24.69  
1     23.88  
2     31.02  
3     27.34  
4     29.41  
...

In [7]:
def shape_classifier(person):
    """Determine the level of obesity a person and his goal"""
    
    bmi = person.BMI

    if bmi < 18.5:
        obesity_level = "underweight"
        goal = "gain_weight"

    elif bmi < 25:
        obesity_level = "healthy weight"
        goal = "mainten_weight"

    elif bmi < 30:
        obesity_level = "overweight"
        goal = "loss_weight"

    elif bmi < 40:
        obesity_level = "obese"
        goal = "loss_weight"
    
    else :
        obesity_level = "severely obese"
        goal = "loss_weight"

    return obesity_level, goal

In [8]:
user_profile_dataset[["Obesity Level", "Goal"]] = user_profile_dataset.apply(shape_classifier, axis=1, result_type="expand")
print(user_profile_dataset)

      Ages  Gender  Height  Weight     Activity Level Dietary Preference  \
0       25    Male     180      80  Moderately Active           Omnivore   
1       32  Female     165      65     Lightly Active         Vegetarian   
2       48    Male     175      95          Sedentary              Vegan   
3       55  Female     160      70        Very Active           Omnivore   
4       62    Male     170      85          Sedentary         Vegetarian   
...    ...     ...     ...     ...                ...                ...   
1693    53  Female     182      76          Sedentary              Vegan   
1694    38    Male     150      76        Very Active              Vegan   
1695    57    Male     165      73        Very Active              Vegan   
1696    40    Male     166      91   Extremely Active              Vegan   
1697    54  Female     198      75          Sedentary         Vegetarian   

        BMI   Obesity Level            Goal  
0     24.69  healthy weight  mainten_weig

In [9]:
user_profile_dataset.info()
user_profile_dataset.to_csv("../output/user_profile_phase_1.csv", index=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1698 entries, 0 to 1697
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Ages                1698 non-null   int64  
 1   Gender              1698 non-null   object 
 2   Height              1698 non-null   int64  
 3   Weight              1698 non-null   int64  
 4   Activity Level      1698 non-null   object 
 5   Dietary Preference  1698 non-null   object 
 6   BMI                 1698 non-null   float64
 7   Obesity Level       1698 non-null   object 
 8   Goal                1698 non-null   object 
dtypes: float64(1), int64(3), object(5)
memory usage: 119.5+ KB


In [10]:
# Load phase 1 of the user dataset
user_profile_phase_1 = "../output/user_profile_phase_1.csv"
user_profile_dataset = pd.read_csv(user_profile_phase_1)
user_profile_dataset.head()

Unnamed: 0,Ages,Gender,Height,Weight,Activity Level,Dietary Preference,BMI,Obesity Level,Goal
0,25,Male,180,80,Moderately Active,Omnivore,24.69,healthy weight,mainten_weight
1,32,Female,165,65,Lightly Active,Vegetarian,23.88,healthy weight,mainten_weight
2,48,Male,175,95,Sedentary,Vegan,31.02,obese,loss_weight
3,55,Female,160,70,Very Active,Omnivore,27.34,overweight,loss_weight
4,62,Male,170,85,Sedentary,Vegetarian,29.41,overweight,loss_weight


In [11]:
# Rename dataframe's columns
user_profile_dataset.columns = ["age", "gender", "height", "weight", "activity_level",
                                 "dietary_preference", "BMI", "obesity_level", "goal"]

In [12]:
user_profile_dataset.head(10)
user_profile_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1698 entries, 0 to 1697
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   age                 1698 non-null   int64  
 1   gender              1698 non-null   object 
 2   height              1698 non-null   int64  
 3   weight              1698 non-null   int64  
 4   activity_level      1698 non-null   object 
 5   dietary_preference  1698 non-null   object 
 6   BMI                 1698 non-null   float64
 7   obesity_level       1698 non-null   object 
 8   goal                1698 non-null   object 
dtypes: float64(1), int64(3), object(5)
memory usage: 119.5+ KB


In [13]:
from enum import Enum
class Activity_Level(Enum):
    """Activity level enum for equavilante value"""
    SEDENTARY = 1.2
    LIGHTLY = 1.375
    MODERATELY = 1.55
    VERY = 1.725
    EXTREMELY = 1.9
    
    

In [14]:
class Gender(Enum):
    """Value of gender in TDEE equation"""
    MALE = 5
    FEMALE = -161

In [15]:
def TDEE_calculator(person):
    """Calculate the TDEE of a person"""
    activity_level = person.activity_level
    # Spliting activity_level
    words = activity_level.split()
    
   
    age = person.age
    gender = person.gender.upper()
    height = person.height
    weight = person.weight
    activity_level = words[0].upper()


    activity_value = Activity_Level[activity_level].value
    gender_value = Gender[gender].value

    # Calculate the BMR "Basel Metabolic Rate" using mifflin-st jeor equation
    BMR = (10 * weight) + (6.25 * height) - (5 * age) + gender_value

    # Calculate TDEE "Total daily energy expenditure"
    TDEE = BMR * activity_value
    return round(TDEE, 2)

In [16]:
# Assigning TDEE value
user_profile_dataset["TDEE"] = user_profile_dataset.apply(TDEE_calculator, axis=1)

In [17]:
print(user_profile_dataset.head())
print(user_profile_dataset.info())

   age  gender  height  weight     activity_level dietary_preference    BMI  \
0   25    Male     180      80  Moderately Active           Omnivore  24.69   
1   32  Female     165      65     Lightly Active         Vegetarian  23.88   
2   48    Male     175      95          Sedentary              Vegan  31.02   
3   55  Female     160      70        Very Active           Omnivore  27.34   
4   62    Male     170      85          Sedentary         Vegetarian  29.41   

    obesity_level            goal     TDEE  
0  healthy weight  mainten_weight  2797.75  
1  healthy weight  mainten_weight  1870.34  
2           obese     loss_weight  2170.50  
3      overweight     loss_weight  2180.40  
4      overweight     loss_weight  1929.00  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1698 entries, 0 to 1697
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   age                 1698 non-null   int64  
 

In [18]:
class Goal(Enum):
    """Value modified in TDEE"""
    LOSS_WEIGHT = -500
    MAINTEN_WEIGHT = 0
    GAIN_WEIGHT = 500


In [19]:
def daily_calories_goal(person):
    """Calculate the daily calories based on the TDEE and the person's goal"""
    goal = person.goal
    words = goal.split()
    goal = words[0].upper()
    
    tdee = person.TDEE
    goal_value = Goal[goal].value

    daily_cals_goal = tdee + goal_value

    return daily_cals_goal

In [20]:
user_profile_dataset["daily_cals_goal"] = user_profile_dataset.apply(daily_calories_goal, axis=1)
user_profile_dataset.head()

Unnamed: 0,age,gender,height,weight,activity_level,dietary_preference,BMI,obesity_level,goal,TDEE,daily_cals_goal
0,25,Male,180,80,Moderately Active,Omnivore,24.69,healthy weight,mainten_weight,2797.75,2797.75
1,32,Female,165,65,Lightly Active,Vegetarian,23.88,healthy weight,mainten_weight,1870.34,1870.34
2,48,Male,175,95,Sedentary,Vegan,31.02,obese,loss_weight,2170.5,1670.5
3,55,Female,160,70,Very Active,Omnivore,27.34,overweight,loss_weight,2180.4,1680.4
4,62,Male,170,85,Sedentary,Vegetarian,29.41,overweight,loss_weight,1929.0,1429.0


In [21]:
def calculate_macronutrient(person):
    """Calculate the macronutrient for a peerson based on the TDEE"""

    macro_ratios = {
        "loss_weight": {"protein": 0.40, "carbs": 0.30, "fats": 0.30},
        "mainten_weight": {"protein": 0.30, "carbs": 0.40, "fats": 0.30},
        "gain_weight": {"protein": 0.35, "carbs": 0.45, "fats": 0.20},
        # Gain_weight is equivalent to gain muscle
        # To be fixed in a few cells
    }

    goal = person.goal
    tdee = person.TDEE

    protein_pct = macro_ratios[goal]["protein"]
    carbs_pct = macro_ratios[goal]["carbs"]
    fats_pct = macro_ratios[goal]["fats"]

    protein_g = (tdee * protein_pct) / 4
    carbs_g = (tdee * carbs_pct) / 4
    fats_g = (tdee * fats_pct) / 9

    return round(protein_g, 2), round(carbs_g, 2), round(fats_g, 2)

In [22]:
user_profile_dataset[["protein_g", "carbs_g", "fats_g"]] = user_profile_dataset.apply(calculate_macronutrient, axis=1, result_type="expand")
user_profile_dataset.head()

Unnamed: 0,age,gender,height,weight,activity_level,dietary_preference,BMI,obesity_level,goal,TDEE,daily_cals_goal,protein_g,carbs_g,fats_g
0,25,Male,180,80,Moderately Active,Omnivore,24.69,healthy weight,mainten_weight,2797.75,2797.75,209.83,279.78,93.26
1,32,Female,165,65,Lightly Active,Vegetarian,23.88,healthy weight,mainten_weight,1870.34,1870.34,140.28,187.03,62.34
2,48,Male,175,95,Sedentary,Vegan,31.02,obese,loss_weight,2170.5,1670.5,217.05,162.79,72.35
3,55,Female,160,70,Very Active,Omnivore,27.34,overweight,loss_weight,2180.4,1680.4,218.04,163.53,72.68
4,62,Male,170,85,Sedentary,Vegetarian,29.41,overweight,loss_weight,1929.0,1429.0,192.9,144.67,64.3


In [23]:
# Change the gain_weight into gain_muscle
user_profile_dataset["goal"] = user_profile_dataset["goal"].replace("gain_weight", "gain_muscle")
user_profile_dataset.head()

Unnamed: 0,age,gender,height,weight,activity_level,dietary_preference,BMI,obesity_level,goal,TDEE,daily_cals_goal,protein_g,carbs_g,fats_g
0,25,Male,180,80,Moderately Active,Omnivore,24.69,healthy weight,mainten_weight,2797.75,2797.75,209.83,279.78,93.26
1,32,Female,165,65,Lightly Active,Vegetarian,23.88,healthy weight,mainten_weight,1870.34,1870.34,140.28,187.03,62.34
2,48,Male,175,95,Sedentary,Vegan,31.02,obese,loss_weight,2170.5,1670.5,217.05,162.79,72.35
3,55,Female,160,70,Very Active,Omnivore,27.34,overweight,loss_weight,2180.4,1680.4,218.04,163.53,72.68
4,62,Male,170,85,Sedentary,Vegetarian,29.41,overweight,loss_weight,1929.0,1429.0,192.9,144.67,64.3


In [24]:
user_profile_dataset["user_id"] = range(1, len(user_profile_dataset) + 1)

In [25]:
user_profile_dataset.head()

Unnamed: 0,age,gender,height,weight,activity_level,dietary_preference,BMI,obesity_level,goal,TDEE,daily_cals_goal,protein_g,carbs_g,fats_g,user_id
0,25,Male,180,80,Moderately Active,Omnivore,24.69,healthy weight,mainten_weight,2797.75,2797.75,209.83,279.78,93.26,1
1,32,Female,165,65,Lightly Active,Vegetarian,23.88,healthy weight,mainten_weight,1870.34,1870.34,140.28,187.03,62.34,2
2,48,Male,175,95,Sedentary,Vegan,31.02,obese,loss_weight,2170.5,1670.5,217.05,162.79,72.35,3
3,55,Female,160,70,Very Active,Omnivore,27.34,overweight,loss_weight,2180.4,1680.4,218.04,163.53,72.68,4
4,62,Male,170,85,Sedentary,Vegetarian,29.41,overweight,loss_weight,1929.0,1429.0,192.9,144.67,64.3,5


In [26]:
# Rearange the dataframe columns
new_order = ["user_id", "age", "gender", "height", "weight", "activity_level",
              "dietary_preference", "BMI", "obesity_level", "goal", "TDEE",
                "daily_cals_goal", "protein_g", "carbs_g", "fats_g"]

user_profile_dataset = user_profile_dataset[new_order]

In [27]:
# Make final version of users profiles' dataset
user_profile_dataset.to_csv("../output/user_profile_final_version.csv", index=False)

In [28]:
user_profile_dataset.head(10)

Unnamed: 0,user_id,age,gender,height,weight,activity_level,dietary_preference,BMI,obesity_level,goal,TDEE,daily_cals_goal,protein_g,carbs_g,fats_g
0,1,25,Male,180,80,Moderately Active,Omnivore,24.69,healthy weight,mainten_weight,2797.75,2797.75,209.83,279.78,93.26
1,2,32,Female,165,65,Lightly Active,Vegetarian,23.88,healthy weight,mainten_weight,1870.34,1870.34,140.28,187.03,62.34
2,3,48,Male,175,95,Sedentary,Vegan,31.02,obese,loss_weight,2170.5,1670.5,217.05,162.79,72.35
3,4,55,Female,160,70,Very Active,Omnivore,27.34,overweight,loss_weight,2180.4,1680.4,218.04,163.53,72.68
4,5,62,Male,170,85,Sedentary,Vegetarian,29.41,overweight,loss_weight,1929.0,1429.0,192.9,144.67,64.3
5,6,68,Female,155,60,Lightly Active,Vegan,24.97,healthy weight,mainten_weight,1468.16,1468.16,110.11,146.82,48.94
6,7,28,Male,190,100,Moderately Active,Omnivore,27.7,overweight,loss_weight,3181.38,2681.38,318.14,238.6,106.05
7,8,35,Female,170,75,Very Active,Vegetarian,25.95,overweight,loss_weight,2546.96,2046.96,254.7,191.02,84.9
8,9,42,Male,185,110,Moderately Active,Vegan,32.14,obese,loss_weight,3179.44,2679.44,317.94,238.46,105.98
9,10,58,Female,168,78,Sedentary,Vegan,27.64,overweight,loss_weight,1654.8,1154.8,165.48,124.11,55.16


In [29]:
user_profile_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1698 entries, 0 to 1697
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   user_id             1698 non-null   int64  
 1   age                 1698 non-null   int64  
 2   gender              1698 non-null   object 
 3   height              1698 non-null   int64  
 4   weight              1698 non-null   int64  
 5   activity_level      1698 non-null   object 
 6   dietary_preference  1698 non-null   object 
 7   BMI                 1698 non-null   float64
 8   obesity_level       1698 non-null   object 
 9   goal                1698 non-null   object 
 10  TDEE                1698 non-null   float64
 11  daily_cals_goal     1698 non-null   float64
 12  protein_g           1698 non-null   float64
 13  carbs_g             1698 non-null   float64
 14  fats_g              1698 non-null   float64
dtypes: float64(6), int64(4), object(5)
memory usage: 199.1+

In [30]:
import random
# Cuisine options with weights
cuisine = ['Japanese', 'Russian', 'Italian', 'Chinese', 'American', 'None']
weights = [0.1, 0.1, 0.1, 0.2, 0.4, 0.1]

# Assign cuisine based on weights
user_profile_dataset['fav_cuisine'] = user_profile_dataset['user_id'].apply(
    lambda _: random.choices(cuisine, weights=weights, k=1)[0]
)

# Print the result
print(user_profile_dataset)

      user_id  age  gender  height  weight     activity_level  \
0           1   25    Male     180      80  Moderately Active   
1           2   32  Female     165      65     Lightly Active   
2           3   48    Male     175      95          Sedentary   
3           4   55  Female     160      70        Very Active   
4           5   62    Male     170      85          Sedentary   
...       ...  ...     ...     ...     ...                ...   
1693     1694   53  Female     182      76          Sedentary   
1694     1695   38    Male     150      76        Very Active   
1695     1696   57    Male     165      73        Very Active   
1696     1697   40    Male     166      91   Extremely Active   
1697     1698   54  Female     198      75          Sedentary   

     dietary_preference    BMI   obesity_level            goal     TDEE  \
0              Omnivore  24.69  healthy weight  mainten_weight  2797.75   
1            Vegetarian  23.88  healthy weight  mainten_weight  1870.

In [31]:
user_profile_dataset.to_csv("../meal_planner/user_profile_final_version_2.csv", index=False)