In [1]:
import pandas as pd
import numpy as np

In [2]:
recipe_file_path = "../meal_planner/full_recipe_dataset_final_version.csv"
recipe_dataset = pd.read_csv(recipe_file_path)

user_file_path = "../meal_planner/user_profile_final_version_2.csv"
user_dataset = pd.read_csv(user_file_path)


In [3]:
recipe_dataset.columns

Index(['recipe_id', '30_mins', 'breakfast', 'cookies', 'cottage_cheese',
       'desserts', 'dinner', 'lunch', 'meal_prep', 'sauces_seasoning',
       'sides_appetizers', 'calories', 'carbohydrates', 'cholesterol',
       'cook_time', 'course', 'cuisine', 'description', 'fat', 'fiber',
       'full_name', 'image_url', 'ingredient_count', 'ingredients',
       'instructions', 'monounsaturated_fat', 'notes', 'polyunsaturated_fat',
       'potassium', 'prep_time', 'protein', 'saturated_fat', 'serving',
       'sodium', 'sugar', 'title', 'dairy_free', 'gluten_free', '4th_july',
       'christmas', 'cinco_de_mayo', 'easter', 'fathers_day', 'labor_day',
       'memorial_day', 'mothers_day', 'thanksgiving', 'valentienes_day',
       'beef', 'chicken', 'pork', 'turkey', 'seafood', 'fall', 'pumpkin',
       'spring', 'summer', 'winter', 'protein_type', 'meal_type', 'holiday',
       'season', 'ingredient_names', 'diet_type', 'diet_prefrences'],
      dtype='object')

In [4]:
unwanted_columns = ['recipe_id', '30_mins', 'breakfast', 'cookies', 'cottage_cheese',
       'desserts', 'dinner', 'lunch', 'meal_prep', 'sauces_seasoning',
       'sides_appetizers', 'cholesterol',
       'cook_time', 'course', 'description', 'fiber',
       'full_name', 'image_url', 'ingredient_count', 'ingredients',
       'instructions', 'monounsaturated_fat', 'notes', 'polyunsaturated_fat',
       'potassium', 'prep_time', 'saturated_fat', 'serving',
       'sodium', 'sugar', 'dairy_free', 'gluten_free', '4th_july',
       'christmas', 'cinco_de_mayo', 'easter', 'fathers_day', 'labor_day',
       'memorial_day', 'mothers_day', 'thanksgiving', 'valentienes_day',
       'beef', 'chicken', 'pork', 'turkey', 'seafood', 'fall', 'pumpkin',
       'spring', 'summer', 'winter', 'holiday',
       'season']
recipe_dataset = recipe_dataset.drop(unwanted_columns, axis=1)

In [5]:
recipe_dataset.columns

Index(['calories', 'carbohydrates', 'cuisine', 'fat', 'protein', 'title',
       'protein_type', 'meal_type', 'ingredient_names', 'diet_type',
       'diet_prefrences'],
      dtype='object')

In [6]:
recipe_dataset.to_csv("../meal_planner/model_recipe.csv", index=False)

In [40]:
user_dataset.columns

Index(['user_id', 'age', 'gender', 'height', 'weight', 'activity_level',
       'dietary_preference', 'BMI', 'obesity_level', 'goal', 'TDEE',
       'daily_cals_goal', 'protein_g', 'carbs_g', 'fats_g', 'fav_cuisine'],
      dtype='object')

In [41]:
unwanted_columns = ['user_id', 'age', 'gender', 'height', 'weight', 'activity_level',
       'dietary_preference', 'BMI', 'obesity_level', 'goal', 'TDEE']
user_dataset = user_dataset.drop(unwanted_columns, axis=1)
user_dataset ["fav_cuisine"] = user_dataset["fav_cuisine"].apply(lambda x: [] if pd.isna(x) else x)

In [42]:
user_dataset.to_csv("../meal_planner/model_user.csv", index=False)

In [48]:
import pandas as pd
import ast  # Safe parsing for list-like strings
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# Load datasets
recipes = recipe_dataset.copy()
users = user_dataset.copy()

# Function to safely process list-like columns
def process_list_column(col):
    return col.apply(lambda x: ','.join(ast.literal_eval(x)) if isinstance(x, str) and x.startswith("[") else '')

recipes['meal_type'] = process_list_column(recipes['meal_type'])
recipes['diet_prefrences'] = process_list_column(recipes['diet_prefrences'])

# Select a user (first user for simplicity)
user = users.iloc[0]

# Ensure fav_cuisine is a list (if empty, set to an empty list)
user['fav_cuisine'] = user['fav_cuisine'] if isinstance(user['fav_cuisine'], list) else []

# Fill missing values in 'calories' column
recipes['calories'].fillna(recipes['calories'].median(), inplace=True)

# Compute calorie difference and match favorite cuisine
recipes['calorie_diff'] = abs(recipes['calories'] - user['daily_cals_goal'] / 3)

# Fill missing values in 'calorie_diff'
recipes['calorie_diff'].fillna(0, inplace=True)

# Cuisine match (convert to int)
recipes['fav_cuisine_match'] = recipes['cuisine'].apply(lambda x: int(x in user['fav_cuisine']))

# Prepare features and target
X = recipes[['calories', 'protein', 'carbohydrates', 'fat', 'meal_type', 'diet_prefrences', 'fav_cuisine_match']]
y = -recipes['calorie_diff']  # Negative to prioritize lower calorie difference

# Ensure y has no NaN values
y.fillna(0, inplace=True)

# Preprocessing pipeline
categorical_features = ['meal_type', 'diet_prefrences']
numerical_features = ['calories', 'protein', 'carbohydrates', 'fat', 'fav_cuisine_match']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ]
)

# Train a model
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])

# Split data and fit the model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model.fit(X_train, y_train)  # No more NaN errors!

# Predict scores for recommendation
recipes['score'] = model.predict(X)

# Recommend top 5 meals
recommendations = recipes.sort_values(by='score', ascending=False).head(5)

print("Recommended Meals:")
print(recommendations[['title', 'calories', 'meal_type', 'diet_prefrences', 'score']])


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user['fav_cuisine'] = user['fav_cuisine'] if isinstance(user['fav_cuisine'], list) else []
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  recipes['calories'].fillna(recipes['calories'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.met

Recommended Meals:
                                title  calories meal_type diet_prefrences  \
240            Instant Pot Short Ribs     667.0                             
119  Cajun Sausage and Potato Skillet     573.0                             
320            Marry Me Chicken Pasta     539.0                             
28           Macro Friendly Chili Mac     499.0                             
78                     Fajita Burgers     496.0                             

          score  
240 -325.233333  
119 -375.573333  
320 -414.863333  
28  -435.493333  
78  -441.413333  


In [46]:
import pandas as pd 
import ast  # Safe parsing for list-like strings
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# Load datasets
recipes = recipe_dataset.copy()
users = user_dataset.copy()

# Function to safely process list-like columns
def process_list_column(col):
    return col.apply(lambda x: ','.join(ast.literal_eval(x)) if isinstance(x, str) and x.startswith("[") else '')

recipes['meal_type'] = process_list_column(recipes['meal_type'])
recipes['diet_prefrences'] = process_list_column(recipes['diet_prefrences'])

# Select a user (first user for simplicity)
user = users.iloc[0]

# Ensure fav_cuisine is a list (if empty, set to an empty list)
user['fav_cuisine'] = user['fav_cuisine'] if isinstance(user['fav_cuisine'], list) else []

# Fill missing values in 'calories' column
recipes['calories'].fillna(recipes['calories'].median(), inplace=True)

# Compute calorie difference and match favorite cuisine
recipes['calorie_diff'] = abs(recipes['calories'] - user['daily_cals_goal'] / 4)

# Fill missing values in 'calorie_diff'
recipes['calorie_diff'].fillna(0, inplace=True)

# Cuisine match (convert to int)
recipes['fav_cuisine_match'] = recipes['cuisine'].apply(lambda x: int(x in user['fav_cuisine']))

# Filter recipes based on diet preferences
diet_preferences = set(user.get('diet_preferences', []))
recipes = recipes[recipes['diet_prefrences'].apply(lambda x: bool(set(x.split(',')) & diet_preferences))]

# Prepare features and target
X = recipes[['calories', 'protein', 'carbohydrates', 'fat', 'meal_type', 'diet_prefrences', 'fav_cuisine_match']]
y = -recipes['calorie_diff']  # Negative to prioritize lower calorie difference

# Ensure y has no NaN values
y.fillna(0, inplace=True)

# Preprocessing pipeline
categorical_features = ['meal_type', 'diet_prefrences']
numerical_features = ['calories', 'protein', 'carbohydrates', 'fat', 'fav_cuisine_match']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ]
)

# Train a model
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])

# Split data and fit the model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model.fit(X_train, y_train)  # No more NaN errors!

# Predict scores for recommendation
recipes['score'] = model.predict(X)

# Recommend top 4 meals
recommendations = recipes.sort_values(by='score', ascending=False).head(4)

print("Recommended Meals:")
print(recommendations[['title', 'calories', 'meal_type', 'diet_prefrences', 'score']])


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user['fav_cuisine'] = user['fav_cuisine'] if isinstance(user['fav_cuisine'], list) else []
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  recipes['calories'].fillna(recipes['calories'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.met

ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.