In [2]:
import pandas as pd

# Read both source tables in one pass: the recipe dataset first, then the
# health and age-based nutrient needs.
recipe_df, age_health_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/70000_recipes_nutrients_cleaned_final.csv',
        '/content/Cleaned_Age_and_health_data.csv',
    )
)

# Preview the first rows of each table (label on one line, frame below it).
print("Recipe data:", recipe_df.head(), sep="\n")
print("\nAge and Health nutrient requirements:", age_health_df.head(), sep="\n")

# List the column names of each table.
print("\nColumns in Recipe Dataset:", recipe_df.columns.tolist(), sep="\n")
print("\nColumns in Age and Health Dataset:", age_health_df.columns.tolist(), sep="\n")


Recipe data:
                                 recipe_name  \
0                              stalker pasta   
1                vegan wild mushroom lasagna   
2  rwop finalist: tantalizing tilapia recipe   
3             blue cheese portobello burgers   
4       pan-grilled portobello mushroom caps   

                                         ingredients  calories  protein  \
0  3 tbsp. olive oil, 2 oz. pancetta or regular b...   1577.70    80.65   
1  9 sheets of oven-ready, no-boil lasagna, 1 1/2...   4751.64   131.58   
2  2 tsp blackened seasoning, 1 tsp lemon pepper ...   4423.25   283.33   
3  3 tablespoons extra-virgin olive oil, divided,...   1345.74    44.47   
4  4 x portobello mushroom caps, the dry stem tri...    677.32    13.03   

      fat  carbohydrates  
0  129.21          13.05  
1   99.37         817.18  
2  355.05          41.84  
3   69.45         142.97  
4   61.41          19.62  

Age and Health nutrient requirements:
   Ages  Gender  Height  Weight     Activity L

In [4]:
# Standardize column names: give the recipe nutrient columns the same
# capitalized names that the health dataset uses, so both frames can be
# addressed with one shared list of nutrient columns.
recipe_df = recipe_df.rename(
    columns=dict(
        calories='Calories',
        protein='Protein',
        fat='Fat',
        carbohydrates='Carbohydrates',
    )
)


In [6]:
# Convert nutrient columns to numeric in both datasets.
# Values that cannot be parsed as numbers become NaN (errors='coerce').
nutrient_cols = ['Calories', 'Protein', 'Carbohydrates', 'Fat']
for frame in (recipe_df, age_health_df):
    frame[nutrient_cols] = frame[nutrient_cols].apply(pd.to_numeric, errors='coerce')
recipe_df.head()

Unnamed: 0,recipe_name,ingredients,Calories,Protein,Fat,Carbohydrates,ingredient_list
0,stalker pasta,"3 tbsp. olive oil, 2 oz. pancetta or regular b...",1577.7,80.65,129.21,13.05,"[3 tbsp. olive oil, 2 oz. pancetta or regular ..."
1,vegan wild mushroom lasagna,"9 sheets of oven-ready, no-boil lasagna, 1 1/2...",4751.64,131.58,99.37,817.18,"[9 sheets of oven-ready, no-boil lasagna, 1 1/..."
2,rwop finalist: tantalizing tilapia recipe,"2 tsp blackened seasoning, 1 tsp lemon pepper ...",4423.25,283.33,355.05,41.84,"[2 tsp blackened seasoning, 1 tsp lemon pepper..."
3,blue cheese portobello burgers,"3 tablespoons extra-virgin olive oil, divided,...",1345.74,44.47,69.45,142.97,"[3 tablespoons extra-virgin olive oil, divided..."
4,pan-grilled portobello mushroom caps,"4 x portobello mushroom caps, the dry stem tri...",677.32,13.03,61.41,19.62,"[4 x portobello mushroom caps, the dry stem tr..."


In [7]:
# Preview the first rows of the age/health requirements table.
age_health_df.head()

Unnamed: 0,Ages,Gender,Height,Weight,Activity Level,Dietary Preference,Daily Calorie Target,Protein,Sugar,Sodium,Calories,Carbohydrates,Fiber,Fat,Breakfast Suggestion,Lunch Suggestion,Dinner Suggestion,Snack Suggestion,Disease
0,25,Male,180,80,Moderately Active,Omnivore,2000,120,125.0,24.0,2020,250,30.0,60,Oatmeal with berries and nuts,Grilled chicken salad with mixed greens,Salmon with roasted vegetables,Greek yogurt with fruit,Weight Gain
1,32,Female,165,65,Lightly Active,Vegetarian,1600,80,100.0,16.0,1480,200,24.0,40,Tofu scramble with veggies,Lentil soup with whole wheat bread,Vegetable stir-fry with brown rice,Apple with almond butter,"Weight Gain, Hypertension, Heart Disease"
2,48,Male,175,95,Sedentary,Vegan,2200,100,150.0,20.0,2185,300,36.0,65,Tofu and veggie breakfast burrito,Black bean burger on a whole wheat bun,Lentil and vegetable curry,Trail mix,Weight Gain
3,55,Female,160,70,Very Active,Omnivore,2500,140,175.0,28.0,2680,350,42.0,80,Greek yogurt with granola and fruit,Chicken and vegetable stir-fry,Turkey chili with brown rice,Banana with peanut butter,Weight Gain
4,62,Male,170,85,Sedentary,Vegetarian,2000,80,125.0,16.0,1815,250,30.0,55,Scrambled eggs with whole wheat toast and avocado,Quinoa salad with chickpeas and vegetables,Vegetarian chili with cornbread,Fruit and nut mix,Weight Gain


In [8]:
# Add nutrient ratio features (nutrient amount divided by the recipe's calories).
# The divisor is masked to NaN wherever Calories is zero, negative, or missing:
# dividing by 0 would otherwise yield inf (or NaN for 0/0), and infinite values
# are rejected by most scalers and distance-based models.
calories_safe = recipe_df['Calories'].where(recipe_df['Calories'] > 0)
recipe_df['Protein/Calorie'] = recipe_df['Protein'] / calories_safe
recipe_df['Carb/Calorie'] = recipe_df['Carbohydrates'] / calories_safe
recipe_df['Fat/Calorie'] = recipe_df['Fat'] / calories_safe


Process ingredients into lists + ingredient count

Prepare ingredient data for NLP + add count feature.

In [9]:
# Convert the comma-separated ingredient string to a cleaned list and count it.
# - Missing ingredient text becomes an empty list instead of raising on `.split`.
# - Empty fragments (e.g. from a trailing or doubled comma) are dropped so they
#   do not inflate the count.
# NOTE(review): splitting on ',' also separates qualifiers such as
# "olive oil, divided" into their own entries, so num_ingredients is an
# approximation of the true ingredient count — confirm that is acceptable.
recipe_df['ingredient_list'] = recipe_df['ingredients'].fillna('').apply(
    lambda text: [part.strip().lower() for part in str(text).split(',') if part.strip()]
)
recipe_df['num_ingredients'] = recipe_df['ingredient_list'].apply(len)


In [10]:
# Preview recipe_df, which now carries the ratio, ingredient_list and
# num_ingredients columns.
recipe_df.head()

Unnamed: 0,recipe_name,ingredients,Calories,Protein,Fat,Carbohydrates,ingredient_list,Protein/Calorie,Carb/Calorie,Fat/Calorie,num_ingredients
0,stalker pasta,"3 tbsp. olive oil, 2 oz. pancetta or regular b...",1577.7,80.65,129.21,13.05,"[3 tbsp. olive oil, 2 oz. pancetta or regular ...",0.051119,0.008272,0.081898,11
1,vegan wild mushroom lasagna,"9 sheets of oven-ready, no-boil lasagna, 1 1/2...",4751.64,131.58,99.37,817.18,"[9 sheets of oven-ready, no-boil lasagna, 1 1/...",0.027691,0.171979,0.020913,21
2,rwop finalist: tantalizing tilapia recipe,"2 tsp blackened seasoning, 1 tsp lemon pepper ...",4423.25,283.33,355.05,41.84,"[2 tsp blackened seasoning, 1 tsp lemon pepper...",0.064055,0.009459,0.080269,22
3,blue cheese portobello burgers,"3 tablespoons extra-virgin olive oil, divided,...",1345.74,44.47,69.45,142.97,"[3 tablespoons extra-virgin olive oil, divided...",0.033045,0.106239,0.051607,18
4,pan-grilled portobello mushroom caps,"4 x portobello mushroom caps, the dry stem tri...",677.32,13.03,61.41,19.62,"[4 x portobello mushroom caps, the dry stem tr...",0.019238,0.028967,0.090666,11


 Process diseases as multi-label binary features

This prepares data for filtering and future model training.

In [11]:
from sklearn.preprocessing import MultiLabelBinarizer

# Convert disease column to a list of lower-cased, stripped labels.
# Blank fragments are dropped so a row with no disease yields [] rather than
# [''], which would otherwise create a spurious '' column in the encoding.
age_health_df['Disease_List'] = age_health_df['Disease'].fillna('').apply(
    lambda text: [label.strip().lower() for label in str(text).split(',') if label.strip()]
)

# One-hot encode diseases: one 0/1 indicator column per distinct label.
mlb = MultiLabelBinarizer()
disease_encoded = pd.DataFrame(
    mlb.fit_transform(age_health_df['Disease_List']),
    columns=mlb.classes_,
    index=age_health_df.index,
)

# Concatenate back. Indicator columns left behind by an earlier run of this
# cell are dropped first, so re-running does not append duplicate columns.
age_health_df = pd.concat(
    [age_health_df.drop(columns=disease_encoded.columns, errors='ignore'), disease_encoded],
    axis=1,
)


In [12]:
age_health_df.head()

Unnamed: 0,Ages,Gender,Height,Weight,Activity Level,Dietary Preference,Daily Calorie Target,Protein,Sugar,Sodium,...,Snack Suggestion,Disease,Disease_List,acne,diabetes,heart disease,hypertension,kidney disease,weight gain,weight loss
0,25,Male,180,80,Moderately Active,Omnivore,2000,120,125.0,24.0,...,Greek yogurt with fruit,Weight Gain,[weight gain],0,0,0,0,0,1,0
1,32,Female,165,65,Lightly Active,Vegetarian,1600,80,100.0,16.0,...,Apple with almond butter,"Weight Gain, Hypertension, Heart Disease","[weight gain, hypertension, heart disease]",0,0,1,1,0,1,0
2,48,Male,175,95,Sedentary,Vegan,2200,100,150.0,20.0,...,Trail mix,Weight Gain,[weight gain],0,0,0,0,0,1,0
3,55,Female,160,70,Very Active,Omnivore,2500,140,175.0,28.0,...,Banana with peanut butter,Weight Gain,[weight gain],0,0,0,0,0,1,0
4,62,Male,170,85,Sedentary,Vegetarian,2000,80,125.0,16.0,...,Fruit and nut mix,Weight Gain,[weight gain],0,0,0,0,0,1,0


Normalize nutrient features

This is useful if you plan to use ML models like KNN or clustering.

In [13]:
# Standardize the nutrient columns into a separate frame; recipe_df itself
# keeps its raw values.
from sklearn.preprocessing import StandardScaler

# Learn the per-column scaling statistics from the raw nutrient columns.
scaler = StandardScaler().fit(recipe_df[nutrient_cols])

# Write the standardized values into a copy of the recipe frame.
recipe_df_scaled = recipe_df.copy()
recipe_df_scaled[nutrient_cols] = scaler.transform(recipe_df[nutrient_cols])


1. **Data Cleaning & Standardization**

Unified column names between the recipe and health dataset (Calories, Protein, etc.).

Converted all nutrient columns to numeric to allow math operations.

2. **Nutrient Ratio Features**

Computed nutrient density ratios (e.g., Protein/Calorie), which:

Help capture how efficient a recipe is in providing nutrients.

Improve model performance if used later for similarity or classification.

3. **Ingredient Processing**

Ingredients were tokenized into lists.

Computed the number of ingredients, which can be useful for:

Estimating recipe complexity.

Matching recipes with simpler or more complex dietary needs.

4. **Health Condition Feature Engineering**

Converted the Disease column of the health dataset into multi-label binary features.

Health profiles can now be filtered by target disease (e.g., "diabetes" or "heart disease") when selecting recipes for them.

5. **Optional Scaling (for ML)**

Created a normalized version of the recipe data (recipe_df_scaled).

This is useful for clustering, recommendation models, or other ML methods.

first prompt

my wireframe is
1. Recipe extractor (using NLP to read ingredients)
2. Nutrients analyzer (finds missing nutrients)
3. AI recommender model (suggests what to add)
4. UI layer (user enters recipe and health goals)
Tell me whether this idea works or whether something needs to change.

Last prompt: Done. Now give me a brief explanation and the future work.