In [1]:
%pip install isodate




[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [2]:
import isodate
# Function to convert ISO 8601 duration to human-readable format
def convert_iso_duration(iso_duration):
    # Handle non-string values (e.g., None or NaN)
    if not isinstance(iso_duration, str):
        return "unknown"
    
    try:
        # Parse the ISO 8601 duration string using isodate
        duration = isodate.parse_duration(iso_duration)
        
        # Extract total seconds and break them into days, hours, minutes, seconds
        total_seconds = int(duration.total_seconds())
        days, remainder = divmod(total_seconds, 86400)  # 1 day = 86400 seconds
        hours, remainder = divmod(remainder, 3600)  # 1 hour = 3600 seconds
        minutes, seconds = divmod(remainder, 60)  # 1 minute = 60 seconds

        # Build human-readable string
        readable = []
        if days > 0:
            readable.append(f"{days} day{'s' if days > 1 else ''}")
        if hours > 0:
            readable.append(f"{hours} hour{'s' if hours > 1 else ''}")
        if minutes > 0:
            readable.append(f"{minutes} minute{'s' if minutes > 1 else ''}")
        if seconds > 0:
            readable.append(f"{seconds} second{'s' if seconds > 1 else ''}")

        return ", ".join(readable)
    except Exception as e:
        # If there is an error (e.g., invalid format), return an error message
        return "unknown"
# Example usage
iso_duration = "PT24H"  # 24 hours
print(convert_iso_duration(None))  # Output: 24 hours

unknown


In [3]:
# Define a cleaning function
def clean_ingredients(ingredient_str):
    # Remove 'c (' and ')', and clean double quotes and spaces
    ingredient_str = ingredient_str.replace('c(', '').replace(')', '')  # Remove 'c (' and ')'
    ingredient_str = ingredient_str.replace('"', '')  # Remove double quotes
    return ingredient_str

In [4]:
#convert RecipeIngredientParts and RecipeIngredientQuantities to a list
import ast

def parse_list(r_string):
    try:
        if r_string == "character(0)":
            return []  # Handle character(0) as an empty list
        return ast.literal_eval(r_string.replace('c(', '[').replace(')', ']'))
    except Exception:
        return []  # Return None for invalid rows


In [5]:
from fractions import Fraction

# Function to convert a string to a float, handling fractions like "1/4"
def convert_to_float(value):
    try:
        # Try to parse the string as a fraction
        return float(Fraction(value))
    except ValueError:
        # If it fails (e.g., not a fraction or float), return 0.0
        return 0.0
    
# Convert Quantities from strings to floats (handling fractions and decimals)
#data_sup['Quantities'] = data_sup['Quantities'].apply(
#    lambda x: [convert_to_float(q) for q in x]  # Convert each string in the list
#)

1. Getting the Data

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

pd.options.display.float_format = '{:.2f}'.format

# Load dataset
data = pd.read_csv('recipes.csv')
print("Dataset Loaded Successfully")


Dataset Loaded Successfully


2. Exploring the Data

In [7]:
# Overview of the dataset
print("Dataset Information:")
data.info()

Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 522517 entries, 0 to 522516
Data columns (total 28 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   RecipeId                    522517 non-null  int64  
 1   Name                        522517 non-null  object 
 2   AuthorId                    522517 non-null  int64  
 3   AuthorName                  522517 non-null  object 
 4   CookTime                    439972 non-null  object 
 5   PrepTime                    522517 non-null  object 
 6   TotalTime                   522517 non-null  object 
 7   DatePublished               522517 non-null  object 
 8   Description                 522512 non-null  object 
 9   Images                      522516 non-null  object 
 10  RecipeCategory              521766 non-null  object 
 11  Keywords                    505280 non-null  object 
 12  RecipeIngredientQuantities  522514 non-null  object

In [8]:
# Shape and statistical description
print("\nDataset Shape:", data.shape)
print("\nStatistical Summary:")
print(data.describe(include='all'))


Dataset Shape: (522517, 28)

Statistical Summary:
        RecipeId          Name      AuthorId       AuthorName CookTime  \
count  522517.00        522517     522517.00           522517   439972   
unique       NaN        438188           NaN            56793      490   
top          NaN  Banana Bread           NaN  ratherbeswimmin    PT30M   
freq         NaN           186           NaN             7742    50715   
mean   271821.44           NaN   45725847.89              NaN      NaN   
std    155495.88           NaN  292971448.67              NaN      NaN   
min        38.00           NaN         27.00              NaN      NaN   
25%    137206.00           NaN      69474.00              NaN      NaN   
50%    271758.00           NaN     238937.00              NaN      NaN   
75%    406145.00           NaN     565828.00              NaN      NaN   
max    541383.00           NaN 2002886148.00              NaN      NaN   

       PrepTime TotalTime         DatePublished  \
count    

In [9]:
# Check for missing values
missing_values = data.isnull().sum()
missing_percentage = (missing_values / len(data)) * 100
print("\nMissing Values Count and Percentage:")
print(missing_percentage)


Missing Values Count and Percentage:
RecipeId                      0.00
Name                          0.00
AuthorId                      0.00
AuthorName                    0.00
CookTime                     15.80
PrepTime                      0.00
TotalTime                     0.00
DatePublished                 0.00
Description                   0.00
Images                        0.00
RecipeCategory                0.14
Keywords                      3.30
RecipeIngredientQuantities    0.00
RecipeIngredientParts         0.00
AggregatedRating             48.46
ReviewCount                  47.36
Calories                      0.00
FatContent                    0.00
SaturatedFatContent           0.00
CholesterolContent            0.00
SodiumContent                 0.00
CarbohydrateContent           0.00
FiberContent                  0.00
SugarContent                  0.00
ProteinContent                0.00
RecipeServings               35.01
RecipeYield                  66.61
RecipeInstruction

In [10]:
data.head(10)

Unnamed: 0,RecipeId,Name,AuthorId,AuthorName,CookTime,PrepTime,TotalTime,DatePublished,Description,Images,...,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeServings,RecipeYield,RecipeInstructions
0,38,Low-Fat Berry Blue Frozen Dessert,1533,Dancer,PT24H,PT45M,PT24H45M,1999-08-09T21:46:00Z,Make and share this Low-Fat Berry Blue Frozen ...,"c(""https://img.sndimg.com/food/image/upload/w_...",...,1.3,8.0,29.8,37.1,3.6,30.2,3.2,4.0,,"c(""Toss 2 cups berries with sugar."", ""Let stan..."
1,39,Biryani,1567,elly9812,PT25M,PT4H,PT4H25M,1999-08-29T13:12:00Z,Make and share this Biryani recipe from Food.com.,"c(""https://img.sndimg.com/food/image/upload/w_...",...,16.6,372.8,368.4,84.4,9.0,20.4,63.4,6.0,,"c(""Soak saffron in warm milk for 5 minutes and..."
2,40,Best Lemonade,1566,Stephen Little,PT5M,PT30M,PT35M,1999-09-05T19:52:00Z,This is from one of my first Good House Keepi...,"c(""https://img.sndimg.com/food/image/upload/w_...",...,0.0,0.0,1.8,81.5,0.4,77.2,0.3,4.0,,"c(""Into a 1 quart Jar with tight fitting lid, ..."
3,41,Carina's Tofu-Vegetable Kebabs,1586,Cyclopz,PT20M,PT24H,PT24H20M,1999-09-03T14:54:00Z,This dish is best prepared a day in advance to...,"c(""https://img.sndimg.com/food/image/upload/w_...",...,3.8,0.0,1558.6,64.2,17.3,32.1,29.3,2.0,4 kebabs,"c(""Drain the tofu, carefully squeezing out exc..."
4,42,Cabbage Soup,1538,Duckie067,PT30M,PT20M,PT50M,1999-09-19T06:19:00Z,Make and share this Cabbage Soup recipe from F...,"""https://img.sndimg.com/food/image/upload/w_55...",...,0.1,0.0,959.3,25.1,4.8,17.7,4.3,4.0,,"c(""Mix everything together and bring to a boil..."
5,43,Best Blackbottom Pie,34879,Barefoot Beachcomber,PT2H,PT20M,PT2H20M,1999-08-21T10:35:00Z,Make and share this Best Blackbottom Pie recip...,character(0),...,10.9,94.3,267.6,58.0,1.8,42.5,7.0,8.0,1 9-inch pie,"c(""Graham Cracker Crust: In small bowl, combin..."
6,44,Warm Chicken A La King,1596,Joan Edington,PT3M,PT35M,PT38M,1999-09-17T04:47:00Z,I copied this one out of a friend's book so ma...,"""https://img.sndimg.com/food/image/upload/w_55...",...,31.9,405.8,557.2,29.1,3.1,5.0,45.3,2.0,,"c(""Melt 1 1/2 ozs butter, add the flour and co..."
7,45,Buttermilk Pie With Gingersnap Crumb Crust,1580,tristitia,PT50M,PT30M,PT1H20M,1999-08-06T00:40:00Z,Make and share this Buttermilk Pie With Ginger...,"""https://img.sndimg.com/food/image/upload/w_55...",...,1.7,24.5,281.8,37.5,0.5,24.7,4.2,8.0,,"c(""Preheat oven to 350°F."", ""Make pie crust, u..."
8,46,A Jad - Cucumber Pickle,1533,Dancer,,PT25M,PT25M,1999-08-11T19:48:00Z,Make and share this A Jad - Cucumber Pickle re...,character(0),...,0.0,0.0,0.7,1.1,0.2,0.2,0.1,,1 cup,"c(""Slice the cucumber in four lengthwise, then..."
9,47,Butter Pecan Cookies,1573,benluc,PT9M,PT55M,PT1H4M,1999-09-07T09:01:00Z,Make and share this Butter Pecan Cookies recip...,"c(""https://img.sndimg.com/food/image/upload/w_...",...,1.4,6.3,15.0,4.5,0.6,1.6,0.8,,84 cookies,"c(""Preheat oven to 350 degrees."", ""Cream butte..."


3. Data Cleaning and Preprocessing

In [11]:
# Feature extraction
selected_columns = [
    'RecipeId', 'Name', 'CookTime','RecipeServings','RecipeCategory','RecipeIngredientQuantities','RecipeIngredientParts', 'AggregatedRating','Calories', 'FatContent', 'SaturatedFatContent',
    'CholesterolContent', 'SodiumContent', 'CarbohydrateContent','FiberContent', 'SugarContent', 'ProteinContent', 'RecipeInstructions'
]
data_extracted = data[selected_columns]
print("\nData Extracted Completed. Preview:")
data_extracted.shape


Data Extracted Completed. Preview:


(522517, 18)

In [12]:
data_extracted.isnull().sum()

RecipeId                           0
Name                               0
CookTime                       82545
RecipeServings                182911
RecipeCategory                   751
RecipeIngredientQuantities         3
RecipeIngredientParts              0
AggregatedRating              253223
Calories                           0
FatContent                         0
SaturatedFatContent                0
CholesterolContent                 0
SodiumContent                      0
CarbohydrateContent                0
FiberContent                       0
SugarContent                       0
ProteinContent                     0
RecipeInstructions                 0
dtype: int64

In [13]:
# Handling missing values
data_cleaned = data_extracted.copy() #data_cleaned.dropna(inplace=True)
data_cleaned.CookTime = data_cleaned.CookTime.apply(convert_iso_duration)
data_cleaned.CookTime.head(20)

0          1 day
1     25 minutes
2      5 minutes
3     20 minutes
4     30 minutes
5        2 hours
6      3 minutes
7     50 minutes
8        unknown
9      9 minutes
10       unknown
11    30 minutes
12    50 minutes
13    25 minutes
14       unknown
15    45 minutes
16    50 minutes
17       2 hours
18        1 hour
19       unknown
Name: CookTime, dtype: object

In [14]:
data_cleaned = data_cleaned.dropna(subset=["RecipeCategory"])

In [15]:
data_cleaned = data_cleaned.dropna(subset=["RecipeIngredientQuantities"])
data_cleaned['RecipeIngredientParts'] = data_cleaned['RecipeIngredientParts'].apply(clean_ingredients)


In [16]:
data_cleaned["AggregatedRating"] = data_cleaned["AggregatedRating"].fillna(data_cleaned["AggregatedRating"].mean())

In [17]:
# Apply parsing safely
data_cleaned["RecipeInstructions"] = data_cleaned["RecipeInstructions"].apply(parse_list)
data_cleaned["RecipeIngredientQuantities"] = data_cleaned["RecipeIngredientQuantities"].apply(parse_list)


In [18]:
data_cleaned["RecipeServings"] = pd.to_numeric(data_cleaned["RecipeServings"], errors='coerce')

In [19]:
# Nutritional maximum thresholds for a single meal (assuming 3 meals a day)
max_thresholds_per_meal = {
    'Calories': 2000 / 3,  # Daily caloric intake divided by 3 meals
    'FatContent': 70 / 3,  # Fat content per meal
    'SaturatedFatContent': 22 / 3,  # Saturated fat per meal
    'CholesterolContent': 300 / 3,  # Cholesterol per meal
    'SodiumContent': 2300 / 3,  # Sodium per meal
    'CarbohydrateContent': 325 / 3,  # Carbohydrates per meal
    'FiberContent': 25 / 3,  # Fiber per meal
    'SugarContent': 50 / 3,  # Sugar per meal
    'ProteinContent': 175 / 3  # Protein per meal
}

data_cleaned_copy = data_cleaned.copy()

# Appliquer les filtres en fonction des seuils nutritionnels
def clean_recipe(row):
    for column, maximum in max_thresholds_per_meal.items():
    
        if column in row and row[column] >= maximum:
            return None  # Si une valeur dépasse le seuil, on supprime cette ligne
    # Si toutes les valeurs sont inférieures au seuil, on met 'RecipeServings' à 1
    row['RecipeServings'] = 1
    return row

data_cleaned_copy[data_cleaned_copy['RecipeServings'].isnull()].apply(clean_recipe, axis=1)


# Supprimer les lignes où 'clean_recipe' a renvoyé None (lignes à supprimer)
data_cleaned_copy = data_cleaned_copy.dropna(subset=['RecipeServings'])


# Afficher les premières lignes du DataFrame nettoyé
data_cleaned_copy.head()


Unnamed: 0,RecipeId,Name,CookTime,RecipeServings,RecipeCategory,RecipeIngredientQuantities,RecipeIngredientParts,AggregatedRating,Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeInstructions
0,38,Low-Fat Berry Blue Frozen Dessert,1 day,4.0,Frozen Desserts,"[4, 1/4, 1, 1]","blueberries, granulated sugar, vanilla yogurt,...",4.5,170.9,2.5,1.3,8.0,29.8,37.1,3.6,30.2,3.2,"[Toss 2 cups berries with sugar., Let stand fo..."
1,39,Biryani,25 minutes,6.0,Chicken Breast,[],"saffron, milk, hot green chili peppers, onions...",3.0,1110.7,58.8,16.6,372.8,368.4,84.4,9.0,20.4,63.4,[Soak saffron in warm milk for 5 minutes and p...
2,40,Best Lemonade,5 minutes,4.0,Beverages,[],"sugar, lemons, rind of, lemon, zest of, fresh ...",4.5,311.1,0.2,0.0,0.0,1.8,81.5,0.4,77.2,0.3,"[Into a 1 quart Jar with tight fitting lid, pu..."
3,41,Carina's Tofu-Vegetable Kebabs,20 minutes,2.0,Soy/Tofu,"[12, 1, 2, 1, 10, 1, 3, 2, 2, 2, 1, 2, 1/2, 1/...","extra firm tofu, eggplant, zucchini, mushrooms...",4.5,536.1,24.0,3.8,0.0,1558.6,64.2,17.3,32.1,29.3,"[Drain the tofu, carefully squeezing out exces..."
4,42,Cabbage Soup,30 minutes,4.0,Vegetable,"[46, 4, 1, 2, 1]","plain tomato juice, cabbage, onion, carrots, c...",4.5,103.6,0.4,0.1,0.0,959.3,25.1,4.8,17.7,4.3,"[Mix everything together and bring to a boil.,..."


In [20]:
data_cleaned_copy.shape


(339121, 18)

In [21]:
def handle_outliers(recipes, features):
    # Identifier les valeurs aberrantes (par exemple, tout ce qui dépasse 3 écarts-types)
    z_scores = (recipes[features] - recipes[features].mean()) / recipes[features].std()
    outliers = (z_scores.abs() > 3).any(axis=1)
    
    # Supprimer les recettes avec des valeurs aberrantes
    recipes_cleaned = recipes[~outliers]
    
    return recipes_cleaned

data_cleaned_copy = handle_outliers(recipes=data_cleaned_copy, features=['Calories', 'FatContent', 'ProteinContent', 'CarbohydrateContent'])

In [22]:
data_cleaned_copy.isnull().sum()

RecipeId                      0
Name                          0
CookTime                      0
RecipeServings                0
RecipeCategory                0
RecipeIngredientQuantities    0
RecipeIngredientParts         0
AggregatedRating              0
Calories                      0
FatContent                    0
SaturatedFatContent           0
CholesterolContent            0
SodiumContent                 0
CarbohydrateContent           0
FiberContent                  0
SugarContent                  0
ProteinContent                0
RecipeInstructions            0
dtype: int64

In [23]:
# Afficher les premières lignes du DataFrame après division
data_cleaned_copy.head()

Unnamed: 0,RecipeId,Name,CookTime,RecipeServings,RecipeCategory,RecipeIngredientQuantities,RecipeIngredientParts,AggregatedRating,Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeInstructions
0,38,Low-Fat Berry Blue Frozen Dessert,1 day,4.0,Frozen Desserts,"[4, 1/4, 1, 1]","blueberries, granulated sugar, vanilla yogurt,...",4.5,170.9,2.5,1.3,8.0,29.8,37.1,3.6,30.2,3.2,"[Toss 2 cups berries with sugar., Let stand fo..."
1,39,Biryani,25 minutes,6.0,Chicken Breast,[],"saffron, milk, hot green chili peppers, onions...",3.0,1110.7,58.8,16.6,372.8,368.4,84.4,9.0,20.4,63.4,[Soak saffron in warm milk for 5 minutes and p...
2,40,Best Lemonade,5 minutes,4.0,Beverages,[],"sugar, lemons, rind of, lemon, zest of, fresh ...",4.5,311.1,0.2,0.0,0.0,1.8,81.5,0.4,77.2,0.3,"[Into a 1 quart Jar with tight fitting lid, pu..."
3,41,Carina's Tofu-Vegetable Kebabs,20 minutes,2.0,Soy/Tofu,"[12, 1, 2, 1, 10, 1, 3, 2, 2, 2, 1, 2, 1/2, 1/...","extra firm tofu, eggplant, zucchini, mushrooms...",4.5,536.1,24.0,3.8,0.0,1558.6,64.2,17.3,32.1,29.3,"[Drain the tofu, carefully squeezing out exces..."
4,42,Cabbage Soup,30 minutes,4.0,Vegetable,"[46, 4, 1, 2, 1]","plain tomato juice, cabbage, onion, carrots, c...",4.5,103.6,0.4,0.1,0.0,959.3,25.1,4.8,17.7,4.3,"[Mix everything together and bring to a boil.,..."


In [24]:
print(data_cleaned_copy.shape)

(334580, 18)


4. Filtering Nutritional Information for Recommendations

In [25]:
# Nutritional maximum thresholds for a single meal (assuming 3 meals a day)
healthy_thresholds_per_meal = {
    'FatContent': 35,  # Fat content per meal
    'SaturatedFatContent': 20,  # Saturated fat per meal
    'CholesterolContent': 100,  # Cholesterol per meal
    'SodiumContent': 1500,  # Sodium per meal
    'CarbohydrateContent': 50,  # Carbohydrates per meal
    'FiberContent': 20,  # Fiber per meal
    'SugarContent': 20,  # Sugar per meal
    'ProteinContent': 60  # Protein per meal
}

# Create a copy of the original dataset to preserve the data_cleaned
data_prepared = data_cleaned_copy.copy()

# Initialize a new column for health status
data_prepared['HealthStatus'] = 'Healthy'  # Assume healthy by default

# Define a function to check health status based on nutritional content
def check_health_status(row, healthy_thresholds):
    for column in healthy_thresholds:
        if column in row:
            # Check if the recipe exceeds the "Unhealthy" threshold
            if row[column] > healthy_thresholds_per_meal[column]:
                #row['HealthyCause'] = f"because of : {column}"
                return "Unhealthy"


                
    return row['HealthStatus']  # Return 'Healthy' if within limits

# Apply the function to each row of the dataset
data_prepared['HealthStatus'] = data_prepared.apply(
    lambda row: check_health_status(row, healthy_thresholds_per_meal), axis=1
)

# Display the filtered data with health status
print("\nData Filtered Based on Nutritional Information (per meal):")
print(data_prepared.describe())  # Descriptive statistics for the filtered data
print(data_prepared.shape)  # The shape of the filtered data



Data Filtered Based on Nutritional Information (per meal):
       RecipeId  RecipeServings  AggregatedRating  Calories  FatContent  \
count 334580.00       334580.00         334580.00 334580.00   334580.00   
mean  279524.69            8.67              4.63    338.56       16.67   
std   154990.90          115.17              0.46    248.77       16.17   
min       38.00            1.00              1.00      0.00        0.00   
25%   147776.50            4.00              4.63    164.10        5.20   
50%   282018.50            6.00              4.63    286.00       12.30   
75%   413835.50            8.00              5.00    450.20       23.10   
max   541379.00        32767.00              5.00   2998.10      114.10   

       SaturatedFatContent  CholesterolContent  SodiumContent  \
count            334580.00           334580.00      334580.00   
mean                  6.46               66.73         540.83   
std                   7.32               83.69        2103.87   
min 

In [26]:
# Example of viewing the health status column
print("\nHealth Status for Each Recipe (per meal):")
data_prepared.head()


Health Status for Each Recipe (per meal):


Unnamed: 0,RecipeId,Name,CookTime,RecipeServings,RecipeCategory,RecipeIngredientQuantities,RecipeIngredientParts,AggregatedRating,Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeInstructions,HealthStatus
0,38,Low-Fat Berry Blue Frozen Dessert,1 day,4.0,Frozen Desserts,"[4, 1/4, 1, 1]","blueberries, granulated sugar, vanilla yogurt,...",4.5,170.9,2.5,1.3,8.0,29.8,37.1,3.6,30.2,3.2,"[Toss 2 cups berries with sugar., Let stand fo...",Unhealthy
1,39,Biryani,25 minutes,6.0,Chicken Breast,[],"saffron, milk, hot green chili peppers, onions...",3.0,1110.7,58.8,16.6,372.8,368.4,84.4,9.0,20.4,63.4,[Soak saffron in warm milk for 5 minutes and p...,Unhealthy
2,40,Best Lemonade,5 minutes,4.0,Beverages,[],"sugar, lemons, rind of, lemon, zest of, fresh ...",4.5,311.1,0.2,0.0,0.0,1.8,81.5,0.4,77.2,0.3,"[Into a 1 quart Jar with tight fitting lid, pu...",Unhealthy
3,41,Carina's Tofu-Vegetable Kebabs,20 minutes,2.0,Soy/Tofu,"[12, 1, 2, 1, 10, 1, 3, 2, 2, 2, 1, 2, 1/2, 1/...","extra firm tofu, eggplant, zucchini, mushrooms...",4.5,536.1,24.0,3.8,0.0,1558.6,64.2,17.3,32.1,29.3,"[Drain the tofu, carefully squeezing out exces...",Unhealthy
4,42,Cabbage Soup,30 minutes,4.0,Vegetable,"[46, 4, 1, 2, 1]","plain tomato juice, cabbage, onion, carrots, c...",4.5,103.6,0.4,0.1,0.0,959.3,25.1,4.8,17.7,4.3,"[Mix everything together and bring to a boil.,...",Healthy


In [27]:
data_prepared[data_prepared['HealthStatus'].str.startswith('U')].shape

(159143, 19)

In [28]:
# Classifier les recettes par type de repas (Petit-déjeuner, Déjeuner, Dîner)
def classify_recipes(recipes):
    breakfast_keywords = [
    'Breakfast', 'Scones', 'Smoothies', 'Oatmeal', 'Breads', 'Frozen Desserts',
    'Breakfast Eggs', 'Pancakes', 'Croissants', 'Cereals', 'Yogurt', 'Coffee', 
    'Juices', 'Tarts', 'Muffins', 'Fruit', 'Toast', 'Bagels'
]

    lunch_dinner_keywords = [
    'Lunch/Snacks', 'Dinner', 'Sandwich', 'Salad', 'Soup', 'Stew', 'Chicken', 
    'Wrap', 'Pasta', 'Roast', 'Lamb', 'Meatloaf', 'Fish', 'Vegetable', 'Rice', 
    'Spaghetti', 'Duck', 'Lobster', 'Crab', 'Turkey Breasts', 'Casseroles', 
    'Main Dish Casseroles', 'Pot Roast', 'Ham', 'Seafood', 'Beans', 'Potato', 
    'Savory Pies', 'Tex Mex', 'Grilled Chicken', 'Pork', 'Meat', 'Tuna', 
    'Quiche', 'Bar Cookie', 'Roast Beef', 'Turkey', 'Salmon', 'Curries', 
    'Peppers', 'Stuffed Bell Peppers', 'Cajun', 'Asian', 'Mexican', 'Healthy', 
    'High Protein', 'Vegetarian', 'Poultry', 'Barbecue', 'Quick Breads', 
    'Cornbread', 'Coconut', 'Squash', 'Stuffing'
]

    def classify_category(category):
        if any(keyword in category for keyword in breakfast_keywords):
            return 'Breakfast'
        elif any(keyword in category for keyword in lunch_dinner_keywords):
            return 'Dinner_Lunch'
        else:
            return 'Other'
    
    recipes['MealType'] = recipes['RecipeCategory'].apply(classify_category)
    return recipes



In [30]:
data_prepared = classify_recipes(data_prepared)


Deployment

In [33]:
# Save data for deployment
data_prepared.to_csv('cleaned_recipes_.csv', index=False)
print("Data Saved for Deployment.")

Data Saved for Deployment.


In [31]:
data_prepared

Unnamed: 0,RecipeId,Name,CookTime,RecipeServings,RecipeCategory,RecipeIngredientQuantities,RecipeIngredientParts,AggregatedRating,Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeInstructions,HealthStatus,MealType
0,38,Low-Fat Berry Blue Frozen Dessert,1 day,4.00,Frozen Desserts,"[4, 1/4, 1, 1]","blueberries, granulated sugar, vanilla yogurt,...",4.50,170.90,2.50,1.30,8.00,29.80,37.10,3.60,30.20,3.20,"[Toss 2 cups berries with sugar., Let stand fo...",Unhealthy,Breakfast
1,39,Biryani,25 minutes,6.00,Chicken Breast,[],"saffron, milk, hot green chili peppers, onions...",3.00,1110.70,58.80,16.60,372.80,368.40,84.40,9.00,20.40,63.40,[Soak saffron in warm milk for 5 minutes and p...,Unhealthy,Dinner_Lunch
2,40,Best Lemonade,5 minutes,4.00,Beverages,[],"sugar, lemons, rind of, lemon, zest of, fresh ...",4.50,311.10,0.20,0.00,0.00,1.80,81.50,0.40,77.20,0.30,"[Into a 1 quart Jar with tight fitting lid, pu...",Unhealthy,Other
3,41,Carina's Tofu-Vegetable Kebabs,20 minutes,2.00,Soy/Tofu,"[12, 1, 2, 1, 10, 1, 3, 2, 2, 2, 1, 2, 1/2, 1/...","extra firm tofu, eggplant, zucchini, mushrooms...",4.50,536.10,24.00,3.80,0.00,1558.60,64.20,17.30,32.10,29.30,"[Drain the tofu, carefully squeezing out exces...",Unhealthy,Other
4,42,Cabbage Soup,30 minutes,4.00,Vegetable,"[46, 4, 1, 2, 1]","plain tomato juice, cabbage, onion, carrots, c...",4.50,103.60,0.40,0.10,0.00,959.30,25.10,4.80,17.70,4.30,"[Mix everything together and bring to a boil.,...",Healthy,Dinner_Lunch
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
522507,541374,MaMa's Bean Salad,unknown,15.00,Vegetable,"[1, 1, 1, 1, 1, 1, 1, 1/2, 1/2, 1/4]","green beans, English peas, red bell pepper, ce...",4.63,141.70,4.00,0.30,0.00,11.70,22.40,5.30,10.00,5.20,[Drain and rinse all the beans. (It is unneces...,Healthy,Dinner_Lunch
522508,541375,Amazing Ground Beef Stroganoff,20 minutes,4.00,Meat,"[1, 1, 1, 1/2, 1/4, 1, 1, 1, 1, 1/2]","hamburger, onion, celery, water chestnut, drie...",4.63,422.30,28.60,12.60,106.00,634.70,14.10,1.40,5.70,27.30,[Saute meat in a medium skillet until it loses...,Unhealthy,Dinner_Lunch
522509,541376,Spanish Coffee with Tia Maria,unknown,1.00,Beverages,"[1, 1, 1, 1 1/2, 6, 3, 1, 1]","lemon wedge, granulated sugar, cognac, brandy,...",4.63,84.30,2.10,1.20,6.80,15.70,16.60,0.40,15.40,0.60,[Cut a small slit in the lemon wedge and slide...,Healthy,Other
522510,541377,Slow-Cooker Classic Coffee Cake,3 hours,12.00,Breads,"[1, 1/2, 4, 2, 1/8, 1, 1, 1/2, 4, 1/2, 2 -3, 1/4]","all-purpose flour, brown sugar, butter, ground...",4.63,358.90,19.80,10.50,103.10,323.40,41.50,0.80,24.80,4.80,[Line bottom and sides of 5-quart oval slow co...,Unhealthy,Breakfast
