In [1]:
import kagglehub
import pandas as pd 
import os
import yaml

with open("config.yaml", 'r') as f:
    config = yaml.safe_load(f)

def fetch_data(dataset_name, version=None)->str:
    if version:
        dataset_name = f"{dataset_name}:{version}"
    return kagglehub.dataset_download(dataset_name)

def load_data(path,files)->pd.DataFrame:
    data_frames = {}
    for file in files : 
        file_path = os.path.join(path, file)
        if os.path.exists(file_path):
            data_frames[file] = pd.read_csv(file_path)
        else:
            raise FileNotFoundError(f"{file} introuvable dans le {path}")
    return data_frames

# Charger les données au niveau global
dataset_path = fetch_data(config['dataset']['name'])
dfs = load_data(dataset_path, config['dataset']['files'])
recipes_df = dfs.get('RAW_recipes.csv')
interactions_df = dfs.get('RAW_interactions.csv')

if __name__ == "__main__":
    for name, df in dfs.items():
        print(f"Data from {name}:")
        print(df.head())
    print(" Recipes shape:", recipes_df.shape)
    print(" Interactions shape:", interactions_df.shape)

  from .autonotebook import tqdm as notebook_tqdm


Data from RAW_interactions.csv:
   user_id  recipe_id        date  rating  \
0    38094      40893  2003-02-17       4   
1  1293707      40893  2011-12-21       5   
2     8937      44394  2002-12-01       4   
3   126440      85009  2010-02-27       5   
4    57222      85009  2011-10-01       5   

                                              review  
0  Great with a salad. Cooked on top of stove for...  
1  So simple, so delicious! Great for chilly fall...  
2  This worked very well and is EASY.  I used not...  
3  I made the Mexican topping and took it to bunk...  
4  Made the cheddar bacon topping, adding a sprin...  
Data from RAW_recipes.csv:
                                         name      id  minutes  \
0  arriba   baked winter squash mexican style  137739       55   
1            a bit different  breakfast pizza   31490       30   
2                   all in the kitchen  chili  112140      130   
3                          alouette  potatoes   59389       45   
4         

In [3]:
# Rechargement du module pour prendre en compte les modifications
import importlib
import data_prepro
importlib.reload(data_prepro)
from data_prepro import IngredientPreprocessor

# Charger le fichier CSV directement depuis le répertoire courant
preproc = IngredientPreprocessor("ingr_map.csv")
print("IngredientPreprocessor initialisé avec le fichier CSV")

# Vérifier que la carte des ingrédients est bien chargée
print(f"Nombre d'ingrédients dans la carte: {len(preproc.raw_to_normalized)}")

# Tester avec un ingrédient qui devrait être dans la carte
test_result = preproc.normalize_ingredient("4 extra virgin olive oil")
print(f"Test normalization: 'extra virgin olive oil' -> '{test_result}'")

# Tester avec quelques autres ingrédients
test_ingredients = ["large eggs", "all purpose flour", "unsalted butter"]
for ing in test_ingredients:
    result = preproc.normalize_ingredient(ing)
    print(f"'{ing}' -> '{result}'")

INFO:data_prepro:Ingredient map loaded successfully.


IngredientPreprocessor initialisé avec le fichier CSV
Nombre d'ingrédients dans la carte: 11659
Test normalization: 'extra virgin olive oil' -> 'olive oil'
'large eggs' -> 'large eggs'
'all purpose flour' -> 'all purpose flour'
'unsalted butter' -> 'unsalted butter'


In [20]:
#recipes_df.head()
recipes_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 231637 entries, 0 to 231636
Data columns (total 12 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   name            231636 non-null  object
 1   id              231637 non-null  int64 
 2   minutes         231637 non-null  int64 
 3   contributor_id  231637 non-null  int64 
 4   submitted       231637 non-null  object
 5   tags            231637 non-null  object
 6   nutrition       231637 non-null  object
 7   n_steps         231637 non-null  int64 
 8   steps           231637 non-null  object
 9   description     226658 non-null  object
 10  ingredients     231637 non-null  object
 11  n_ingredients   231637 non-null  int64 
dtypes: int64(5), object(7)
memory usage: 21.2+ MB


In [21]:
#steps_list=list(recipes_df['steps'])
#steps_list[0][0]
# steps va etre convertit en objet list 
recipes_df['description'][0]
recipes_df['nutrition'][0]
#recipes_df['ingredients'][0]
#recipes_df['tags'][0]
recipes_df['ingredients'][0][0]

'['

In [22]:
interactions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1132367 entries, 0 to 1132366
Data columns (total 5 columns):
 #   Column     Non-Null Count    Dtype 
---  ------     --------------    ----- 
 0   user_id    1132367 non-null  int64 
 1   recipe_id  1132367 non-null  int64 
 2   date       1132367 non-null  object
 3   rating     1132367 non-null  int64 
 4   review     1132198 non-null  object
dtypes: int64(3), object(2)
memory usage: 43.2+ MB


In [46]:
interactions_df.head()

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."
2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin..."


In [23]:
correlation = recipes_df[['minutes', 'n_ingredients']].corr()
print(f"Corrélation temps/ingrédients: {correlation.iloc[0,1]:.3f}")

Corrélation temps/ingrédients: -0.001


In [None]:
#recipes_df['steps_count'] = recipes_df['steps'].apply(lambda x: len(x.split('||')) if pd.notnull(x) else 0)
recipes_df['steps'][1]

"['preheat oven to 425 degrees f', 'press dough into the bottom and sides of a 12 inch pizza pan', 'bake for 5 minutes until set but not browned', 'cut sausage into small pieces', 'whisk eggs and milk in a bowl until frothy', 'spoon sausage over baked crust and sprinkle with cheese', 'pour egg mixture slowly over sausage and cheese', 's& p to taste', 'bake 15-20 minutes or until eggs are set and crust is brown']"

In [10]:
recipes_df.shape

(231637, 12)

In [None]:
#print(fastest[['name', 'minutes', 'n_ingredients']])
#recipes_df[recipes_df['minutes'] == 0].count()
# imputing missing values de minutes en utilisant la variable nombre d'ingrédients 
recipes_df[recipes_df['minutes'] == 0].head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
5,apple a day milk shake,5289,0,1533,1999-12-06,"['15-minutes-or-less', 'time-to-make', 'course...","[160.2, 10.0, 55.0, 3.0, 9.0, 20.0, 7.0]",4,"['combine ingredients in blender', 'cover and ...",,"['milk', 'vanilla ice cream', 'frozen apple ju...",4
2451,acorn magic delights,1712,0,1534,1999-10-01,"['15-minutes-or-less', 'time-to-make', 'course...","[148.4, 15.0, 28.0, 2.0, 3.0, 21.0, 4.0]",13,"['melt the butter or margarine over low heat',...",,"['butter', 'brown sugar', 'pecans', 'all-purpo...",7
3079,albanian byrek,4880,0,1534,1999-11-24,"['15-minutes-or-less', 'time-to-make', 'course...","[354.4, 42.0, 25.0, 59.0, 37.0, 37.0, 2.0]",14,"['prepare the dough with flour , 1 and a half ...","the directions to this are vague, but maybe yo...","['flour', 'water', 'oil', 'vinegar', 'salt', '...",9
3193,alfredo sauce with pasta,3258,0,1534,1999-10-10,"['15-minutes-or-less', 'time-to-make', 'course...","[1902.9, 287.0, 5.0, 140.0, 104.0, 583.0, 3.0]",8,['cook noodles or fettuccine according to pack...,,"['butter', 'heavy cream', 'parmesan cheese', '...",6
3259,alice s doughnuts,2284,0,1752,1999-10-18,"['15-minutes-or-less', 'time-to-make', 'course...","[107.3, 6.0, 20.0, 3.0, 3.0, 10.0, 4.0]",17,"['in a large bowl , beat the eggs until foamy'...",,"['eggs', 'sugar', 'milk', 'shortening', 'vanil...",9


In [None]:
fastest = recipes_df.nsmallest(10, 'minutes')


In [25]:
# catégories des ingrédients
categories = {
        'proteins': ['chicken', 'beef', 'pork', 'fish', 'salmon', 'tuna', 'shrimp', 
                     'turkey', 'lamb', 'egg', 'tofu', 'tempeh'],
        'dairy': ['milk', 'cheese', 'butter', 'cream', 'yogurt', 'sour cream'],
        'vegetables': ['tomato', 'onion', 'garlic', 'carrot', 'potato', 'broccoli',
                       'spinach', 'pepper', 'mushroom', 'lettuce', 'cucumber'],
        'fruits': ['apple', 'banana', 'orange', 'lemon', 'strawberry', 'blueberry'],
        'grains': ['flour', 'rice', 'pasta', 'bread', 'oat', 'quinoa', 'wheat'],
        'spices': ['salt', 'pepper', 'cumin', 'paprika', 'cinnamon', 'basil', 
                   'oregano', 'thyme', 'rosemary'],
        'oils': ['olive oil', 'vegetable oil', 'coconut oil', 'butter'],
        'sweeteners': ['sugar', 'honey', 'maple syrup', 'brown sugar']
    }


In [30]:
subset = recipes_df.head(500).copy()
subset["clean_ingredients"] = subset["ingredients"].apply(preproc.parse_and_clean)
subset[["ingredients", "clean_ingredients"]].head()

Unnamed: 0,ingredients,clean_ingredients
0,"['winter squash', 'mexican seasoning', 'mixed ...","[olive oil, butter, honey, winter squash, salt..."
1,"['prepared pizza crust', 'sausage patty', 'egg...","[sausage, egg, milk, pizza crust, salt and pep..."
2,"['ground beef', 'yellow onions', 'diced tomato...","[yellow onion, ground cumin, lettuce, ground b..."
3,"['spreadable cheese with garlic and herbs', 'n...","[olive oil, yellow bell pepper, red bell peppe..."
4,"['tomato juice', 'apple cider vinegar', 'sugar...","[pepper, clove oil, cinnamon oil, sugar, salt,..."


In [31]:
subset["ingredient_categories"] = subset["clean_ingredients"].apply(preproc.categorize)
subset["ingredient_categories"].head()

0    {'oils': ['olive oil', 'butter'], 'sweeteners'...
1    {'other': ['sausage', 'pizza crust'], 'protein...
2    {'vegetables': ['yellow onion', 'lettuce', 'ro...
3    {'oils': ['olive oil'], 'spices': ['yellow bel...
4    {'spices': ['pepper', 'cinnamon oil', 'salt'],...
Name: ingredient_categories, dtype: object

In [36]:
tests = ["extra virgin olive oil", "large eggs", "fresh basil leaves", "granulated sugar", "unsalted butter"]
for t in tests:
    print(t, "->", preproc.normalize_ingredient(t))

extra virgin olive oil -> olive oil
large eggs -> large eggs
fresh basil leaves -> fresh basil leaf
granulated sugar -> granulated sugar
unsalted butter -> unsalted butter


In [37]:
from collections import Counter
all_clean = [ing for lst in subset["clean_ingredients"] for ing in lst]
Counter(all_clean).most_common(15)

[('salt', 178),
 ('egg', 125),
 ('butter', 111),
 ('onion', 109),
 ('sugar', 80),
 ('water', 76),
 ('milk', 74),
 ('flmy', 69),
 ('pepper', 60),
 ('garlic clove', 57),
 ('olive oil', 54),
 ('brown sugar', 46),
 ('vanilla', 42),
 ('baking powder', 39),
 ('baking soda', 37)]

In [38]:
subset["clean_ingredients"].apply(len).describe()

count    500.000000
mean       9.092000
std        4.042303
min        2.000000
25%        6.000000
50%        9.000000
75%       11.250000
max       23.000000
Name: clean_ingredients, dtype: float64

In [42]:
raw_ingredients = ["2 Cups All-Purpose Flour", "Fresh Garlic", "Olive Oil", "Ground black pepper", "Diced Tomatoes"]
cleaned = [preproc.normalize_ingredient(x) for x in raw_ingredients]
cleaned

['cups all-purpose flour', 'garlic', 'olive oil', 'black pepper', 'tomatoes']

In [43]:
tests = ["extra virgin olive oil", "large eggs", "fresh basil leaves", "granulated sugar", "unsalted butter"]
for t in tests:
    print(t, "->", preproc.normalize_ingredient(t))

extra virgin olive oil -> extra virgin olive oil
large eggs -> large eggs
fresh basil leaves -> basil leaves
granulated sugar -> granulated sugar
unsalted butter -> unsalted butter


Fin de la première étape de pre-traitement. Nous avons donc réussi à convertir le fichier pkl en csv et l'exploiter pour normaliser nos données textuelles.

On va donc appliquer la fonction normalisation sur la variable ingredients de notre dataset qui est dans recipes_df. 

In [75]:
# Appliquer la normalisation sur toute la colonne ingredients
# Ajouter directement une nouvelle colonne au DataFrame existant
print("Normalisation en cours...")

# Appliquer la fonction parse_and_clean sur chaque recette
recipes_df["normalized_ingredients"] = recipes_df["ingredients"].apply(preproc.parse_and_clean)

print("✅ Normalisation terminée!")
print(f"Forme du dataset: {recipes_df.shape}")


Normalisation en cours...
✅ Normalisation terminée!
Forme du dataset: (231637, 13)
✅ Normalisation terminée!
Forme du dataset: (231637, 13)


In [76]:
# Comparer quelques exemples avant/après
print("\n📋 Exemples avant/après normalisation:")
for i in range(3):
    print(f"\n--- Recette {i+1} ---")
    print(f"Avant: {recipes_df.iloc[i]['ingredients']}")
    print(f"Après: {recipes_df.iloc[i]['normalized_ingredients']}")
    
recipes_df[["name", "ingredients", "normalized_ingredients"]].head()


📋 Exemples avant/après normalisation:

--- Recette 1 ---
Avant: ['winter squash', 'mexican seasoning', 'mixed spice', 'honey', 'butter', 'olive oil', 'salt']
Après: ['olive oil', 'butter', 'honey', 'winter squash', 'salt', 'mixed spice', 'mexican seasoning']

--- Recette 2 ---
Avant: ['prepared pizza crust', 'sausage patty', 'eggs', 'milk', 'salt and pepper', 'cheese']
Après: ['eggs', 'milk', 'salt and pepper', 'cheese', 'sausage patty', 'prepared pizza crust']

--- Recette 3 ---
Avant: ['ground beef', 'yellow onions', 'diced tomatoes', 'tomato paste', 'tomato soup', 'rotel tomatoes', 'kidney beans', 'water', 'chili powder', 'ground cumin', 'salt', 'lettuce', 'cheddar cheese']
Après: ['lettuce', 'yellow onions', 'chili powder', 'tomatoes', 'cumin', 'rotel tomatoes', 'water', 'tomato paste', 'cheddar cheese', 'salt', 'kidney beans', 'tomato soup', 'beef']


Unnamed: 0,name,ingredients,normalized_ingredients
0,arriba baked winter squash mexican style,"['winter squash', 'mexican seasoning', 'mixed ...","[olive oil, butter, honey, winter squash, salt..."
1,a bit different breakfast pizza,"['prepared pizza crust', 'sausage patty', 'egg...","[eggs, milk, salt and pepper, cheese, sausage ..."
2,all in the kitchen chili,"['ground beef', 'yellow onions', 'diced tomato...","[lettuce, yellow onions, chili powder, tomatoe..."
3,alouette potatoes,"['spreadable cheese with garlic and herbs', 'n...","[olive oil, yellow bell pepper, new potatoes, ..."
4,amish tomato ketchup for canning,"['tomato juice', 'apple cider vinegar', 'sugar...","[pepper, clove oil, cinnamon oil, sugar, salt,..."


In [84]:
# Analyser les résultats de la normalisation
print("📊 Analyse des ingrédients normalisés:")
print(f"Nombre total de recettes: {len(recipes_df)}")

# Calculer le nombre moyen d'ingrédients par recette
avg_ingredients = recipes_df["normalized_ingredients"].apply(len).mean()
print(f"Nombre moyen d'ingrédients par recette: {avg_ingredients:.1f}")

# Top 20 des ingrédients les plus fréquents après normalisation
from collections import Counter
all_normalized_ingredients = [ing for ingredients_list in recipes_df["normalized_ingredients"] for ing in ingredients_list]
most_common = Counter(all_normalized_ingredients).most_common(20)

print("\n🥇 Top 20 des ingrédients les plus fréquents:")
for i, (ingredient, count) in enumerate(most_common, 1):
    print(f"{i:2d}. {ingredient:<20} : {count:>6,} fois")


# Afficher les colonnes du DataFrame
print(f"\n📋 Colonnes du DataFrame: {list(recipes_df.columns)}")

📊 Analyse des ingrédients normalisés:
Nombre total de recettes: 231637
Nombre moyen d'ingrédients par recette: 9.0

🥇 Top 20 des ingrédients les plus fréquents:
 1. salt                 : 85,746 fois
 2. butter               : 54,975 fois
 3. sugar                : 44,535 fois
 4. onion                : 39,786 fois
 5. water                : 34,926 fois
 6. eggs                 : 33,761 fois
 7. olive oil            : 32,822 fois
 8. garlic cloves        : 26,723 fois
 9. pepper               : 26,633 fois
10. flour                : 26,266 fois
11. milk                 : 25,799 fois
12. black pepper         : 24,271 fois
13. lemon juice          : 19,506 fois
14. cinnamon             : 19,316 fois
15. garlic               : 19,072 fois
16. brown sugar          : 18,655 fois
17. all-purpose flour    : 17,659 fois
18. baking powder        : 17,504 fois
19. egg                  : 17,304 fois
20. tomatoes             : 16,602 fois

📋 Colonnes du DataFrame: ['name', 'id', 'minutes', 'contri

In [4]:
from data_prepro import NutritionPreprocessor
# Nouvelle cellule - Test de NutritionPreprocessor
print("🧪 TEST DE LA CLASSE NutritionPreprocessor")
print("=" * 50)

# Recharger le module pour prendre en compte les modifications
import importlib
importlib.reload(data_prepro)
from data_prepro import NutritionPreprocessor

# Créer une instance du preprocessor
nutrition_processor = NutritionPreprocessor()

# Test 1: Parsing d'une chaîne nutrition normale
print("\n📊 Test 1: Parsing nutrition normale")
test_nutrition_str = "[200.5, 10.2, 15.8, 25.0, 5.5, 12.3, 800.0]"
print(f"Input: {test_nutrition_str}")
parsed_nutrition = nutrition_processor.parse_nutrition(test_nutrition_str)
print(f"Output: {parsed_nutrition}")

# Test 2: Calcul du health score
print(f"\n🏥 Test 2: Calcul du health score")
health_score = nutrition_processor.compute_health_score(parsed_nutrition)
print(f"Health Score: {health_score}")

# Test 3: Exemple avec des données réelles du dataset
print(f"\n🥗 Test 3: Données réelles du dataset")
if 'nutrition' in recipes_df.columns:
    real_nutrition_str = recipes_df['nutrition'].iloc[0]
    print(f"Nutrition originale: {real_nutrition_str}")
    
    real_parsed = nutrition_processor.parse_nutrition(real_nutrition_str)
    print(f"Parsed: {real_parsed}")
    
    real_health_score = nutrition_processor.compute_health_score(real_parsed)
    print(f"Health Score: {real_health_score}")

# Test 4: Cas d'erreur - chaîne malformée
print(f"\n❌ Test 4: Gestion d'erreurs")
malformed_str = "[200.5, 10.2, invalid, 25.0]"
print(f"Input malformé: {malformed_str}")
error_result = nutrition_processor.parse_nutrition(malformed_str)
print(f"Résultat: {error_result}")

# Test 5: Test avec plusieurs exemples du dataset
print(f"\n📈 Test 5: Analyse de plusieurs recettes")
sample_size = 10
nutrition_results = []

for i in range(min(sample_size, len(recipes_df))):
    nutrition_str = recipes_df['nutrition'].iloc[i]
    parsed = nutrition_processor.parse_nutrition(nutrition_str)
    if parsed:  # Si le parsing a réussi
        health_score = nutrition_processor.compute_health_score(parsed)
        nutrition_results.append({
            'recipe_id': i,
            'calories': parsed.get('calories', 0),
            'protein': parsed.get('protein', 0),
            'sugar': parsed.get('sugar', 0),
            'health_score': health_score
        })

# Afficher les résultats
print(f"\n📋 Résultats pour {len(nutrition_results)} recettes:")
print(f"{'ID':<3} {'Calories':<8} {'Protein':<7} {'Sugar':<6} {'Health Score':<12}")
print("-" * 40)
for result in nutrition_results:
    print(f"{result['recipe_id']:<3} {result['calories']:<8.1f} {result['protein']:<7.1f} "
          f"{result['sugar']:<6.1f} {result['health_score']:<12.2f}")

# Test 6: Comparaison de différents profils nutritionnels
print(f"\n🔬 Test 6: Comparaison de profils nutritionnels")

test_profiles = [
    {
        'name': 'Recette saine',
        'nutrition': [300, 8, 12, 40, 5, 20, 500]  # Faible en calories, bon en protéines
    },
    {
        'name': 'Recette riche',
        'nutrition': [800, 35, 45, 60, 15, 15, 1500]  # Riche en calories et sodium
    },
    {
        'name': 'Dessert sucré',
        'nutrition': [450, 20, 25, 55, 35, 8, 200]  # Riche en sucre
    }
]

for profile in test_profiles:
    # Simuler une chaîne nutrition
    nutrition_str = str(profile['nutrition'])
    parsed = nutrition_processor.parse_nutrition(nutrition_str)
    health_score = NutritionPreprocessor.compute_health_score(parsed)
    
    print(f"\n{profile['name']}:")
    print(f"  Calories: {parsed['calories']}")
    print(f"  Protéines: {parsed['protein']}g")
    print(f"  Sucre: {parsed['sugar']}g")
    print(f"  Sodium: {parsed['sodium']}mg")
    print(f"  🏥 Health Score: {health_score}")

print(f"\n✅ Tests terminés!")



ERROR:data_prepro:Erreur parsing nutrition: malformed node or string on line 1: <ast.Name object at 0x7f10fee61b10>


🧪 TEST DE LA CLASSE NutritionPreprocessor

📊 Test 1: Parsing nutrition normale
Input: [200.5, 10.2, 15.8, 25.0, 5.5, 12.3, 800.0]
Output: {'calories': 200.5, 'fat': 10.2, 'total_fat': 15.8, 'carbohydrates': 25.0, 'sugar': 5.5, 'protein': 12.3, 'sodium': 800.0}

🏥 Test 2: Calcul du health score
Health Score: 1

🥗 Test 3: Données réelles du dataset
Nutrition originale: [51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]
Parsed: {'calories': 51.5, 'fat': 0.0, 'total_fat': 13.0, 'carbohydrates': 0.0, 'sugar': 2.0, 'protein': 0.0, 'sodium': 4.0}
Health Score: 1

❌ Test 4: Gestion d'erreurs
Input malformé: [200.5, 10.2, invalid, 25.0]
Résultat: {}

📈 Test 5: Analyse de plusieurs recettes

📋 Résultats pour 10 recettes:
ID  Calories Protein Sugar  Health Score
----------------------------------------
0   51.5     0.0     2.0    1.00        
1   173.4    35.0    22.0   1.00        
2   269.8    27.0    39.0   1.00        
3   368.1    8.0     14.0   1.00        
4   352.9    0.0     3.0    1.00        
5   1

In [83]:
recipes_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 231637 entries, 0 to 231636
Data columns (total 13 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   name                    231636 non-null  object
 1   id                      231637 non-null  int64 
 2   minutes                 231637 non-null  int64 
 3   contributor_id          231637 non-null  int64 
 4   submitted               231637 non-null  object
 5   tags                    231637 non-null  object
 6   nutrition               231637 non-null  object
 7   n_steps                 231637 non-null  int64 
 8   steps                   231637 non-null  object
 9   description             226658 non-null  object
 10  ingredients             231637 non-null  object
 11  n_ingredients           231637 non-null  int64 
 12  normalized_ingredients  231637 non-null  object
dtypes: int64(5), object(8)
memory usage: 23.0+ MB


In [None]:
categorized_test = preproc.categorize(["chicken", "olive oil", "salt", "tomato", "basil", "sugar", "flour"])
print(categorized_test)

{'proteins': ['chicken'], 'oils': ['olive oil'], 'spices': ['salt', 'basil'], 'vegetables': ['tomato'], 'sweeteners': ['sugar'], 'grains': ['flour']}


In [None]:
categorized = recipes_df["normalized_ingredients"].apply(preproc.categorize)

In [90]:
recipes_df["normalized_ingredients"][0]

['olive oil',
 'butter',
 'honey',
 'winter squash',
 'salt',
 'mixed spice',
 'mexican seasoning']

In [89]:
categorized[0]

{'oils': ['olive oil', 'butter'],
 'sweeteners': ['honey'],
 'other': ['winter squash', 'mixed spice', 'mexican seasoning'],
 'spices': ['salt']}

In [5]:
# Nouvelle cellule - Test du RecipePreprocessor complet
print("🧪 TEST DU RECIPEPREPROCESSOR COMPLET")
print("=" * 50)

# Recharger le module
import importlib
import data_prepro
importlib.reload(data_prepro)
from data_prepro import RecipePreprocessor

# Initialiser le preprocessor
preprocessor = RecipePreprocessor()
print("✅ RecipePreprocessor initialisé avec le fichier CSV")

# Test sur UN échantillon de recette d'abord
print("\n📋 Test sur une recette individuelle:")
sample_row = recipes_df.iloc[0]  # Première recette
print(f"Recette ID: {sample_row['id']}")
print(f"Nom: {sample_row['name']}")

# Prétraiter cette recette
try:
    recipe_features = preprocessor.preprocess_recipe(sample_row)
    print(f"✅ Prétraitement réussi!")
    print(f"Ingrédients extraits: {len(recipe_features.ingredients)}")
    print(f"Catégories: {list(recipe_features.ingredient_categories.keys())}")
    print(f"Type de repas: {recipe_features.meal_type}")
    print(f"Cuisine: {recipe_features.cuisine_type}")
    print(f"Score d'effort: {recipe_features.effort_score}")
except Exception as e:
    print(f"❌ Erreur: {e}")

🧪 TEST DU RECIPEPREPROCESSOR COMPLET


INFO:data_prepro:Ingredient map loaded successfully.
INFO:data_prepro:RecipePreprocessor initialisé avec succès
INFO:data_prepro:RecipePreprocessor initialisé avec succès


✅ RecipePreprocessor initialisé avec le fichier CSV

📋 Test sur une recette individuelle:
Recette ID: 137739
Nom: arriba   baked winter squash mexican style
✅ Prétraitement réussi!
Ingrédients extraits: 7
Catégories: ['other', 'oils', 'spices', 'sweeteners']
Type de repas: None
Cuisine: mexican
Score d'effort: 0.45272727272727276


In [6]:
# Test après correction
print("🧪 TEST APRÈS CORRECTION")
print("=" * 30)

# Recharger le module modifié
import importlib
import data_prepro
importlib.reload(data_prepro)
from data_prepro import RecipePreprocessor

# Réessayer
preprocessor = RecipePreprocessor()
sample_row = recipes_df.iloc[100]

try:
    recipe_features = preprocessor.preprocess_recipe(sample_row)
    print(f"✅ Prétraitement réussi!")
    print(f"Ingrédients extraits: {len(recipe_features.ingredients)}")
    print(f"Catégories: {list(recipe_features.ingredient_categories.keys())}")
    print(f"Type de repas: {recipe_features.meal_type}")
    print(f"Cuisine: {recipe_features.cuisine_type}")
    print(f"Score d'effort: {recipe_features.effort_score}")
except Exception as e:
    print(f"❌ Erreur: {e}")
    import traceback
    traceback.print_exc()

🧪 TEST APRÈS CORRECTION


INFO:data_prepro:Ingredient map loaded successfully.
INFO:data_prepro:RecipePreprocessor initialisé avec succès
INFO:data_prepro:RecipePreprocessor initialisé avec succès


✅ Prétraitement réussi!
Ingrédients extraits: 7
Catégories: ['vegetables', 'other', 'dairy', 'spices']
Type de repas: lunch
Cuisine: indian
Score d'effort: 0.26249999999999996


In [None]:
recipes_df.head()

In [12]:
interactions_df[interactions_df['recipe_id']==40893]

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."


In [13]:
interactions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1132367 entries, 0 to 1132366
Data columns (total 5 columns):
 #   Column     Non-Null Count    Dtype 
---  ------     --------------    ----- 
 0   user_id    1132367 non-null  int64 
 1   recipe_id  1132367 non-null  int64 
 2   date       1132367 non-null  object
 3   rating     1132367 non-null  int64 
 4   review     1132198 non-null  object
dtypes: int64(3), object(2)
memory usage: 43.2+ MB


In [9]:
# Test complet du RecipeScorer
print("🧪 TEST DU SYSTÈME DE SCORING")
print("=" * 50)

# Importer la classe corrigée
import preprocessing.score as score
importlib.reload(score)
from preprocessing.score import RecipScorer

# Initialiser le scorer
scorer = RecipScorer(alpha=0.5, beta=0.3, gamma=0.2)
print("✅ RecipeScorer initialisé")

# Test 1: Similarité Jaccard
print("\n📊 Test 1: Similarité Jaccard")
user_ingredients = ["chicken", "onion", "garlic", "salt", "pepper"]
recipe_ingredients_1 = ["chicken", "onion", "tomato", "salt"]
recipe_ingredients_2 = ["beef", "carrot", "potato"]

jaccard_1 = RecipScorer.jaccard_similarity(user_ingredients, recipe_ingredients_1)
jaccard_2 = RecipScorer.jaccard_similarity(user_ingredients, recipe_ingredients_2)

print(f"Ingrédients utilisateur: {user_ingredients}")
print(f"Recette 1: {recipe_ingredients_1} → Jaccard: {jaccard_1:.3f}")
print(f"Recette 2: {recipe_ingredients_2} → Jaccard: {jaccard_2:.3f}")

🧪 TEST DU SYSTÈME DE SCORING
✅ RecipeScorer initialisé

📊 Test 1: Similarité Jaccard
Ingrédients utilisateur: ['chicken', 'onion', 'garlic', 'salt', 'pepper']
Recette 1: ['chicken', 'onion', 'tomato', 'salt'] → Jaccard: 0.500
Recette 2: ['beef', 'carrot', 'potato'] → Jaccard: 0.000


In [10]:
# Test 2: Scores de base avec un échantillon
print("\n📈 Test 2: Calcul des scores de base")
# Prendre un échantillon pour test
sample_recipes = recipes_df.head(100).copy()
sample_interactions = interactions_df[interactions_df['recipe_id'].isin(sample_recipes['id'])].copy()

# Renommer la colonne pour correspondre au code
sample_interactions = sample_interactions.rename(columns={'id': 'recipe_id'})

base_scores = scorer.compute_base_score(sample_recipes, sample_interactions)
print(f"Scores calculés pour {len(base_scores)} recettes")
print("Aperçu des scores:")
print(base_scores.head())


📈 Test 2: Calcul des scores de base
Scores calculés pour 100 recettes
Aperçu des scores:
   recipe_id  mean_rating  n_reviews  mean_rating_norm  popularity
0       5060          0.0          1               0.0    0.000000
1       5289          5.0          2               1.0    0.008929
2       8559          2.0          4               0.4    0.026786
3      19208          5.0          1               1.0    0.000000
4      22123          4.5          6               0.9    0.044643


In [11]:
# Test 3: Recommandations complètes
print("\n🍽️ Test 3: Recommandations complètes")

# Utiliser les ingrédients normalisés si disponibles
if 'normalized_ingredients' in recipes_df.columns:
    user_ingredients_test = ["chicken", "onion", "garlic", "salt"]
    print(f"Ingrédients utilisateur: {user_ingredients_test}")
    
    # Obtenir des recommandations
    recommendations = scorer.recommend(
        recipes_df=recipes_df.head(1000),  # Échantillon pour test rapide
        interactions_df=interactions_df,
        user_ingredients=user_ingredients_test,
        time_limit=60,  # Recettes de moins de 60 minutes
        top_n=10
    )
    
    print(f"\n🏆 Top 10 des recommandations:")
    print("=" * 80)
    for i, row in recommendations.iterrows():
        print(f"{len(recommendations) - list(recommendations.index).index(i):2d}. {row['name'][:50]:<50}")
        print(f"    Score: {row['score']:.3f} | Jaccard: {row['jaccard']:.3f} | Rating: {row.get('mean_rating_norm', 0):.3f}")
        print(f"    Ingrédients: {row['normalized_ingredients'][:5]}...")  # Premiers 5 ingrédients
        print()



🍽️ Test 3: Recommandations complètes


In [21]:
# Test 4: Comparaison avec différents profils d'ingrédients
print("\n🔬 Test 4: Comparaison de profils d'ingrédients")

test_profiles = [
    {
        'name': 'Cuisine italienne',
        'ingredients': ['tomato', 'basil', 'mozzarella', 'pasta', 'olive oil']
    },
    {
        'name': 'Cuisine asiatique', 
        'ingredients': ['soy sauce', 'ginger', 'garlic', 'rice', 'sesame oil']
    },
    {
        'name': 'Pâtisserie',
        'ingredients': ['flour', 'sugar', 'butter', 'egg', 'vanilla']
    }
]

for profile in test_profiles:
    print(f"\n--- {profile['name']} ---")
    print(f"Ingrédients: {profile['ingredients']}")
    
    if 'normalized_ingredients' in recipes_df.columns:
        recs = scorer.recommend(
            recipes_df=recipes_df.head(500),
            interactions_df=interactions_df,
            user_ingredients=profile['ingredients'],
            top_n=3
        )
        
        print("Top 3 recommandations:")
        for idx, (_, row) in enumerate(recs.iterrows(), 1):
            print(f"  {idx}. {row['name'][:40]} (Score: {row['score']:.3f})")

print("\n✅ Tests terminés!")


🔬 Test 4: Comparaison de profils d'ingrédients

--- Cuisine italienne ---
Ingrédients: ['tomato', 'basil', 'mozzarella', 'pasta', 'olive oil']

--- Cuisine asiatique ---
Ingrédients: ['soy sauce', 'ginger', 'garlic', 'rice', 'sesame oil']

--- Pâtisserie ---
Ingrédients: ['flour', 'sugar', 'butter', 'egg', 'vanilla']

✅ Tests terminés!


In [22]:
print("Colonnes dans interactions_df:")
print(interactions_df.columns.tolist())

Colonnes dans interactions_df:
['user_id', 'recipe_id', 'date', 'rating', 'review']


In [35]:
# -*- coding: utf-8 -*-
"""
🍽️ SYSTÈME DE RECOMMANDATION DE RECETTES - VERSION OPTIMISÉE
Auteur : Mohamed Kabbaj
Description :
Ce module propose une interface de test pour un système de recommandation de recettes
basé sur les ingrédients disponibles dans le frigo de l'utilisateur.
"""
import pandas as pd
import importlib
import logging
from datetime import datetime

# --- Configuration du logging ---
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%H:%M:%S"
)

# --- Imports dynamiques pour recharger les modules ---
import data_prepro
import preprocessing.score as score
importlib.reload(data_prepro)
importlib.reload(score)

from data_prepro import IngredientPreprocessor
from preprocessing.score import RecipScorer


class RecipeRecommender:
    """Interface principale pour recommander des recettes à partir des ingrédients utilisateur."""

    def __init__(self, recipes_df, interactions_df):
        self.recipes_df = recipes_df
        self.interactions_df = interactions_df

        logging.info("Chargement du préprocesseur et du moteur de score...")
        self.ingredient_preprocessor = IngredientPreprocessor("ingr_map.csv")
        self.scorer = RecipScorer(alpha=0.5, beta=0.3, gamma=0.2)
        logging.info("✅ Système de recommandation initialisé avec succès")

    # -------------------------------------------------------
    # 🔧 Normalisation des ingrédients utilisateur
    # -------------------------------------------------------
    def normalize_user_ingredients(self, raw_ingredients):
        normalized = []
        print("\n🔄 Normalisation des ingrédients :")
        for ingredient in raw_ingredients:
            cleaned = self.ingredient_preprocessor.normalize_ingredient(ingredient)
            if cleaned:
                normalized.append(cleaned)
                print(f"  ✓ '{ingredient}' → '{cleaned}'")
            else:
                print(f"  ⚠️ '{ingredient}' non reconnu")
        return normalized

    # -------------------------------------------------------
    # 🧠 Recommandation
    # -------------------------------------------------------
    def recommend(self, ingredients, time_limit=None, n_recommendations=5):
        logging.info(f"Recommandation basée sur {len(ingredients)} ingrédients...")
        return self.scorer.recommend(
            recipes_df=self.recipes_df,
            interactions_df=self.interactions_df,
            user_ingredients=ingredients,
            time_limit=time_limit,
            top_n=n_recommendations
        )

    # -------------------------------------------------------
    # 🎯 Affichage formaté
    # -------------------------------------------------------
    def display_recommendations(self, recommendations, user_ingredients, n=3):
    
        if recommendations is None or len(recommendations) == 0:
            print("❌ Aucune recette trouvée avec ces critères")
            return

        # Prendre les n premières lignes
        top_recipes = recommendations.head(n)
        print(f"\n🏆 TOP {n} RECOMMANDATIONS ({len(top_recipes)} recettes affichées)")
        print("=" * 90)

        for i, (_, recipe) in enumerate(top_recipes.iterrows(), 1):
            print(f"\n{i}. 🍴 {recipe['name'].capitalize()}")
            print(f"   📊 Score global: {recipe['score']:.3f} | 🔗 Jaccard: {recipe['jaccard']:.3f}")

            if isinstance(recipe.get("normalized_ingredients"), list):
                ing_list = recipe["normalized_ingredients"]
                matched = set(user_ingredients) & set(ing_list)
                missing = [ing for ing in ing_list if ing not in user_ingredients]

                if matched:
                    print(f"   ✅ En commun ({len(matched)}): {', '.join(list(matched)[:5])}")
                if missing:
                    print(f"   ➕ À ajouter ({len(missing)}): {', '.join(missing[:5])}"
                        + (" ..." if len(missing) > 5 else ""))

            if "minutes" in recipe:
                print(f"   ⏱️ Temps: {recipe['minutes']} min")
            print("   " + "-" * 80)


# -------------------------------------------------------
# 🚀 Fonction principale de test (sans input interactif)
# -------------------------------------------------------
def test_recipe_recommender(recipes_df, interactions_df):
    print("\n🍽️ SYSTÈME DE RECOMMANDATION DE RECETTES")
    print("=" * 50)
    start_time = datetime.now()

    recommender = RecipeRecommender(recipes_df, interactions_df)

    test_cases = [
        {
            "name": "Cuisine italienne",
            "ingredients": ["tomato", "garlic", "basil", "pasta", "olive oil"],
            "time_limit": 45
        },
        {
            "name": "Petit-déjeuner rapide",
            "ingredients": ["eggs", "butter", "milk", "flour"],
            "time_limit": 20
        }
    ]

    for case in test_cases:
        print(f"\n{'=' * 90}")
        print(f"🧪 TEST: {case['name']}")
        print(f"{'=' * 90}")

        normalized = recommender.normalize_user_ingredients(case["ingredients"])

        if not normalized:
            print(" Aucun ingrédient valide trouvé")
            continue

        try:
            recommendations = recommender.recommend(
                ingredients=normalized,
                time_limit=case.get("time_limit"),
                n_recommendations=3
            )
            recommender.display_recommendations(recommendations, normalized)
        except Exception as e:
            logging.error(f"Erreur lors du test '{case['name']}': {e}", exc_info=True)

    print(f"\n✅ Tests terminés en {(datetime.now() - start_time).seconds}s")


# Exemple d’exécution
if __name__ == "__main__":
    # Ces DataFrames doivent être importés depuis ton environnement principal
    # (par exemple depuis un notebook ou une fonction setup)
    try:
        test_recipe_recommender(recipes_df, interactions_df)
    except NameError:
        print("⚠️ Les DataFrames 'recipes_df' et 'interactions_df' doivent être définis avant d'exécuter le script.")


INFO:root:Chargement du préprocesseur et du moteur de score...



🍽️ SYSTÈME DE RECOMMANDATION DE RECETTES


INFO:data_prepro:Ingredient map loaded successfully.
INFO:root:✅ Système de recommandation initialisé avec succès
INFO:root:Recommandation basée sur 5 ingrédients...
INFO:root:✅ Système de recommandation initialisé avec succès
INFO:root:Recommandation basée sur 5 ingrédients...



🧪 TEST: Cuisine italienne

🔄 Normalisation des ingrédients :
  ✓ 'tomato' → 'tomato'
  ✓ 'garlic' → 'garlic'
  ✓ 'basil' → 'basil'
  ✓ 'pasta' → 'pastum'
  ✓ 'olive oil' → 'olive oil'


INFO:root:Recommandation basée sur 4 ingrédients...



🏆 TOP 3 RECOMMANDATIONS (3 recettes affichées)

1. 🍴 To die for crock pot roast
   📊 Score global: 0.456 | 🔗 Jaccard: 0.000
   --------------------------------------------------------------------------------

2. 🍴 Creamy cajun chicken pasta
   📊 Score global: 0.452 | 🔗 Jaccard: 0.000
   --------------------------------------------------------------------------------

3. 🍴 Best banana bread
   📊 Score global: 0.451 | 🔗 Jaccard: 0.000
   --------------------------------------------------------------------------------

🧪 TEST: Petit-déjeuner rapide

🔄 Normalisation des ingrédients :
  ✓ 'eggs' → 'egg'
  ✓ 'butter' → 'butter'
  ✓ 'milk' → 'milk'
  ✓ 'flour' → 'flmy'

🏆 TOP 3 RECOMMANDATIONS (3 recettes affichées)

1. 🍴 To die for crock pot roast
   📊 Score global: 0.456 | 🔗 Jaccard: 0.000
   --------------------------------------------------------------------------------

2. 🍴 Creamy cajun chicken pasta
   📊 Score global: 0.452 | 🔗 Jaccard: 0.000
   ---------------------------------------

In [36]:
# -*- coding: utf-8 -*-
"""
🍽️ SYSTÈME DE RECOMMANDATION DE RECETTES - MODE INTERACTIF
Auteur : Mohamed Kabbaj
"""

import logging
from datetime import datetime
from data_prepro import IngredientPreprocessor
from preprocessing.score import RecipScorer

class RecipeRecommender:
    """Interface principale pour recommander des recettes à partir des ingrédients utilisateur."""

    def __init__(self, recipes_df, interactions_df):
        self.recipes_df = recipes_df
        self.interactions_df = interactions_df

        logging.info("Chargement du préprocesseur et du moteur de score...")
        self.ingredient_preprocessor = IngredientPreprocessor("ingr_map.csv")
        self.scorer = RecipScorer(alpha=0.5, beta=0.3, gamma=0.2)
        logging.info("✅ Système de recommandation initialisé avec succès")

    def normalize_user_ingredients(self, raw_ingredients):
        normalized = []
        print("\n🔄 Normalisation des ingrédients :")
        for ingredient in raw_ingredients:
            cleaned = self.ingredient_preprocessor.normalize_ingredient(ingredient)
            if cleaned:
                normalized.append(cleaned)
                print(f"  ✓ '{ingredient}' → '{cleaned}'")
            else:
                print(f"  ⚠️ '{ingredient}' non reconnu")
        return normalized

    def recommend(self, ingredients, time_limit=None, n_recommendations=5):
        logging.info(f"Recommandation basée sur {len(ingredients)} ingrédients...")
        return self.scorer.recommend(
            recipes_df=self.recipes_df,
            interactions_df=self.interactions_df,
            user_ingredients=ingredients,
            time_limit=time_limit,
            top_n=n_recommendations
        )

    def display_recommendations(self, recommendations, user_ingredients, n=5):
        if recommendations is None or len(recommendations) == 0:
            print("❌ Aucune recette trouvée avec ces critères")
            return

        top_recipes = recommendations.head(n)
        print(f"\n🏆 TOP {n} RECOMMANDATIONS")
        print("=" * 90)

        for i, (_, recipe) in enumerate(top_recipes.iterrows(), 1):
            print(f"\n{i}. 🍽️ {recipe['name'].capitalize()}")
            print(f"   📊 Score: {recipe['score']:.3f} | 🔗 Jaccard: {recipe['jaccard']:.3f}")

            if isinstance(recipe.get("normalized_ingredients"), list):
                ing_list = recipe["normalized_ingredients"]
                matched = set(user_ingredients) & set(ing_list)
                missing = [ing for ing in ing_list if ing not in user_ingredients]

                if matched:
                    print(f"   ✅ En commun: {', '.join(list(matched)[:5])}")
                if missing:
                    print(f"   ➕ À ajouter: {', '.join(missing[:5])}")

            if "minutes" in recipe:
                print(f"   ⏱️ Temps: {recipe['minutes']} min")
            print("   " + "-" * 80)


# -------------------------------------------------------
# 🚀 MODE INTERACTIF UTILISATEUR
# -------------------------------------------------------
def interactive_recommendation(recipes_df, interactions_df):
    print("\n🍳 Bienvenue dans le Système de Recommandation de Recettes 🍳")
    print("=" * 70)
    print("Entrez les ingrédients que vous avez dans votre frigo (séparés par des virgules).")
    print("Exemple : tomato, cheese, basil, pasta")
    print("Tapez 'exit' pour quitter.\n")

    recommender = RecipeRecommender(recipes_df, interactions_df)

    while True:
        user_input = input("\n📝 Vos ingrédients : ").strip().lower()
        if user_input in ["exit", "quit", "q"]:
            print("👋 Fin de la session. Bon appétit !")
            break

        raw_ingredients = [ing.strip() for ing in user_input.split(",") if ing.strip()]
        if not raw_ingredients:
            print("⚠️ Veuillez entrer au moins un ingrédient.")
            continue

        normalized = recommender.normalize_user_ingredients(raw_ingredients)

        time_limit = input("⏱️ Temps max (en minutes) [appuyez sur Entrée pour ignorer] : ").strip()
        time_limit = int(time_limit) if time_limit.isdigit() else None

        print("\n🔍 Recherche de recettes correspondantes...")
        recommendations = recommender.recommend(
            ingredients=normalized,
            time_limit=time_limit,
            n_recommendations=5
        )

        recommender.display_recommendations(recommendations, normalized, n=5)


In [41]:
print("🎯 TEST FINAL - SYSTÈME DE RECOMMANDATION COMPLET")
print("="*60)

# Test 1: Vérifier que le preprocessor fonctionne
print("📊 1. Test du Preprocessor")
print(f"   ✅ Preprocessor créé: {type(preprocessor)}")
print(f"   ✅ Recipes chargées: {len(recipes_df)} recettes")
print(f"   ✅ Interactions chargées: {len(interactions_df)} interactions")

# Test 2: Vérifier le scorer
print("\n🏆 2. Test du Scorer")
print(f"   ✅ Scorer créé: {type(scorer)}")

# Test 3: Test de recommandation complète
print("\n🥕 3. Test de Recommandation Complète")
test_ingredients = ["chicken", "onion", "garlic"]
print(f"   🔎 Ingrédients utilisateur: {test_ingredients}")

try:
    # Utiliser un échantillon pour le test (plus rapide)
    sample_recipes = recipes_df.sample(min(1000, len(recipes_df)), random_state=42)
    sample_interactions = interactions_df[interactions_df['recipe_id'].isin(sample_recipes['id'])]
    
    print(f"   📋 Échantillon: {len(sample_recipes)} recettes, {len(sample_interactions)} interactions")
    
    # Faire la recommandation
    recommendations = scorer.recommend(
        recipes_df=sample_recipes,
        interactions_df=sample_interactions,
        user_ingredients=test_ingredients,
        time_limit=60,
        top_n=5
    )
    
    print(f"   ✅ Recommandations générées: {len(recommendations)}")
    print("\n📋 TOP 5 RECOMMANDATIONS:")
    
    for i, (_, rec) in enumerate(recommendations.iterrows(), 1):
        print(f"   {i}. {rec['name'][:50]}...")
        print(f"      🎯 Score: {rec['score']:.3f} | 🥕 Jaccard: {rec['jaccard']:.3f}")
        if 'minutes' in rec:
            print(f"      ⏱️ Temps: {rec['minutes']} min")
        
        # Afficher les ingrédients si disponibles
        ing_col = 'normalized_ingredients' if 'normalized_ingredients' in rec else 'ingredients'
        if ing_col in rec and pd.notnull(rec[ing_col]):
            ingredients = rec[ing_col][:5] if isinstance(rec[ing_col], list) else []
            if ingredients:
                print(f"      🥄 Ingrédients: {', '.join(ingredients)}")
        print()
    
    print("🎉 SYSTÈME FONCTIONNEL !")
    
except Exception as e:
    print(f"   ❌ Erreur lors de la recommandation: {e}")
    import traceback
    traceback.print_exc()

🎯 TEST FINAL - SYSTÈME DE RECOMMANDATION COMPLET
📊 1. Test du Preprocessor
   ✅ Preprocessor créé: <class 'data_prepro.RecipePreprocessor'>
   ✅ Recipes chargées: 231637 recettes
   ✅ Interactions chargées: 1132367 interactions

🏆 2. Test du Scorer
   ✅ Scorer créé: <class 'reco_score.RecipScorer'>

🥕 3. Test de Recommandation Complète
   🔎 Ingrédients utilisateur: ['chicken', 'onion', 'garlic']
   📋 Échantillon: 1000 recettes, 4333 interactions
⏱️ Filtrage temps: 1000 → 738 recettes
🥕 Utilisation de la colonne: ingredients
📊 Stats calculées pour 1000 recettes
🔗 Après fusion: 738 recettes
🏆 Retour de 5 recommandations
   ✅ Recommandations générées: 5

📋 TOP 5 RECOMMANDATIONS:
   1. the thigh who loved me...
      🎯 Score: 0.471 | 🥕 Jaccard: 0.000
      ⏱️ Temps: 50 min

   2. cinnamon loaf...
      🎯 Score: 0.454 | 🥕 Jaccard: 0.000
      ⏱️ Temps: 60 min

   3. chicken souvlaki marinade...
      🎯 Score: 0.423 | 🥕 Jaccard: 0.000
      ⏱️ Temps: 20 min

   4. breaded  n baked zucchini c

In [42]:
print("\n🔍 DIAGNOSTIC DES INGRÉDIENTS")
print("="*40)

# Regarder quelques recettes avec leurs ingrédients
sample = sample_recipes.head(3)
for idx, (_, recipe) in enumerate(sample.iterrows(), 1):
    print(f"\n📋 Recette {idx}: {recipe['name'][:40]}...")
    
    # Ingrédients bruts
    if 'ingredients' in recipe and pd.notnull(recipe['ingredients']):
        ingredients = recipe['ingredients']
        if isinstance(ingredients, list):
            print(f"   🥄 Bruts: {ingredients[:3]}...")
        elif isinstance(ingredients, str):
            print(f"   🥄 Bruts: {ingredients[:100]}...")
    
    # Ingrédients normalisés
    if 'normalized_ingredients' in recipe and pd.notnull(recipe['normalized_ingredients']):
        norm_ing = recipe['normalized_ingredients']
        if isinstance(norm_ing, list):
            print(f"   ✅ Normalisés: {norm_ing[:3]}...")
        else:
            print(f"   ✅ Normalisés: {norm_ing}")
    else:
        print("   ❌ Pas d'ingrédients normalisés")

# Test avec ingrédients plus génériques
print("\n🧪 TEST AVEC INGRÉDIENTS GÉNÉRIQUES")
generic_ingredients = ["salt", "sugar", "flour"]
print(f"Ingrédients: {generic_ingredients}")

try:
    recs = scorer.recommend(
        recipes_df=sample_recipes.head(100),
        interactions_df=sample_interactions,
        user_ingredients=generic_ingredients,
        top_n=3
    )
    
    print(f"✅ Top 3 avec ingrédients génériques:")
    for i, (_, rec) in enumerate(recs.iterrows(), 1):
        print(f"   {i}. Score: {rec['score']:.3f} | Jaccard: {rec['jaccard']:.3f}")
        print(f"      {rec['name'][:50]}...")
        
except Exception as e:
    print(f"❌ Erreur: {e}")


🔍 DIAGNOSTIC DES INGRÉDIENTS

📋 Recette 1: crab filled crescent snacks...
   🥄 Bruts: ['crabmeat', 'cream cheese', 'green onions', 'garlic salt', 'refrigerated crescent dinner rolls', 'e...
   ❌ Pas d'ingrédients normalisés

📋 Recette 2: curried bean salad...
   🥄 Bruts: ['garbanzo beans', 'black beans', 'onion', 'ginger paste', 'mild curry powder', 'dried cilantro', 'l...
   ❌ Pas d'ingrédients normalisés

📋 Recette 3: delicious steak with onion marinade...
   🥄 Bruts: ['olive oil', 'red onion', 'light brown sugar', 'balsamic vinegar', 'steaks']...
   ❌ Pas d'ingrédients normalisés

🧪 TEST AVEC INGRÉDIENTS GÉNÉRIQUES
Ingrédients: ['salt', 'sugar', 'flour']
🥕 Utilisation de la colonne: ingredients
📊 Stats calculées pour 1000 recettes
🔗 Après fusion: 100 recettes
🏆 Retour de 3 recommandations
✅ Top 3 avec ingrédients génériques:
   1. Score: 0.348 | Jaccard: 0.000
      hockey puck potatoes...
   2. Score: 0.335 | Jaccard: 0.000
      oataroons...
   3. Score: 0.319 | Jaccard: 0.000
  

In [43]:
print("\n🚀 TEST AVEC DONNÉES PREPROCESSÉES")
print("="*50)

# Vérifier si nous avons des données preprocessées
if hasattr(preprocessor, 'recipes_df') and preprocessor.recipes_df is not None:
    processed_recipes = preprocessor.recipes_df
    print(f"✅ Données preprocessées disponibles: {len(processed_recipes)} recettes")
    
    # Vérifier les colonnes d'ingrédients
    if 'normalized_ingredients' in processed_recipes.columns:
        # Compter les recettes avec ingrédients normalisés
        has_norm = processed_recipes['normalized_ingredients'].notna().sum()
        print(f"✅ Recettes avec ingrédients normalisés: {has_norm}")
        
        # Prendre un échantillon des données preprocessées
        sample_processed = processed_recipes.sample(min(500, len(processed_recipes)), random_state=42)
        
        # Test avec les données preprocessées
        user_ingredients = ["chicken", "onion", "garlic"]
        print(f"\n🎯 Test avec ingrédients: {user_ingredients}")
        
        try:
            final_recs = scorer.recommend(
                recipes_df=sample_processed,
                interactions_df=interactions_df,
                user_ingredients=user_ingredients,
                time_limit=60,
                top_n=5
            )
            
            print(f"\n🏆 TOP 5 AVEC DONNÉES PREPROCESSÉES:")
            for i, (_, rec) in enumerate(final_recs.iterrows(), 1):
                print(f"   {i}. {rec['name'][:45]}...")
                print(f"      🎯 Score: {rec['score']:.3f} | 🥕 Jaccard: {rec['jaccard']:.3f}")
                if 'minutes' in rec and pd.notnull(rec['minutes']):
                    print(f"      ⏱️ Temps: {rec['minutes']} min")
                
                # Afficher ingrédients normalisés
                if 'normalized_ingredients' in rec and pd.notnull(rec['normalized_ingredients']):
                    norm_ing = rec['normalized_ingredients']
                    if isinstance(norm_ing, list) and len(norm_ing) > 0:
                        common = set(user_ingredients) & set(norm_ing)
                        print(f"      🤝 Ingrédients communs: {list(common)}")
                        print(f"      🥄 Premiers ingrédients: {norm_ing[:5]}")
                print()
            
        except Exception as e:
            print(f"❌ Erreur: {e}")
            import traceback
            traceback.print_exc()
    else:
        print("❌ Pas de colonne 'normalized_ingredients' dans les données preprocessées")
else:
    print("❌ Pas de données preprocessées disponibles")
    print("💡 Il faut d'abord exécuter le preprocessing complet")


🚀 TEST AVEC DONNÉES PREPROCESSÉES
❌ Pas de données preprocessées disponibles
💡 Il faut d'abord exécuter le preprocessing complet


In [44]:
print("🔧 LANCEMENT DU PREPROCESSING COMPLET")
print("="*50)

try:
    # Prendre un échantillon pour le preprocessing (plus rapide pour le test)
    sample_size = 5000
    sample_recipes = recipes_df.sample(min(sample_size, len(recipes_df)), random_state=42)
    sample_interactions = interactions_df[interactions_df['recipe_id'].isin(sample_recipes['id'])]
    
    print(f"📊 Échantillon pour preprocessing:")
    print(f"   - Recettes: {len(sample_recipes)}")
    print(f"   - Interactions: {len(sample_interactions)}")
    
    # Lancer le preprocessing sur l'échantillon
    print("\n⚙️ Preprocessing en cours...")
    processed_data = preprocessor.preprocess_recipes(sample_recipes)
    
    print(f"✅ Preprocessing terminé: {len(processed_data)} recettes")
    
    # Vérifier les ingrédients normalisés
    if 'normalized_ingredients' in processed_data.columns:
        has_norm = processed_data['normalized_ingredients'].notna().sum()
        print(f"✅ Recettes avec ingrédients normalisés: {has_norm}")
        
        # Regarder quelques exemples
        sample_proc = processed_data[processed_data['normalized_ingredients'].notna()].head(3)
        print("\n📋 EXEMPLES D'INGRÉDIENTS NORMALISÉS:")
        for i, (_, recipe) in enumerate(sample_proc.iterrows(), 1):
            print(f"\n{i}. {recipe['name'][:40]}...")
            
            # Ingrédients bruts
            if 'ingredients' in recipe and pd.notnull(recipe['ingredients']):
                raw_ing = recipe['ingredients']
                if isinstance(raw_ing, list):
                    print(f"   🥄 Bruts: {raw_ing[:3]}...")
            
            # Ingrédients normalisés
            norm_ing = recipe['normalized_ingredients']
            if isinstance(norm_ing, list):
                print(f"   ✅ Normalisés: {norm_ing[:3]}...")
        
        # Test de recommandation avec ingrédients normalisés
        print(f"\n🎯 TEST RECOMMANDATION AVEC INGRÉDIENTS NORMALISÉS")
        user_ingredients = ["chicken", "onion", "garlic"]
        print(f"Ingrédients utilisateur: {user_ingredients}")
        
        final_recs = scorer.recommend(
            recipes_df=processed_data,
            interactions_df=sample_interactions,
            user_ingredients=user_ingredients,
            time_limit=90,
            top_n=5
        )
        
        print(f"\n🏆 TOP 5 RECOMMANDATIONS FINALES:")
        for i, (_, rec) in enumerate(final_recs.iterrows(), 1):
            print(f"\n{i}. {rec['name'][:50]}...")
            print(f"   🎯 Score total: {rec['score']:.3f}")
            print(f"   🥕 Jaccard: {rec['jaccard']:.3f}")
            print(f"   ⭐ Rating: {rec['mean_rating_norm']:.3f}")
            print(f"   🔥 Popularité: {rec['popularity']:.3f}")
            
            if 'minutes' in rec and pd.notnull(rec['minutes']):
                print(f"   ⏱️ Temps: {rec['minutes']} min")
            
            # Ingrédients communs
            if 'normalized_ingredients' in rec and pd.notnull(rec['normalized_ingredients']):
                norm_ing = rec['normalized_ingredients']
                if isinstance(norm_ing, list):
                    common = set(user_ingredients) & set(norm_ing)
                    print(f"   🤝 Communs: {list(common)}")
                    print(f"   🥄 Ingrédients: {norm_ing[:6]}...")
    
    else:
        print("❌ Erreur: Pas de colonne 'normalized_ingredients' après preprocessing")

except Exception as e:
    print(f"❌ Erreur durant le preprocessing: {e}")
    import traceback
    traceback.print_exc()

🔧 LANCEMENT DU PREPROCESSING COMPLET
📊 Échantillon pour preprocessing:
   - Recettes: 5000
   - Interactions: 23121

⚙️ Preprocessing en cours...
❌ Erreur durant le preprocessing: 'RecipePreprocessor' object has no attribute 'preprocess_recipes'


Traceback (most recent call last):
  File "/tmp/ipykernel_40/2618704074.py", line 16, in <module>
    processed_data = preprocessor.preprocess_recipes(sample_recipes)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'RecipePreprocessor' object has no attribute 'preprocess_recipes'


In [45]:
# Vérifier les méthodes disponibles du preprocessor
print("🔍 MÉTHODES DISPONIBLES DU PREPROCESSOR:")
methods = [method for method in dir(preprocessor) if not method.startswith('_')]
print(f"   {methods}")

# Utiliser la bonne méthode
print(f"\n🔧 PREPROCESSING AVEC LA BONNE MÉTHODE")
try:
    # Prendre un échantillon plus petit pour commencer
    sample_size = 1000
    sample_recipes = recipes_df.sample(min(sample_size, len(recipes_df)), random_state=42)
    sample_interactions = interactions_df[interactions_df['recipe_id'].isin(sample_recipes['id'])]
    
    print(f"📊 Échantillon: {len(sample_recipes)} recettes, {len(sample_interactions)} interactions")
    
    # Tester la méthode preprocess
    print("\n⚙️ Lancement du preprocessing...")
    processed_data = preprocessor.preprocess(sample_recipes)
    
    print(f"✅ Preprocessing réussi: {len(processed_data)} recettes")
    print(f"📋 Colonnes disponibles: {list(processed_data.columns)}")
    
    # Vérifier les ingrédients normalisés
    if 'normalized_ingredients' in processed_data.columns:
        has_norm = processed_data['normalized_ingredients'].notna().sum()
        has_list_norm = processed_data['normalized_ingredients'].apply(
            lambda x: isinstance(x, list) and len(x) > 0
        ).sum()
        
        print(f"✅ Recettes avec ingrédients normalisés: {has_norm}")
        print(f"✅ Recettes avec listes d'ingrédients: {has_list_norm}")
        
        # Exemple d'ingrédients normalisés
        if has_list_norm > 0:
            example = processed_data[
                processed_data['normalized_ingredients'].apply(
                    lambda x: isinstance(x, list) and len(x) > 0
                )
            ].iloc[0]
            
            print(f"\n📋 EXEMPLE D'INGRÉDIENTS NORMALISÉS:")
            print(f"   Recette: {example['name'][:40]}...")
            print(f"   Bruts: {example['ingredients'][:3] if isinstance(example['ingredients'], list) else 'N/A'}")
            print(f"   Normalisés: {example['normalized_ingredients'][:5]}")
            
            # Test final de recommandation
            print(f"\n🎯 TEST FINAL DE RECOMMANDATION")
            user_ingredients = ["chicken", "onion", "garlic"]
            
            recommendations = scorer.recommend(
                recipes_df=processed_data,
                interactions_df=sample_interactions,
                user_ingredients=user_ingredients,
                time_limit=60,
                top_n=5
            )
            
            print(f"\n🏆 🎉 RECOMMANDATIONS FINALES 🎉 🏆")
            for i, (_, rec) in enumerate(recommendations.iterrows(), 1):
                print(f"\n🥇 {i}. {rec['name'][:45]}...")
                print(f"   🎯 Score: {rec['score']:.3f} | 🥕 Jaccard: {rec['jaccard']:.3f}")
                
                if rec['jaccard'] > 0:  # Afficher détails si match trouvé
                    norm_ing = rec['normalized_ingredients']
                    if isinstance(norm_ing, list):
                        common = set(user_ingredients) & set(norm_ing)
                        print(f"   🤝 Ingrédients communs: {sorted(common)}")
                        print(f"   📝 Ingrédients recette: {norm_ing[:6]}...")
                
                if 'minutes' in rec and pd.notnull(rec['minutes']):
                    print(f"   ⏱️ Temps: {rec['minutes']} min")
        
        if recommendations.empty:
            print("❌ Aucune recommandation générée")
        elif recommendations['jaccard'].sum() == 0:
            print("⚠️ Aucun match d'ingrédients trouvé - vérifier la normalisation")
        else:
            max_jaccard = recommendations['jaccard'].max()
            print(f"\n🎊 SYSTÈME OPÉRATIONNEL ! Score Jaccard max: {max_jaccard:.3f}")
    
    else:
        print("❌ Pas de colonne 'normalized_ingredients' créée")

except Exception as e:
    print(f"❌ Erreur: {e}")
    import traceback
    traceback.print_exc()

🔍 MÉTHODES DISPONIBLES DU PREPROCESSOR:
   ['description_prep', 'ingredients_prep', 'nutrition_prep', 'preprocess_dataframe', 'preprocess_recipe', 'steps_prep', 'tags_prep']

🔧 PREPROCESSING AVEC LA BONNE MÉTHODE
📊 Échantillon: 1000 recettes, 4333 interactions

⚙️ Lancement du preprocessing...
❌ Erreur: 'RecipePreprocessor' object has no attribute 'preprocess'


Traceback (most recent call last):
  File "/tmp/ipykernel_40/2943854384.py", line 18, in <module>
    processed_data = preprocessor.preprocess(sample_recipes)
                     ^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'RecipePreprocessor' object has no attribute 'preprocess'


In [46]:
print("🔧 PREPROCESSING AVEC preprocess_dataframe")
print("="*50)

try:
    # Échantillon pour test
    sample_size = 500  # Plus petit pour commencer
    sample_recipes = recipes_df.sample(min(sample_size, len(recipes_df)), random_state=42)
    sample_interactions = interactions_df[interactions_df['recipe_id'].isin(sample_recipes['id'])]
    
    print(f"📊 Échantillon: {len(sample_recipes)} recettes, {len(sample_interactions)} interactions")
    
    # Utiliser la bonne méthode
    print("\n⚙️ Preprocessing en cours...")
    processed_data = preprocessor.preprocess_dataframe(sample_recipes)
    
    print(f"✅ Preprocessing terminé: {len(processed_data)} recettes")
    print(f"📋 Colonnes: {list(processed_data.columns)}")
    
    # Vérifier les ingrédients normalisés
    if 'normalized_ingredients' in processed_data.columns:
        has_norm = processed_data['normalized_ingredients'].notna().sum()
        has_list = processed_data['normalized_ingredients'].apply(
            lambda x: isinstance(x, list) and len(x) > 0
        ).sum()
        
        print(f"✅ Recettes avec ingrédients normalisés: {has_norm}/{len(processed_data)}")
        print(f"✅ Recettes avec listes d'ingrédients: {has_list}")
        
        if has_list > 0:
            # Exemple
            example = processed_data[
                processed_data['normalized_ingredients'].apply(
                    lambda x: isinstance(x, list) and len(x) > 0
                )
            ].iloc[0]
            
            print(f"\n📋 EXEMPLE:")
            print(f"   Recette: {example['name']}")
            print(f"   Normalisés: {example['normalized_ingredients'][:5]}")
            
            # TEST FINAL 🎯
            print(f"\n🎯 🎉 TEST RECOMMANDATION FINALE 🎉")
            user_ingredients = ["chicken", "onion", "garlic", "salt"]
            print(f"Ingrédients utilisateur: {user_ingredients}")
            
            final_recommendations = scorer.recommend(
                recipes_df=processed_data,
                interactions_df=sample_interactions,
                user_ingredients=user_ingredients,
                time_limit=90,
                top_n=5
            )
            
            print(f"\n🏆 TOP 5 RECOMMANDATIONS:")
            
            success = False
            for i, (_, rec) in enumerate(final_recommendations.iterrows(), 1):
                jaccard_score = rec['jaccard']
                total_score = rec['score']
                
                print(f"\n🥇 {i}. {rec['name'][:50]}...")
                print(f"   🎯 Score total: {total_score:.3f}")
                print(f"   🥕 Jaccard: {jaccard_score:.3f}")
                print(f"   ⭐ Rating norm: {rec.get('mean_rating_norm', 'N/A'):.3f}")
                print(f"   🔥 Popularité: {rec.get('popularity', 'N/A'):.3f}")
                
                if 'minutes' in rec and pd.notnull(rec['minutes']):
                    print(f"   ⏱️ Temps: {rec['minutes']} min")
                
                # Détails des ingrédients si match
                if jaccard_score > 0 and 'normalized_ingredients' in rec:
                    norm_ing = rec['normalized_ingredients']
                    if isinstance(norm_ing, list):
                        common = set(user_ingredients) & set(norm_ing)
                        print(f"   🤝 Ingrédients communs: {sorted(common)}")
                        print(f"   📝 Tous les ingrédients: {norm_ing}")
                        success = True
                elif isinstance(rec.get('normalized_ingredients'), list):
                    print(f"   📝 Ingrédients de la recette: {rec['normalized_ingredients'][:5]}...")
            
            # Résumé final
            max_jaccard = final_recommendations['jaccard'].max()
            has_matches = (final_recommendations['jaccard'] > 0).sum()
            
            print(f"\n🎊 RÉSULTAT FINAL:")
            print(f"   ✅ Système opérationnel: OUI")
            print(f"   📊 Recommandations générées: {len(final_recommendations)}")
            print(f"   🥕 Score Jaccard maximum: {max_jaccard:.3f}")
            print(f"   🤝 Recettes avec ingrédients communs: {has_matches}")
            
            if max_jaccard > 0:
                print(f"   🎉 SUCCESS: Le système trouve des correspondances d'ingrédients!")
            else:
                print(f"   ⚠️  INFO: Pas de correspondance exacte, mais le système fonctionne")
        
        else:
            print("❌ Aucune recette avec des ingrédients normalisés sous forme de liste")
    
    else:
        print("❌ Colonne 'normalized_ingredients' manquante")

except Exception as e:
    print(f"❌ Erreur finale: {e}")
    import traceback
    traceback.print_exc()

INFO:data_prepro:Début du prétraitement de 500 recettes
INFO:data_prepro:Prétraitement terminé: 500 recettes traitées


🔧 PREPROCESSING AVEC preprocess_dataframe
📊 Échantillon: 500 recettes, 2212 interactions

⚙️ Preprocessing en cours...
✅ Preprocessing terminé: 500 recettes
📋 Colonnes: ['recipe_id', 'ingredients', 'ingredient_categories', 'normalized_ingredients_list', 'nutrition_dict', 'tags', 'meal_type', 'dietary_restrictions', 'cuisine_type', 'n_steps', 'effort_score', 'cooking_techniques', 'description_keywords']
❌ Colonne 'normalized_ingredients' manquante


In [47]:
print("🔄 CORRECTION: Utiliser normalized_ingredients_list")
print("="*55)

# Vérifier les données preprocessées
print(f"✅ Colonnes disponibles: {list(processed_data.columns)}")

# Vérifier la colonne normalized_ingredients_list  
if 'normalized_ingredients_list' in processed_data.columns:
    has_norm = processed_data['normalized_ingredients_list'].notna().sum()
    has_list = processed_data['normalized_ingredients_list'].apply(
        lambda x: isinstance(x, list) and len(x) > 0
    ).sum()
    
    print(f"✅ Recettes avec ingrédients normalisés: {has_norm}/{len(processed_data)}")
    print(f"✅ Recettes avec listes d'ingrédients: {has_list}")
    
    if has_list > 0:
        # Regarder un exemple
        example = processed_data[
            processed_data['normalized_ingredients_list'].apply(
                lambda x: isinstance(x, list) and len(x) > 0
            )
        ].iloc[0]
        
        print(f"\n📋 EXEMPLE D'INGRÉDIENTS NORMALISÉS:")
        print(f"   Recipe ID: {example['recipe_id']}")
        print(f"   Ingrédients bruts: {example.get('ingredients', 'N/A')}")
        print(f"   Ingrédients normalisés: {example['normalized_ingredients_list'][:5]}...")
        
        # Créer une copie avec le bon nom de colonne pour le scorer
        processed_for_scorer = processed_data.copy()
        processed_for_scorer['normalized_ingredients'] = processed_data['normalized_ingredients_list']
        
        # Il faut aussi les autres colonnes (id, name, minutes)
        # Merger avec les données originales
        processed_with_orig = processed_for_scorer.merge(
            sample_recipes[['id', 'name', 'minutes']].rename(columns={'id': 'recipe_id'}),
            on='recipe_id',
            how='left'
        )
        processed_with_orig['id'] = processed_with_orig['recipe_id']
        
        print(f"\n📊 Données préparées pour le scorer:")
        print(f"   Colonnes: {list(processed_with_orig.columns)}")
        print(f"   Taille: {len(processed_with_orig)} recettes")
        
        # TEST FINAL DE RECOMMANDATION 🎯
        print(f"\n🎯 🎉 TEST RECOMMANDATION FINAL 🎉")
        user_ingredients = ["chicken", "onion", "garlic", "salt"]
        print(f"Ingrédients utilisateur: {user_ingredients}")
        
        try:
            final_recs = scorer.recommend(
                recipes_df=processed_with_orig,
                interactions_df=sample_interactions,
                user_ingredients=user_ingredients,
                time_limit=120,
                top_n=5
            )
            
            print(f"\n🏆 TOP 5 RECOMMANDATIONS FINALES:")
            
            for i, (_, rec) in enumerate(final_recs.iterrows(), 1):
                jaccard_score = rec['jaccard']
                total_score = rec['score']
                
                print(f"\n🥇 {i}. {rec['name'][:50]}...")
                print(f"   🎯 Score total: {total_score:.3f}")
                print(f"   🥕 Jaccard: {jaccard_score:.3f}")
                print(f"   ⭐ Rating: {rec.get('mean_rating_norm', 0):.3f}")
                print(f"   🔥 Popularité: {rec.get('popularity', 0):.3f}")
                
                if 'minutes' in rec and pd.notnull(rec['minutes']):
                    print(f"   ⏱️ Temps: {rec['minutes']} min")
                
                # Afficher les correspondances d'ingrédients
                if jaccard_score > 0 and 'normalized_ingredients' in rec:
                    norm_ing = rec['normalized_ingredients']
                    if isinstance(norm_ing, list):
                        common = set(user_ingredients) & set(norm_ing)
                        print(f"   🤝 Ingrédients communs: {sorted(common)} ({len(common)}/{len(norm_ing)})")
                        print(f"   📝 Ingrédients recette: {norm_ing}")
                
                elif 'normalized_ingredients' in rec and isinstance(rec['normalized_ingredients'], list):
                    print(f"   📝 Ingrédients: {rec['normalized_ingredients'][:6]}...")
            
            # Statistiques finales
            max_jaccard = final_recs['jaccard'].max()
            avg_jaccard = final_recs['jaccard'].mean()
            has_matches = (final_recs['jaccard'] > 0).sum()
            
            print(f"\n🎊 🎊 SYSTÈME OPÉRATIONNEL ! 🎊 🎊")
            print(f"   ✅ Recommandations: {len(final_recs)}")
            print(f"   🥕 Jaccard max: {max_jaccard:.3f}")
            print(f"   📊 Jaccard moyen: {avg_jaccard:.3f}")
            print(f"   🤝 Recettes avec matches: {has_matches}/5")
            
            if max_jaccard > 0:
                print(f"   🎉 PARFAIT: Le système trouve des correspondances d'ingrédients!")
            else:
                print(f"   💡 INFO: Système fonctionnel, optimiser la normalisation des ingrédients")
            
            print(f"\n✅ ✅ ✅ TOUTES LES CORRECTIONS APPLIQUÉES AVEC SUCCÈS ! ✅ ✅ ✅")
        
        except Exception as e:
            print(f"❌ Erreur recommandation: {e}")
            import traceback
            traceback.print_exc()
    
    else:
        print("❌ Pas d'ingrédients normalisés sous forme de liste")
else:
    print("❌ Colonne normalized_ingredients_list introuvable")

🔄 CORRECTION: Utiliser normalized_ingredients_list
✅ Colonnes disponibles: ['recipe_id', 'ingredients', 'ingredient_categories', 'normalized_ingredients_list', 'nutrition_dict', 'tags', 'meal_type', 'dietary_restrictions', 'cuisine_type', 'n_steps', 'effort_score', 'cooking_techniques', 'description_keywords']
✅ Recettes avec ingrédients normalisés: 500/500
✅ Recettes avec listes d'ingrédients: 500

📋 EXEMPLE D'INGRÉDIENTS NORMALISÉS:
   Recipe ID: 94947
   Ingrédients bruts: {'sweet and sour sauce', 'scallion', 'crabmeat', 'refrigerated crescent dinner roll', 'egg yolk', 'sesame seed', 'cream cheese', 'garlic salt', 'water'}
   Ingrédients normalisés: ['scallion', 'sweet and sour sauce', 'refrigerated crescent dinner roll', 'crabmeat', 'egg yolk']...

📊 Données préparées pour le scorer:
   Colonnes: ['recipe_id', 'ingredients', 'ingredient_categories', 'normalized_ingredients_list', 'nutrition_dict', 'tags', 'meal_type', 'dietary_restrictions', 'cuisine_type', 'n_steps', 'effort_score

Traceback (most recent call last):
  File "/tmp/ipykernel_40/2135436364.py", line 53, in <module>
    final_recs = scorer.recommend(
                 ^^^^^^^^^^^^^^^^^
  File "/app/reco_score.py", line 105, in recommend
    df["jaccard"] = df[ingredient_col].apply(
                    ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/poetry_cache/virtualenvs/preprocessing-9TtSrW0h-py3.11/lib/python3.11/site-packages/pandas/core/series.py", line 4943, in apply
    ).apply()
      ^^^^^^^
  File "/tmp/poetry_cache/virtualenvs/preprocessing-9TtSrW0h-py3.11/lib/python3.11/site-packages/pandas/core/apply.py", line 1422, in apply
    return self.apply_standard()
           ^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/poetry_cache/virtualenvs/preprocessing-9TtSrW0h-py3.11/lib/python3.11/site-packages/pandas/core/apply.py", line 1502, in apply_standard
    mapped = obj._map_values(
             ^^^^^^^^^^^^^^^^
  File "/tmp/poetry_cache/virtualenvs/preprocessing-9TtSrW0h-py3.11/lib/python3.11/site-packages/pandas/

In [48]:
# Recharger le module corrigé
import importlib
importlib.reload(reco_score)
scorer = reco_score.RecipScorer()

print("🔄 Scorer rechargé avec correction des listes")

# Relancer le test
print(f"\n🎯 TEST RECOMMANDATION CORRIGÉ")
user_ingredients = ["chicken", "onion", "garlic", "salt"]

try:
    final_recs = scorer.recommend(
        recipes_df=processed_with_orig,
        interactions_df=sample_interactions,
        user_ingredients=user_ingredients,
        time_limit=120,
        top_n=5
    )
    
    print(f"\n🏆 🎉 TOP 5 RECOMMANDATIONS FINALES 🎉 🏆")
    
    for i, (_, rec) in enumerate(final_recs.iterrows(), 1):
        jaccard_score = rec['jaccard']
        total_score = rec['score']
        
        print(f"\n🥇 RANG {i}: {rec['name']}")
        print(f"   🎯 Score total: {total_score:.4f}")
        print(f"   🥕 Jaccard: {jaccard_score:.4f}")
        print(f"   ⭐ Rating: {rec.get('mean_rating_norm', 0):.3f}")
        print(f"   🔥 Popularité: {rec.get('popularity', 0):.3f}")
        
        if 'minutes' in rec and pd.notnull(rec['minutes']):
            print(f"   ⏱️ Temps: {rec['minutes']} min")
        
        # Détails des ingrédients
        if jaccard_score > 0 and 'normalized_ingredients' in rec:
            norm_ing = rec['normalized_ingredients']
            if isinstance(norm_ing, list):
                common = set(user_ingredients) & set(norm_ing)
                print(f"   🤝 INGRÉDIENTS COMMUNS: {sorted(common)}")
                print(f"   📝 Recette contient: {norm_ing[:8]}...")
                print(f"   📊 Match: {len(common)}/{len(norm_ing)} ingrédients")
        
        elif 'normalized_ingredients' in rec and isinstance(rec['normalized_ingredients'], list):
            ing = rec['normalized_ingredients'][:5]
            print(f"   📝 Ingrédients: {ing}...")
    
    # 🎊 STATISTIQUES FINALES
    max_jaccard = final_recs['jaccard'].max()
    avg_jaccard = final_recs['jaccard'].mean()
    matches = (final_recs['jaccard'] > 0).sum()
    best_score = final_recs['score'].max()
    
    print(f"\n" + "="*60)
    print(f"🎊 🎊 🎊 RÉSUMÉ FINAL DU SYSTÈME 🎊 🎊 🎊")
    print(f"="*60)
    print(f"✅ Système de recommandation: OPÉRATIONNEL")
    print(f"📊 Recommandations générées: {len(final_recs)}")
    print(f"🥕 Score Jaccard maximum: {max_jaccard:.4f}")
    print(f"📈 Score Jaccard moyen: {avg_jaccard:.4f}")
    print(f"🤝 Recettes avec correspondances: {matches}/5")
    print(f"🏆 Meilleur score global: {best_score:.4f}")
    
    if max_jaccard > 0:
        print(f"🎉 SUCCÈS COMPLET: Correspondances d'ingrédients trouvées!")
    else:
        print(f"⚠️  Système fonctionnel - Améliorer la normalisation des ingrédients")
    
    print(f"\n🚀 TOUTES LES CORRECTIONS ONT ÉTÉ APPLIQUÉES AVEC SUCCÈS!")
    print(f"📝 Le projet MangeTaMain est maintenant entièrement opérationnel.")

except Exception as e:
    print(f"❌ Erreur lors de la recommandation: {e}")
    import traceback
    traceback.print_exc()

🔄 Scorer rechargé avec correction des listes

🎯 TEST RECOMMANDATION CORRIGÉ
⏱️ Filtrage temps: 500 → 436 recettes
🥕 Utilisation de la colonne: normalized_ingredients
📊 Stats calculées pour 500 recettes
🔗 Après fusion: 436 recettes
🏆 Retour de 5 recommandations

🏆 🎉 TOP 5 RECOMMANDATIONS FINALES 🎉 🏆

🥇 RANG 1: chicken souvlaki marinade
   🎯 Score total: 0.5276
   🥕 Jaccard: 0.1000
   ⭐ Rating: 0.925
   🔥 Popularité: 1.000
   ⏱️ Temps: 20 min
   🤝 INGRÉDIENTS COMMUNS: ['salt']
   📝 Recette contient: ['chicken breast', 'minced garlic clove', 'lemon juice', 'salt', 'olive oil', 'dried oregano', 'black pepper']...
   📊 Match: 1/7 ingrédients

🥇 RANG 2: breaded  n baked zucchini chips
   🎯 Score total: 0.5028
   🥕 Jaccard: 0.0909
   ⭐ Rating: 0.881
   🔥 Popularité: 0.966
   ⏱️ Temps: 30 min
   🤝 INGRÉDIENTS COMMUNS: ['garlic']
   📝 Recette contient: ['egg', 'garlic', 'fresh parsley', 'italian breadcrumb', 'black pepper', 'zucchini', 'milk', 'parmesan cheese']...
   📊 Match: 1/8 ingrédients



In [40]:
# Recharger le module reco_score avec les corrections
import sys
import importlib
if 'reco_score' in sys.modules:
    importlib.reload(reco_score)
else:
    import reco_score

# Créer un nouveau scorer avec la version corrigée
scorer = reco_score.RecipScorer()
print("🔄 Module reco_score rechargé avec les corrections")

🔄 Module reco_score rechargé avec les corrections
