In [1]:
import kagglehub
import pandas as pd 
import os
import yaml

with open("config.yaml", 'r') as f:
    config = yaml.safe_load(f)

def fetch_data(dataset_name, version=None)->str:
    if version:
        dataset_name = f"{dataset_name}:{version}"
    return kagglehub.dataset_download(dataset_name)

def load_data(path,files)->pd.DataFrame:
    data_frames = {}
    for file in files : 
        file_path = os.path.join(path, file)
        if os.path.exists(file_path):
            data_frames[file] = pd.read_csv(file_path)
        else:
            raise FileNotFoundError(f"{file} introuvable dans le {path}")
    return data_frames

# Charger les donn√©es au niveau global
dataset_path = fetch_data(config['dataset']['name'])
dfs = load_data(dataset_path, config['dataset']['files'])
recipes_df = dfs.get('RAW_recipes.csv')
interactions_df = dfs.get('RAW_interactions.csv')

if __name__ == "__main__":
    for name, df in dfs.items():
        print(f"Data from {name}:")
        print(df.head())
    print(" Recipes shape:", recipes_df.shape)
    print(" Interactions shape:", interactions_df.shape)

  from .autonotebook import tqdm as notebook_tqdm


Data from RAW_interactions.csv:
   user_id  recipe_id        date  rating  \
0    38094      40893  2003-02-17       4   
1  1293707      40893  2011-12-21       5   
2     8937      44394  2002-12-01       4   
3   126440      85009  2010-02-27       5   
4    57222      85009  2011-10-01       5   

                                              review  
0  Great with a salad. Cooked on top of stove for...  
1  So simple, so delicious! Great for chilly fall...  
2  This worked very well and is EASY.  I used not...  
3  I made the Mexican topping and took it to bunk...  
4  Made the cheddar bacon topping, adding a sprin...  
Data from RAW_recipes.csv:
                                         name      id  minutes  \
0  arriba   baked winter squash mexican style  137739       55   
1            a bit different  breakfast pizza   31490       30   
2                   all in the kitchen  chili  112140      130   
3                          alouette  potatoes   59389       45   
4         

In [3]:
# Rechargement du module pour prendre en compte les modifications
import importlib
import data_prepro
importlib.reload(data_prepro)
from data_prepro import IngredientPreprocessor

# Charger le fichier CSV directement depuis le r√©pertoire courant
preproc = IngredientPreprocessor("ingr_map.csv")
print("IngredientPreprocessor initialis√© avec le fichier CSV")

# V√©rifier que la carte des ingr√©dients est bien charg√©e
print(f"Nombre d'ingr√©dients dans la carte: {len(preproc.raw_to_normalized)}")

# Tester avec un ingr√©dient qui devrait √™tre dans la carte
test_result = preproc.normalize_ingredient("4 extra virgin olive oil")
print(f"Test normalization: 'extra virgin olive oil' -> '{test_result}'")

# Tester avec quelques autres ingr√©dients
test_ingredients = ["large eggs", "all purpose flour", "unsalted butter"]
for ing in test_ingredients:
    result = preproc.normalize_ingredient(ing)
    print(f"'{ing}' -> '{result}'")

INFO:data_prepro:Ingredient map loaded successfully.


IngredientPreprocessor initialis√© avec le fichier CSV
Nombre d'ingr√©dients dans la carte: 11659
Test normalization: 'extra virgin olive oil' -> 'olive oil'
'large eggs' -> 'large eggs'
'all purpose flour' -> 'all purpose flour'
'unsalted butter' -> 'unsalted butter'


In [20]:
#recipes_df.head()
recipes_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 231637 entries, 0 to 231636
Data columns (total 12 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   name            231636 non-null  object
 1   id              231637 non-null  int64 
 2   minutes         231637 non-null  int64 
 3   contributor_id  231637 non-null  int64 
 4   submitted       231637 non-null  object
 5   tags            231637 non-null  object
 6   nutrition       231637 non-null  object
 7   n_steps         231637 non-null  int64 
 8   steps           231637 non-null  object
 9   description     226658 non-null  object
 10  ingredients     231637 non-null  object
 11  n_ingredients   231637 non-null  int64 
dtypes: int64(5), object(7)
memory usage: 21.2+ MB


In [21]:
#steps_list=list(recipes_df['steps'])
#steps_list[0][0]
# steps va etre convertit en objet list 
recipes_df['description'][0]
recipes_df['nutrition'][0]
#recipes_df['ingredients'][0]
#recipes_df['tags'][0]
recipes_df['ingredients'][0][0]

'['

In [22]:
interactions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1132367 entries, 0 to 1132366
Data columns (total 5 columns):
 #   Column     Non-Null Count    Dtype 
---  ------     --------------    ----- 
 0   user_id    1132367 non-null  int64 
 1   recipe_id  1132367 non-null  int64 
 2   date       1132367 non-null  object
 3   rating     1132367 non-null  int64 
 4   review     1132198 non-null  object
dtypes: int64(3), object(2)
memory usage: 43.2+ MB


In [46]:
interactions_df.head()

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."
2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin..."


In [23]:
correlation = recipes_df[['minutes', 'n_ingredients']].corr()
print(f"Corr√©lation temps/ingr√©dients: {correlation.iloc[0,1]:.3f}")

Corr√©lation temps/ingr√©dients: -0.001


In [None]:
#recipes_df['steps_count'] = recipes_df['steps'].apply(lambda x: len(x.split('||')) if pd.notnull(x) else 0)
recipes_df['steps'][1]

"['preheat oven to 425 degrees f', 'press dough into the bottom and sides of a 12 inch pizza pan', 'bake for 5 minutes until set but not browned', 'cut sausage into small pieces', 'whisk eggs and milk in a bowl until frothy', 'spoon sausage over baked crust and sprinkle with cheese', 'pour egg mixture slowly over sausage and cheese', 's& p to taste', 'bake 15-20 minutes or until eggs are set and crust is brown']"

In [10]:
recipes_df.shape

(231637, 12)

In [None]:
#print(fastest[['name', 'minutes', 'n_ingredients']])
#recipes_df[recipes_df['minutes'] == 0].count()
# imputing missing values de minutes en utilisant la variable nombre d'ingr√©dients 
recipes_df[recipes_df['minutes'] == 0].head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
5,apple a day milk shake,5289,0,1533,1999-12-06,"['15-minutes-or-less', 'time-to-make', 'course...","[160.2, 10.0, 55.0, 3.0, 9.0, 20.0, 7.0]",4,"['combine ingredients in blender', 'cover and ...",,"['milk', 'vanilla ice cream', 'frozen apple ju...",4
2451,acorn magic delights,1712,0,1534,1999-10-01,"['15-minutes-or-less', 'time-to-make', 'course...","[148.4, 15.0, 28.0, 2.0, 3.0, 21.0, 4.0]",13,"['melt the butter or margarine over low heat',...",,"['butter', 'brown sugar', 'pecans', 'all-purpo...",7
3079,albanian byrek,4880,0,1534,1999-11-24,"['15-minutes-or-less', 'time-to-make', 'course...","[354.4, 42.0, 25.0, 59.0, 37.0, 37.0, 2.0]",14,"['prepare the dough with flour , 1 and a half ...","the directions to this are vague, but maybe yo...","['flour', 'water', 'oil', 'vinegar', 'salt', '...",9
3193,alfredo sauce with pasta,3258,0,1534,1999-10-10,"['15-minutes-or-less', 'time-to-make', 'course...","[1902.9, 287.0, 5.0, 140.0, 104.0, 583.0, 3.0]",8,['cook noodles or fettuccine according to pack...,,"['butter', 'heavy cream', 'parmesan cheese', '...",6
3259,alice s doughnuts,2284,0,1752,1999-10-18,"['15-minutes-or-less', 'time-to-make', 'course...","[107.3, 6.0, 20.0, 3.0, 3.0, 10.0, 4.0]",17,"['in a large bowl , beat the eggs until foamy'...",,"['eggs', 'sugar', 'milk', 'shortening', 'vanil...",9


In [None]:
fastest = recipes_df.nsmallest(10, 'minutes')


In [25]:
# cat√©gories des ingr√©dients
categories = {
        'proteins': ['chicken', 'beef', 'pork', 'fish', 'salmon', 'tuna', 'shrimp', 
                     'turkey', 'lamb', 'egg', 'tofu', 'tempeh'],
        'dairy': ['milk', 'cheese', 'butter', 'cream', 'yogurt', 'sour cream'],
        'vegetables': ['tomato', 'onion', 'garlic', 'carrot', 'potato', 'broccoli',
                       'spinach', 'pepper', 'mushroom', 'lettuce', 'cucumber'],
        'fruits': ['apple', 'banana', 'orange', 'lemon', 'strawberry', 'blueberry'],
        'grains': ['flour', 'rice', 'pasta', 'bread', 'oat', 'quinoa', 'wheat'],
        'spices': ['salt', 'pepper', 'cumin', 'paprika', 'cinnamon', 'basil', 
                   'oregano', 'thyme', 'rosemary'],
        'oils': ['olive oil', 'vegetable oil', 'coconut oil', 'butter'],
        'sweeteners': ['sugar', 'honey', 'maple syrup', 'brown sugar']
    }


In [30]:
subset = recipes_df.head(500).copy()
subset["clean_ingredients"] = subset["ingredients"].apply(preproc.parse_and_clean)
subset[["ingredients", "clean_ingredients"]].head()

Unnamed: 0,ingredients,clean_ingredients
0,"['winter squash', 'mexican seasoning', 'mixed ...","[olive oil, butter, honey, winter squash, salt..."
1,"['prepared pizza crust', 'sausage patty', 'egg...","[sausage, egg, milk, pizza crust, salt and pep..."
2,"['ground beef', 'yellow onions', 'diced tomato...","[yellow onion, ground cumin, lettuce, ground b..."
3,"['spreadable cheese with garlic and herbs', 'n...","[olive oil, yellow bell pepper, red bell peppe..."
4,"['tomato juice', 'apple cider vinegar', 'sugar...","[pepper, clove oil, cinnamon oil, sugar, salt,..."


In [31]:
subset["ingredient_categories"] = subset["clean_ingredients"].apply(preproc.categorize)
subset["ingredient_categories"].head()

0    {'oils': ['olive oil', 'butter'], 'sweeteners'...
1    {'other': ['sausage', 'pizza crust'], 'protein...
2    {'vegetables': ['yellow onion', 'lettuce', 'ro...
3    {'oils': ['olive oil'], 'spices': ['yellow bel...
4    {'spices': ['pepper', 'cinnamon oil', 'salt'],...
Name: ingredient_categories, dtype: object

In [36]:
tests = ["extra virgin olive oil", "large eggs", "fresh basil leaves", "granulated sugar", "unsalted butter"]
for t in tests:
    print(t, "->", preproc.normalize_ingredient(t))

extra virgin olive oil -> olive oil
large eggs -> large eggs
fresh basil leaves -> fresh basil leaf
granulated sugar -> granulated sugar
unsalted butter -> unsalted butter


In [37]:
from collections import Counter
all_clean = [ing for lst in subset["clean_ingredients"] for ing in lst]
Counter(all_clean).most_common(15)

[('salt', 178),
 ('egg', 125),
 ('butter', 111),
 ('onion', 109),
 ('sugar', 80),
 ('water', 76),
 ('milk', 74),
 ('flmy', 69),
 ('pepper', 60),
 ('garlic clove', 57),
 ('olive oil', 54),
 ('brown sugar', 46),
 ('vanilla', 42),
 ('baking powder', 39),
 ('baking soda', 37)]

In [38]:
subset["clean_ingredients"].apply(len).describe()

count    500.000000
mean       9.092000
std        4.042303
min        2.000000
25%        6.000000
50%        9.000000
75%       11.250000
max       23.000000
Name: clean_ingredients, dtype: float64

In [42]:
raw_ingredients = ["2 Cups All-Purpose Flour", "Fresh Garlic", "Olive Oil", "Ground black pepper", "Diced Tomatoes"]
cleaned = [preproc.normalize_ingredient(x) for x in raw_ingredients]
cleaned

['cups all-purpose flour', 'garlic', 'olive oil', 'black pepper', 'tomatoes']

In [43]:
tests = ["extra virgin olive oil", "large eggs", "fresh basil leaves", "granulated sugar", "unsalted butter"]
for t in tests:
    print(t, "->", preproc.normalize_ingredient(t))

extra virgin olive oil -> extra virgin olive oil
large eggs -> large eggs
fresh basil leaves -> basil leaves
granulated sugar -> granulated sugar
unsalted butter -> unsalted butter


Fin de la premi√®re √©tape de pre-traitement. Nous avons donc r√©ussi √† convertir le fichier pkl en csv et l'exploiter pour normaliser nos donn√©es textuelles.

On va donc appliquer la fonction normalisation sur la variable ingredients de notre dataset qui est dans recipes_df. 

In [75]:
# Appliquer la normalisation sur toute la colonne ingredients
# Ajouter directement une nouvelle colonne au DataFrame existant
print("Normalisation en cours...")

# Appliquer la fonction parse_and_clean sur chaque recette
recipes_df["normalized_ingredients"] = recipes_df["ingredients"].apply(preproc.parse_and_clean)

print("‚úÖ Normalisation termin√©e!")
print(f"Forme du dataset: {recipes_df.shape}")


Normalisation en cours...
‚úÖ Normalisation termin√©e!
Forme du dataset: (231637, 13)
‚úÖ Normalisation termin√©e!
Forme du dataset: (231637, 13)


In [76]:
# Comparer quelques exemples avant/apr√®s
print("\nüìã Exemples avant/apr√®s normalisation:")
for i in range(3):
    print(f"\n--- Recette {i+1} ---")
    print(f"Avant: {recipes_df.iloc[i]['ingredients']}")
    print(f"Apr√®s: {recipes_df.iloc[i]['normalized_ingredients']}")
    
recipes_df[["name", "ingredients", "normalized_ingredients"]].head()


üìã Exemples avant/apr√®s normalisation:

--- Recette 1 ---
Avant: ['winter squash', 'mexican seasoning', 'mixed spice', 'honey', 'butter', 'olive oil', 'salt']
Apr√®s: ['olive oil', 'butter', 'honey', 'winter squash', 'salt', 'mixed spice', 'mexican seasoning']

--- Recette 2 ---
Avant: ['prepared pizza crust', 'sausage patty', 'eggs', 'milk', 'salt and pepper', 'cheese']
Apr√®s: ['eggs', 'milk', 'salt and pepper', 'cheese', 'sausage patty', 'prepared pizza crust']

--- Recette 3 ---
Avant: ['ground beef', 'yellow onions', 'diced tomatoes', 'tomato paste', 'tomato soup', 'rotel tomatoes', 'kidney beans', 'water', 'chili powder', 'ground cumin', 'salt', 'lettuce', 'cheddar cheese']
Apr√®s: ['lettuce', 'yellow onions', 'chili powder', 'tomatoes', 'cumin', 'rotel tomatoes', 'water', 'tomato paste', 'cheddar cheese', 'salt', 'kidney beans', 'tomato soup', 'beef']


Unnamed: 0,name,ingredients,normalized_ingredients
0,arriba baked winter squash mexican style,"['winter squash', 'mexican seasoning', 'mixed ...","[olive oil, butter, honey, winter squash, salt..."
1,a bit different breakfast pizza,"['prepared pizza crust', 'sausage patty', 'egg...","[eggs, milk, salt and pepper, cheese, sausage ..."
2,all in the kitchen chili,"['ground beef', 'yellow onions', 'diced tomato...","[lettuce, yellow onions, chili powder, tomatoe..."
3,alouette potatoes,"['spreadable cheese with garlic and herbs', 'n...","[olive oil, yellow bell pepper, new potatoes, ..."
4,amish tomato ketchup for canning,"['tomato juice', 'apple cider vinegar', 'sugar...","[pepper, clove oil, cinnamon oil, sugar, salt,..."


In [84]:
# Analyser les r√©sultats de la normalisation
print("üìä Analyse des ingr√©dients normalis√©s:")
print(f"Nombre total de recettes: {len(recipes_df)}")

# Calculer le nombre moyen d'ingr√©dients par recette
avg_ingredients = recipes_df["normalized_ingredients"].apply(len).mean()
print(f"Nombre moyen d'ingr√©dients par recette: {avg_ingredients:.1f}")

# Top 20 des ingr√©dients les plus fr√©quents apr√®s normalisation
from collections import Counter
all_normalized_ingredients = [ing for ingredients_list in recipes_df["normalized_ingredients"] for ing in ingredients_list]
most_common = Counter(all_normalized_ingredients).most_common(20)

print("\nü•á Top 20 des ingr√©dients les plus fr√©quents:")
for i, (ingredient, count) in enumerate(most_common, 1):
    print(f"{i:2d}. {ingredient:<20} : {count:>6,} fois")


# Afficher les colonnes du DataFrame
print(f"\nüìã Colonnes du DataFrame: {list(recipes_df.columns)}")

üìä Analyse des ingr√©dients normalis√©s:
Nombre total de recettes: 231637
Nombre moyen d'ingr√©dients par recette: 9.0

ü•á Top 20 des ingr√©dients les plus fr√©quents:
 1. salt                 : 85,746 fois
 2. butter               : 54,975 fois
 3. sugar                : 44,535 fois
 4. onion                : 39,786 fois
 5. water                : 34,926 fois
 6. eggs                 : 33,761 fois
 7. olive oil            : 32,822 fois
 8. garlic cloves        : 26,723 fois
 9. pepper               : 26,633 fois
10. flour                : 26,266 fois
11. milk                 : 25,799 fois
12. black pepper         : 24,271 fois
13. lemon juice          : 19,506 fois
14. cinnamon             : 19,316 fois
15. garlic               : 19,072 fois
16. brown sugar          : 18,655 fois
17. all-purpose flour    : 17,659 fois
18. baking powder        : 17,504 fois
19. egg                  : 17,304 fois
20. tomatoes             : 16,602 fois

üìã Colonnes du DataFrame: ['name', 'id', 'min

In [4]:
from data_prepro import NutritionPreprocessor
# Nouvelle cellule - Test de NutritionPreprocessor
print("üß™ TEST DE LA CLASSE NutritionPreprocessor")
print("=" * 50)

# Recharger le module pour prendre en compte les modifications
import importlib
importlib.reload(data_prepro)
from data_prepro import NutritionPreprocessor

# Cr√©er une instance du preprocessor
nutrition_processor = NutritionPreprocessor()

# Test 1: Parsing d'une cha√Æne nutrition normale
print("\nüìä Test 1: Parsing nutrition normale")
test_nutrition_str = "[200.5, 10.2, 15.8, 25.0, 5.5, 12.3, 800.0]"
print(f"Input: {test_nutrition_str}")
parsed_nutrition = nutrition_processor.parse_nutrition(test_nutrition_str)
print(f"Output: {parsed_nutrition}")

# Test 2: Calcul du health score
print(f"\nüè• Test 2: Calcul du health score")
health_score = nutrition_processor.compute_health_score(parsed_nutrition)
print(f"Health Score: {health_score}")

# Test 3: Exemple avec des donn√©es r√©elles du dataset
print(f"\nü•ó Test 3: Donn√©es r√©elles du dataset")
if 'nutrition' in recipes_df.columns:
    real_nutrition_str = recipes_df['nutrition'].iloc[0]
    print(f"Nutrition originale: {real_nutrition_str}")
    
    real_parsed = nutrition_processor.parse_nutrition(real_nutrition_str)
    print(f"Parsed: {real_parsed}")
    
    real_health_score = nutrition_processor.compute_health_score(real_parsed)
    print(f"Health Score: {real_health_score}")

# Test 4: Cas d'erreur - cha√Æne malform√©e
print(f"\n‚ùå Test 4: Gestion d'erreurs")
malformed_str = "[200.5, 10.2, invalid, 25.0]"
print(f"Input malform√©: {malformed_str}")
error_result = nutrition_processor.parse_nutrition(malformed_str)
print(f"R√©sultat: {error_result}")

# Test 5: Test avec plusieurs exemples du dataset
print(f"\nüìà Test 5: Analyse de plusieurs recettes")
sample_size = 10
nutrition_results = []

for i in range(min(sample_size, len(recipes_df))):
    nutrition_str = recipes_df['nutrition'].iloc[i]
    parsed = nutrition_processor.parse_nutrition(nutrition_str)
    if parsed:  # Si le parsing a r√©ussi
        health_score = nutrition_processor.compute_health_score(parsed)
        nutrition_results.append({
            'recipe_id': i,
            'calories': parsed.get('calories', 0),
            'protein': parsed.get('protein', 0),
            'sugar': parsed.get('sugar', 0),
            'health_score': health_score
        })

# Afficher les r√©sultats
print(f"\nüìã R√©sultats pour {len(nutrition_results)} recettes:")
print(f"{'ID':<3} {'Calories':<8} {'Protein':<7} {'Sugar':<6} {'Health Score':<12}")
print("-" * 40)
for result in nutrition_results:
    print(f"{result['recipe_id']:<3} {result['calories']:<8.1f} {result['protein']:<7.1f} "
          f"{result['sugar']:<6.1f} {result['health_score']:<12.2f}")

# Test 6: Comparaison de diff√©rents profils nutritionnels
print(f"\nüî¨ Test 6: Comparaison de profils nutritionnels")

test_profiles = [
    {
        'name': 'Recette saine',
        'nutrition': [300, 8, 12, 40, 5, 20, 500]  # Faible en calories, bon en prot√©ines
    },
    {
        'name': 'Recette riche',
        'nutrition': [800, 35, 45, 60, 15, 15, 1500]  # Riche en calories et sodium
    },
    {
        'name': 'Dessert sucr√©',
        'nutrition': [450, 20, 25, 55, 35, 8, 200]  # Riche en sucre
    }
]

for profile in test_profiles:
    # Simuler une cha√Æne nutrition
    nutrition_str = str(profile['nutrition'])
    parsed = nutrition_processor.parse_nutrition(nutrition_str)
    health_score = NutritionPreprocessor.compute_health_score(parsed)
    
    print(f"\n{profile['name']}:")
    print(f"  Calories: {parsed['calories']}")
    print(f"  Prot√©ines: {parsed['protein']}g")
    print(f"  Sucre: {parsed['sugar']}g")
    print(f"  Sodium: {parsed['sodium']}mg")
    print(f"  üè• Health Score: {health_score}")

print(f"\n‚úÖ Tests termin√©s!")



ERROR:data_prepro:Erreur parsing nutrition: malformed node or string on line 1: <ast.Name object at 0x7f10fee61b10>


üß™ TEST DE LA CLASSE NutritionPreprocessor

üìä Test 1: Parsing nutrition normale
Input: [200.5, 10.2, 15.8, 25.0, 5.5, 12.3, 800.0]
Output: {'calories': 200.5, 'fat': 10.2, 'total_fat': 15.8, 'carbohydrates': 25.0, 'sugar': 5.5, 'protein': 12.3, 'sodium': 800.0}

üè• Test 2: Calcul du health score
Health Score: 1

ü•ó Test 3: Donn√©es r√©elles du dataset
Nutrition originale: [51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]
Parsed: {'calories': 51.5, 'fat': 0.0, 'total_fat': 13.0, 'carbohydrates': 0.0, 'sugar': 2.0, 'protein': 0.0, 'sodium': 4.0}
Health Score: 1

‚ùå Test 4: Gestion d'erreurs
Input malform√©: [200.5, 10.2, invalid, 25.0]
R√©sultat: {}

üìà Test 5: Analyse de plusieurs recettes

üìã R√©sultats pour 10 recettes:
ID  Calories Protein Sugar  Health Score
----------------------------------------
0   51.5     0.0     2.0    1.00        
1   173.4    35.0    22.0   1.00        
2   269.8    27.0    39.0   1.00        
3   368.1    8.0     14.0   1.00        
4   352.9    0.0     

In [83]:
recipes_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 231637 entries, 0 to 231636
Data columns (total 13 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   name                    231636 non-null  object
 1   id                      231637 non-null  int64 
 2   minutes                 231637 non-null  int64 
 3   contributor_id          231637 non-null  int64 
 4   submitted               231637 non-null  object
 5   tags                    231637 non-null  object
 6   nutrition               231637 non-null  object
 7   n_steps                 231637 non-null  int64 
 8   steps                   231637 non-null  object
 9   description             226658 non-null  object
 10  ingredients             231637 non-null  object
 11  n_ingredients           231637 non-null  int64 
 12  normalized_ingredients  231637 non-null  object
dtypes: int64(5), object(8)
memory usage: 23.0+ MB


In [None]:
categorized_test = preproc.categorize(["chicken", "olive oil", "salt", "tomato", "basil", "sugar", "flour"])
print(categorized_test)

{'proteins': ['chicken'], 'oils': ['olive oil'], 'spices': ['salt', 'basil'], 'vegetables': ['tomato'], 'sweeteners': ['sugar'], 'grains': ['flour']}


In [None]:
categorized = recipes_df["normalized_ingredients"].apply(preproc.categorize)

In [90]:
recipes_df["normalized_ingredients"][0]

['olive oil',
 'butter',
 'honey',
 'winter squash',
 'salt',
 'mixed spice',
 'mexican seasoning']

In [89]:
categorized[0]

{'oils': ['olive oil', 'butter'],
 'sweeteners': ['honey'],
 'other': ['winter squash', 'mixed spice', 'mexican seasoning'],
 'spices': ['salt']}

In [5]:
# Nouvelle cellule - Test du RecipePreprocessor complet
print("üß™ TEST DU RECIPEPREPROCESSOR COMPLET")
print("=" * 50)

# Recharger le module
import importlib
import data_prepro
importlib.reload(data_prepro)
from data_prepro import RecipePreprocessor

# Initialiser le preprocessor
preprocessor = RecipePreprocessor()
print("‚úÖ RecipePreprocessor initialis√© avec le fichier CSV")

# Test sur UN √©chantillon de recette d'abord
print("\nüìã Test sur une recette individuelle:")
sample_row = recipes_df.iloc[0]  # Premi√®re recette
print(f"Recette ID: {sample_row['id']}")
print(f"Nom: {sample_row['name']}")

# Pr√©traiter cette recette
try:
    recipe_features = preprocessor.preprocess_recipe(sample_row)
    print(f"‚úÖ Pr√©traitement r√©ussi!")
    print(f"Ingr√©dients extraits: {len(recipe_features.ingredients)}")
    print(f"Cat√©gories: {list(recipe_features.ingredient_categories.keys())}")
    print(f"Type de repas: {recipe_features.meal_type}")
    print(f"Cuisine: {recipe_features.cuisine_type}")
    print(f"Score d'effort: {recipe_features.effort_score}")
except Exception as e:
    print(f"‚ùå Erreur: {e}")

üß™ TEST DU RECIPEPREPROCESSOR COMPLET


INFO:data_prepro:Ingredient map loaded successfully.
INFO:data_prepro:RecipePreprocessor initialis√© avec succ√®s
INFO:data_prepro:RecipePreprocessor initialis√© avec succ√®s


‚úÖ RecipePreprocessor initialis√© avec le fichier CSV

üìã Test sur une recette individuelle:
Recette ID: 137739
Nom: arriba   baked winter squash mexican style
‚úÖ Pr√©traitement r√©ussi!
Ingr√©dients extraits: 7
Cat√©gories: ['other', 'oils', 'spices', 'sweeteners']
Type de repas: None
Cuisine: mexican
Score d'effort: 0.45272727272727276


In [6]:
# Test apr√®s correction
print("üß™ TEST APR√àS CORRECTION")
print("=" * 30)

# Recharger le module modifi√©
import importlib
import data_prepro
importlib.reload(data_prepro)
from data_prepro import RecipePreprocessor

# R√©essayer
preprocessor = RecipePreprocessor()
sample_row = recipes_df.iloc[100]

try:
    recipe_features = preprocessor.preprocess_recipe(sample_row)
    print(f"‚úÖ Pr√©traitement r√©ussi!")
    print(f"Ingr√©dients extraits: {len(recipe_features.ingredients)}")
    print(f"Cat√©gories: {list(recipe_features.ingredient_categories.keys())}")
    print(f"Type de repas: {recipe_features.meal_type}")
    print(f"Cuisine: {recipe_features.cuisine_type}")
    print(f"Score d'effort: {recipe_features.effort_score}")
except Exception as e:
    print(f"‚ùå Erreur: {e}")
    import traceback
    traceback.print_exc()

üß™ TEST APR√àS CORRECTION


INFO:data_prepro:Ingredient map loaded successfully.
INFO:data_prepro:RecipePreprocessor initialis√© avec succ√®s
INFO:data_prepro:RecipePreprocessor initialis√© avec succ√®s


‚úÖ Pr√©traitement r√©ussi!
Ingr√©dients extraits: 7
Cat√©gories: ['vegetables', 'other', 'dairy', 'spices']
Type de repas: lunch
Cuisine: indian
Score d'effort: 0.26249999999999996


In [None]:
recipes_df.head()

In [12]:
interactions_df[interactions_df['recipe_id']==40893]

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."


In [13]:
interactions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1132367 entries, 0 to 1132366
Data columns (total 5 columns):
 #   Column     Non-Null Count    Dtype 
---  ------     --------------    ----- 
 0   user_id    1132367 non-null  int64 
 1   recipe_id  1132367 non-null  int64 
 2   date       1132367 non-null  object
 3   rating     1132367 non-null  int64 
 4   review     1132198 non-null  object
dtypes: int64(3), object(2)
memory usage: 43.2+ MB


In [9]:
# Test complet du RecipeScorer
print("üß™ TEST DU SYST√àME DE SCORING")
print("=" * 50)

# Importer la classe corrig√©e
import preprocessing.score as score
importlib.reload(score)
from preprocessing.score import RecipScorer

# Initialiser le scorer
scorer = RecipScorer(alpha=0.5, beta=0.3, gamma=0.2)
print("‚úÖ RecipeScorer initialis√©")

# Test 1: Similarit√© Jaccard
print("\nüìä Test 1: Similarit√© Jaccard")
user_ingredients = ["chicken", "onion", "garlic", "salt", "pepper"]
recipe_ingredients_1 = ["chicken", "onion", "tomato", "salt"]
recipe_ingredients_2 = ["beef", "carrot", "potato"]

jaccard_1 = RecipScorer.jaccard_similarity(user_ingredients, recipe_ingredients_1)
jaccard_2 = RecipScorer.jaccard_similarity(user_ingredients, recipe_ingredients_2)

print(f"Ingr√©dients utilisateur: {user_ingredients}")
print(f"Recette 1: {recipe_ingredients_1} ‚Üí Jaccard: {jaccard_1:.3f}")
print(f"Recette 2: {recipe_ingredients_2} ‚Üí Jaccard: {jaccard_2:.3f}")

üß™ TEST DU SYST√àME DE SCORING
‚úÖ RecipeScorer initialis√©

üìä Test 1: Similarit√© Jaccard
Ingr√©dients utilisateur: ['chicken', 'onion', 'garlic', 'salt', 'pepper']
Recette 1: ['chicken', 'onion', 'tomato', 'salt'] ‚Üí Jaccard: 0.500
Recette 2: ['beef', 'carrot', 'potato'] ‚Üí Jaccard: 0.000


In [10]:
# Test 2: Scores de base avec un √©chantillon
print("\nüìà Test 2: Calcul des scores de base")
# Prendre un √©chantillon pour test
sample_recipes = recipes_df.head(100).copy()
sample_interactions = interactions_df[interactions_df['recipe_id'].isin(sample_recipes['id'])].copy()

# Renommer la colonne pour correspondre au code
sample_interactions = sample_interactions.rename(columns={'id': 'recipe_id'})

base_scores = scorer.compute_base_score(sample_recipes, sample_interactions)
print(f"Scores calcul√©s pour {len(base_scores)} recettes")
print("Aper√ßu des scores:")
print(base_scores.head())


üìà Test 2: Calcul des scores de base
Scores calcul√©s pour 100 recettes
Aper√ßu des scores:
   recipe_id  mean_rating  n_reviews  mean_rating_norm  popularity
0       5060          0.0          1               0.0    0.000000
1       5289          5.0          2               1.0    0.008929
2       8559          2.0          4               0.4    0.026786
3      19208          5.0          1               1.0    0.000000
4      22123          4.5          6               0.9    0.044643


In [11]:
# Test 3: Recommandations compl√®tes
print("\nüçΩÔ∏è Test 3: Recommandations compl√®tes")

# Utiliser les ingr√©dients normalis√©s si disponibles
if 'normalized_ingredients' in recipes_df.columns:
    user_ingredients_test = ["chicken", "onion", "garlic", "salt"]
    print(f"Ingr√©dients utilisateur: {user_ingredients_test}")
    
    # Obtenir des recommandations
    recommendations = scorer.recommend(
        recipes_df=recipes_df.head(1000),  # √âchantillon pour test rapide
        interactions_df=interactions_df,
        user_ingredients=user_ingredients_test,
        time_limit=60,  # Recettes de moins de 60 minutes
        top_n=10
    )
    
    print(f"\nüèÜ Top 10 des recommandations:")
    print("=" * 80)
    for i, row in recommendations.iterrows():
        print(f"{len(recommendations) - list(recommendations.index).index(i):2d}. {row['name'][:50]:<50}")
        print(f"    Score: {row['score']:.3f} | Jaccard: {row['jaccard']:.3f} | Rating: {row.get('mean_rating_norm', 0):.3f}")
        print(f"    Ingr√©dients: {row['normalized_ingredients'][:5]}...")  # Premiers 5 ingr√©dients
        print()



üçΩÔ∏è Test 3: Recommandations compl√®tes


In [21]:
# Test 4: Comparaison avec diff√©rents profils d'ingr√©dients
print("\nüî¨ Test 4: Comparaison de profils d'ingr√©dients")

test_profiles = [
    {
        'name': 'Cuisine italienne',
        'ingredients': ['tomato', 'basil', 'mozzarella', 'pasta', 'olive oil']
    },
    {
        'name': 'Cuisine asiatique', 
        'ingredients': ['soy sauce', 'ginger', 'garlic', 'rice', 'sesame oil']
    },
    {
        'name': 'P√¢tisserie',
        'ingredients': ['flour', 'sugar', 'butter', 'egg', 'vanilla']
    }
]

for profile in test_profiles:
    print(f"\n--- {profile['name']} ---")
    print(f"Ingr√©dients: {profile['ingredients']}")
    
    if 'normalized_ingredients' in recipes_df.columns:
        recs = scorer.recommend(
            recipes_df=recipes_df.head(500),
            interactions_df=interactions_df,
            user_ingredients=profile['ingredients'],
            top_n=3
        )
        
        print("Top 3 recommandations:")
        for idx, (_, row) in enumerate(recs.iterrows(), 1):
            print(f"  {idx}. {row['name'][:40]} (Score: {row['score']:.3f})")

print("\n‚úÖ Tests termin√©s!")


üî¨ Test 4: Comparaison de profils d'ingr√©dients

--- Cuisine italienne ---
Ingr√©dients: ['tomato', 'basil', 'mozzarella', 'pasta', 'olive oil']

--- Cuisine asiatique ---
Ingr√©dients: ['soy sauce', 'ginger', 'garlic', 'rice', 'sesame oil']

--- P√¢tisserie ---
Ingr√©dients: ['flour', 'sugar', 'butter', 'egg', 'vanilla']

‚úÖ Tests termin√©s!


In [22]:
print("Colonnes dans interactions_df:")
print(interactions_df.columns.tolist())

Colonnes dans interactions_df:
['user_id', 'recipe_id', 'date', 'rating', 'review']


In [35]:
# -*- coding: utf-8 -*-
"""
üçΩÔ∏è SYST√àME DE RECOMMANDATION DE RECETTES - VERSION OPTIMIS√âE
Auteur : Mohamed Kabbaj
Description :
Ce module propose une interface de test pour un syst√®me de recommandation de recettes
bas√© sur les ingr√©dients disponibles dans le frigo de l'utilisateur.
"""
import pandas as pd
import importlib
import logging
from datetime import datetime

# --- Configuration du logging ---
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%H:%M:%S"
)

# --- Imports dynamiques pour recharger les modules ---
import data_prepro
import preprocessing.score as score
importlib.reload(data_prepro)
importlib.reload(score)

from data_prepro import IngredientPreprocessor
from preprocessing.score import RecipScorer


class RecipeRecommender:
    """Interface principale pour recommander des recettes √† partir des ingr√©dients utilisateur."""

    def __init__(self, recipes_df, interactions_df):
        self.recipes_df = recipes_df
        self.interactions_df = interactions_df

        logging.info("Chargement du pr√©processeur et du moteur de score...")
        self.ingredient_preprocessor = IngredientPreprocessor("ingr_map.csv")
        self.scorer = RecipScorer(alpha=0.5, beta=0.3, gamma=0.2)
        logging.info("‚úÖ Syst√®me de recommandation initialis√© avec succ√®s")

    # -------------------------------------------------------
    # üîß Normalisation des ingr√©dients utilisateur
    # -------------------------------------------------------
    def normalize_user_ingredients(self, raw_ingredients):
        normalized = []
        print("\nüîÑ Normalisation des ingr√©dients :")
        for ingredient in raw_ingredients:
            cleaned = self.ingredient_preprocessor.normalize_ingredient(ingredient)
            if cleaned:
                normalized.append(cleaned)
                print(f"  ‚úì '{ingredient}' ‚Üí '{cleaned}'")
            else:
                print(f"  ‚ö†Ô∏è '{ingredient}' non reconnu")
        return normalized

    # -------------------------------------------------------
    # üß† Recommandation
    # -------------------------------------------------------
    def recommend(self, ingredients, time_limit=None, n_recommendations=5):
        logging.info(f"Recommandation bas√©e sur {len(ingredients)} ingr√©dients...")
        return self.scorer.recommend(
            recipes_df=self.recipes_df,
            interactions_df=self.interactions_df,
            user_ingredients=ingredients,
            time_limit=time_limit,
            top_n=n_recommendations
        )

    # -------------------------------------------------------
    # üéØ Affichage format√©
    # -------------------------------------------------------
    def display_recommendations(self, recommendations, user_ingredients, n=3):
    
        if recommendations is None or len(recommendations) == 0:
            print("‚ùå Aucune recette trouv√©e avec ces crit√®res")
            return

        # Prendre les n premi√®res lignes
        top_recipes = recommendations.head(n)
        print(f"\nüèÜ TOP {n} RECOMMANDATIONS ({len(top_recipes)} recettes affich√©es)")
        print("=" * 90)

        for i, (_, recipe) in enumerate(top_recipes.iterrows(), 1):
            print(f"\n{i}. üç¥ {recipe['name'].capitalize()}")
            print(f"   üìä Score global: {recipe['score']:.3f} | üîó Jaccard: {recipe['jaccard']:.3f}")

            if isinstance(recipe.get("normalized_ingredients"), list):
                ing_list = recipe["normalized_ingredients"]
                matched = set(user_ingredients) & set(ing_list)
                missing = [ing for ing in ing_list if ing not in user_ingredients]

                if matched:
                    print(f"   ‚úÖ En commun ({len(matched)}): {', '.join(list(matched)[:5])}")
                if missing:
                    print(f"   ‚ûï √Ä ajouter ({len(missing)}): {', '.join(missing[:5])}"
                        + (" ..." if len(missing) > 5 else ""))

            if "minutes" in recipe:
                print(f"   ‚è±Ô∏è Temps: {recipe['minutes']} min")
            print("   " + "-" * 80)


# -------------------------------------------------------
# üöÄ Fonction principale de test (sans input interactif)
# -------------------------------------------------------
def test_recipe_recommender(recipes_df, interactions_df):
    print("\nüçΩÔ∏è SYST√àME DE RECOMMANDATION DE RECETTES")
    print("=" * 50)
    start_time = datetime.now()

    recommender = RecipeRecommender(recipes_df, interactions_df)

    test_cases = [
        {
            "name": "Cuisine italienne",
            "ingredients": ["tomato", "garlic", "basil", "pasta", "olive oil"],
            "time_limit": 45
        },
        {
            "name": "Petit-d√©jeuner rapide",
            "ingredients": ["eggs", "butter", "milk", "flour"],
            "time_limit": 20
        }
    ]

    for case in test_cases:
        print(f"\n{'=' * 90}")
        print(f"üß™ TEST: {case['name']}")
        print(f"{'=' * 90}")

        normalized = recommender.normalize_user_ingredients(case["ingredients"])

        if not normalized:
            print(" Aucun ingr√©dient valide trouv√©")
            continue

        try:
            recommendations = recommender.recommend(
                ingredients=normalized,
                time_limit=case.get("time_limit"),
                n_recommendations=3
            )
            recommender.display_recommendations(recommendations, normalized)
        except Exception as e:
            logging.error(f"Erreur lors du test '{case['name']}': {e}", exc_info=True)

    print(f"\n‚úÖ Tests termin√©s en {(datetime.now() - start_time).seconds}s")


# Exemple d‚Äôex√©cution
if __name__ == "__main__":
    # Ces DataFrames doivent √™tre import√©s depuis ton environnement principal
    # (par exemple depuis un notebook ou une fonction setup)
    try:
        test_recipe_recommender(recipes_df, interactions_df)
    except NameError:
        print("‚ö†Ô∏è Les DataFrames 'recipes_df' et 'interactions_df' doivent √™tre d√©finis avant d'ex√©cuter le script.")


INFO:root:Chargement du pr√©processeur et du moteur de score...



üçΩÔ∏è SYST√àME DE RECOMMANDATION DE RECETTES


INFO:data_prepro:Ingredient map loaded successfully.
INFO:root:‚úÖ Syst√®me de recommandation initialis√© avec succ√®s
INFO:root:Recommandation bas√©e sur 5 ingr√©dients...
INFO:root:‚úÖ Syst√®me de recommandation initialis√© avec succ√®s
INFO:root:Recommandation bas√©e sur 5 ingr√©dients...



üß™ TEST: Cuisine italienne

üîÑ Normalisation des ingr√©dients :
  ‚úì 'tomato' ‚Üí 'tomato'
  ‚úì 'garlic' ‚Üí 'garlic'
  ‚úì 'basil' ‚Üí 'basil'
  ‚úì 'pasta' ‚Üí 'pastum'
  ‚úì 'olive oil' ‚Üí 'olive oil'


INFO:root:Recommandation bas√©e sur 4 ingr√©dients...



üèÜ TOP 3 RECOMMANDATIONS (3 recettes affich√©es)

1. üç¥ To die for crock pot roast
   üìä Score global: 0.456 | üîó Jaccard: 0.000
   --------------------------------------------------------------------------------

2. üç¥ Creamy cajun chicken pasta
   üìä Score global: 0.452 | üîó Jaccard: 0.000
   --------------------------------------------------------------------------------

3. üç¥ Best banana bread
   üìä Score global: 0.451 | üîó Jaccard: 0.000
   --------------------------------------------------------------------------------

üß™ TEST: Petit-d√©jeuner rapide

üîÑ Normalisation des ingr√©dients :
  ‚úì 'eggs' ‚Üí 'egg'
  ‚úì 'butter' ‚Üí 'butter'
  ‚úì 'milk' ‚Üí 'milk'
  ‚úì 'flour' ‚Üí 'flmy'

üèÜ TOP 3 RECOMMANDATIONS (3 recettes affich√©es)

1. üç¥ To die for crock pot roast
   üìä Score global: 0.456 | üîó Jaccard: 0.000
   --------------------------------------------------------------------------------

2. üç¥ Creamy cajun chicken pasta
   üìä Score g

In [36]:
# -*- coding: utf-8 -*-
"""
üçΩÔ∏è SYST√àME DE RECOMMANDATION DE RECETTES - MODE INTERACTIF
Auteur : Mohamed Kabbaj
"""

import logging
from datetime import datetime
from data_prepro import IngredientPreprocessor
from preprocessing.score import RecipScorer

class RecipeRecommender:
    """Interface principale pour recommander des recettes √† partir des ingr√©dients utilisateur."""

    def __init__(self, recipes_df, interactions_df):
        self.recipes_df = recipes_df
        self.interactions_df = interactions_df

        logging.info("Chargement du pr√©processeur et du moteur de score...")
        self.ingredient_preprocessor = IngredientPreprocessor("ingr_map.csv")
        self.scorer = RecipScorer(alpha=0.5, beta=0.3, gamma=0.2)
        logging.info("‚úÖ Syst√®me de recommandation initialis√© avec succ√®s")

    def normalize_user_ingredients(self, raw_ingredients):
        normalized = []
        print("\nüîÑ Normalisation des ingr√©dients :")
        for ingredient in raw_ingredients:
            cleaned = self.ingredient_preprocessor.normalize_ingredient(ingredient)
            if cleaned:
                normalized.append(cleaned)
                print(f"  ‚úì '{ingredient}' ‚Üí '{cleaned}'")
            else:
                print(f"  ‚ö†Ô∏è '{ingredient}' non reconnu")
        return normalized

    def recommend(self, ingredients, time_limit=None, n_recommendations=5):
        logging.info(f"Recommandation bas√©e sur {len(ingredients)} ingr√©dients...")
        return self.scorer.recommend(
            recipes_df=self.recipes_df,
            interactions_df=self.interactions_df,
            user_ingredients=ingredients,
            time_limit=time_limit,
            top_n=n_recommendations
        )

    def display_recommendations(self, recommendations, user_ingredients, n=5):
        if recommendations is None or len(recommendations) == 0:
            print("‚ùå Aucune recette trouv√©e avec ces crit√®res")
            return

        top_recipes = recommendations.head(n)
        print(f"\nüèÜ TOP {n} RECOMMANDATIONS")
        print("=" * 90)

        for i, (_, recipe) in enumerate(top_recipes.iterrows(), 1):
            print(f"\n{i}. üçΩÔ∏è {recipe['name'].capitalize()}")
            print(f"   üìä Score: {recipe['score']:.3f} | üîó Jaccard: {recipe['jaccard']:.3f}")

            if isinstance(recipe.get("normalized_ingredients"), list):
                ing_list = recipe["normalized_ingredients"]
                matched = set(user_ingredients) & set(ing_list)
                missing = [ing for ing in ing_list if ing not in user_ingredients]

                if matched:
                    print(f"   ‚úÖ En commun: {', '.join(list(matched)[:5])}")
                if missing:
                    print(f"   ‚ûï √Ä ajouter: {', '.join(missing[:5])}")

            if "minutes" in recipe:
                print(f"   ‚è±Ô∏è Temps: {recipe['minutes']} min")
            print("   " + "-" * 80)


# -------------------------------------------------------
# üöÄ MODE INTERACTIF UTILISATEUR
# -------------------------------------------------------
def interactive_recommendation(recipes_df, interactions_df):
    print("\nüç≥ Bienvenue dans le Syst√®me de Recommandation de Recettes üç≥")
    print("=" * 70)
    print("Entrez les ingr√©dients que vous avez dans votre frigo (s√©par√©s par des virgules).")
    print("Exemple : tomato, cheese, basil, pasta")
    print("Tapez 'exit' pour quitter.\n")

    recommender = RecipeRecommender(recipes_df, interactions_df)

    while True:
        user_input = input("\nüìù Vos ingr√©dients : ").strip().lower()
        if user_input in ["exit", "quit", "q"]:
            print("üëã Fin de la session. Bon app√©tit !")
            break

        raw_ingredients = [ing.strip() for ing in user_input.split(",") if ing.strip()]
        if not raw_ingredients:
            print("‚ö†Ô∏è Veuillez entrer au moins un ingr√©dient.")
            continue

        normalized = recommender.normalize_user_ingredients(raw_ingredients)

        time_limit = input("‚è±Ô∏è Temps max (en minutes) [appuyez sur Entr√©e pour ignorer] : ").strip()
        time_limit = int(time_limit) if time_limit.isdigit() else None

        print("\nüîç Recherche de recettes correspondantes...")
        recommendations = recommender.recommend(
            ingredients=normalized,
            time_limit=time_limit,
            n_recommendations=5
        )

        recommender.display_recommendations(recommendations, normalized, n=5)


In [41]:
print("üéØ TEST FINAL - SYST√àME DE RECOMMANDATION COMPLET")
print("="*60)

# Test 1: V√©rifier que le preprocessor fonctionne
print("üìä 1. Test du Preprocessor")
print(f"   ‚úÖ Preprocessor cr√©√©: {type(preprocessor)}")
print(f"   ‚úÖ Recipes charg√©es: {len(recipes_df)} recettes")
print(f"   ‚úÖ Interactions charg√©es: {len(interactions_df)} interactions")

# Test 2: V√©rifier le scorer
print("\nüèÜ 2. Test du Scorer")
print(f"   ‚úÖ Scorer cr√©√©: {type(scorer)}")

# Test 3: Test de recommandation compl√®te
print("\nü•ï 3. Test de Recommandation Compl√®te")
test_ingredients = ["chicken", "onion", "garlic"]
print(f"   üîé Ingr√©dients utilisateur: {test_ingredients}")

try:
    # Utiliser un √©chantillon pour le test (plus rapide)
    sample_recipes = recipes_df.sample(min(1000, len(recipes_df)), random_state=42)
    sample_interactions = interactions_df[interactions_df['recipe_id'].isin(sample_recipes['id'])]
    
    print(f"   üìã √âchantillon: {len(sample_recipes)} recettes, {len(sample_interactions)} interactions")
    
    # Faire la recommandation
    recommendations = scorer.recommend(
        recipes_df=sample_recipes,
        interactions_df=sample_interactions,
        user_ingredients=test_ingredients,
        time_limit=60,
        top_n=5
    )
    
    print(f"   ‚úÖ Recommandations g√©n√©r√©es: {len(recommendations)}")
    print("\nüìã TOP 5 RECOMMANDATIONS:")
    
    for i, (_, rec) in enumerate(recommendations.iterrows(), 1):
        print(f"   {i}. {rec['name'][:50]}...")
        print(f"      üéØ Score: {rec['score']:.3f} | ü•ï Jaccard: {rec['jaccard']:.3f}")
        if 'minutes' in rec:
            print(f"      ‚è±Ô∏è Temps: {rec['minutes']} min")
        
        # Afficher les ingr√©dients si disponibles
        ing_col = 'normalized_ingredients' if 'normalized_ingredients' in rec else 'ingredients'
        if ing_col in rec and pd.notnull(rec[ing_col]):
            ingredients = rec[ing_col][:5] if isinstance(rec[ing_col], list) else []
            if ingredients:
                print(f"      ü•Ñ Ingr√©dients: {', '.join(ingredients)}")
        print()
    
    print("üéâ SYST√àME FONCTIONNEL !")
    
except Exception as e:
    print(f"   ‚ùå Erreur lors de la recommandation: {e}")
    import traceback
    traceback.print_exc()

üéØ TEST FINAL - SYST√àME DE RECOMMANDATION COMPLET
üìä 1. Test du Preprocessor
   ‚úÖ Preprocessor cr√©√©: <class 'data_prepro.RecipePreprocessor'>
   ‚úÖ Recipes charg√©es: 231637 recettes
   ‚úÖ Interactions charg√©es: 1132367 interactions

üèÜ 2. Test du Scorer
   ‚úÖ Scorer cr√©√©: <class 'reco_score.RecipScorer'>

ü•ï 3. Test de Recommandation Compl√®te
   üîé Ingr√©dients utilisateur: ['chicken', 'onion', 'garlic']
   üìã √âchantillon: 1000 recettes, 4333 interactions
‚è±Ô∏è Filtrage temps: 1000 ‚Üí 738 recettes
ü•ï Utilisation de la colonne: ingredients
üìä Stats calcul√©es pour 1000 recettes
üîó Apr√®s fusion: 738 recettes
üèÜ Retour de 5 recommandations
   ‚úÖ Recommandations g√©n√©r√©es: 5

üìã TOP 5 RECOMMANDATIONS:
   1. the thigh who loved me...
      üéØ Score: 0.471 | ü•ï Jaccard: 0.000
      ‚è±Ô∏è Temps: 50 min

   2. cinnamon loaf...
      üéØ Score: 0.454 | ü•ï Jaccard: 0.000
      ‚è±Ô∏è Temps: 60 min

   3. chicken souvlaki marinade...
      üéØ Sc

In [42]:
print("\nüîç DIAGNOSTIC DES INGR√âDIENTS")
print("="*40)

# Regarder quelques recettes avec leurs ingr√©dients
sample = sample_recipes.head(3)
for idx, (_, recipe) in enumerate(sample.iterrows(), 1):
    print(f"\nüìã Recette {idx}: {recipe['name'][:40]}...")
    
    # Ingr√©dients bruts
    if 'ingredients' in recipe and pd.notnull(recipe['ingredients']):
        ingredients = recipe['ingredients']
        if isinstance(ingredients, list):
            print(f"   ü•Ñ Bruts: {ingredients[:3]}...")
        elif isinstance(ingredients, str):
            print(f"   ü•Ñ Bruts: {ingredients[:100]}...")
    
    # Ingr√©dients normalis√©s
    if 'normalized_ingredients' in recipe and pd.notnull(recipe['normalized_ingredients']):
        norm_ing = recipe['normalized_ingredients']
        if isinstance(norm_ing, list):
            print(f"   ‚úÖ Normalis√©s: {norm_ing[:3]}...")
        else:
            print(f"   ‚úÖ Normalis√©s: {norm_ing}")
    else:
        print("   ‚ùå Pas d'ingr√©dients normalis√©s")

# Test avec ingr√©dients plus g√©n√©riques
print("\nüß™ TEST AVEC INGR√âDIENTS G√âN√âRIQUES")
generic_ingredients = ["salt", "sugar", "flour"]
print(f"Ingr√©dients: {generic_ingredients}")

try:
    recs = scorer.recommend(
        recipes_df=sample_recipes.head(100),
        interactions_df=sample_interactions,
        user_ingredients=generic_ingredients,
        top_n=3
    )
    
    print(f"‚úÖ Top 3 avec ingr√©dients g√©n√©riques:")
    for i, (_, rec) in enumerate(recs.iterrows(), 1):
        print(f"   {i}. Score: {rec['score']:.3f} | Jaccard: {rec['jaccard']:.3f}")
        print(f"      {rec['name'][:50]}...")
        
except Exception as e:
    print(f"‚ùå Erreur: {e}")


üîç DIAGNOSTIC DES INGR√âDIENTS

üìã Recette 1: crab filled crescent snacks...
   ü•Ñ Bruts: ['crabmeat', 'cream cheese', 'green onions', 'garlic salt', 'refrigerated crescent dinner rolls', 'e...
   ‚ùå Pas d'ingr√©dients normalis√©s

üìã Recette 2: curried bean salad...
   ü•Ñ Bruts: ['garbanzo beans', 'black beans', 'onion', 'ginger paste', 'mild curry powder', 'dried cilantro', 'l...
   ‚ùå Pas d'ingr√©dients normalis√©s

üìã Recette 3: delicious steak with onion marinade...
   ü•Ñ Bruts: ['olive oil', 'red onion', 'light brown sugar', 'balsamic vinegar', 'steaks']...
   ‚ùå Pas d'ingr√©dients normalis√©s

üß™ TEST AVEC INGR√âDIENTS G√âN√âRIQUES
Ingr√©dients: ['salt', 'sugar', 'flour']
ü•ï Utilisation de la colonne: ingredients
üìä Stats calcul√©es pour 1000 recettes
üîó Apr√®s fusion: 100 recettes
üèÜ Retour de 3 recommandations
‚úÖ Top 3 avec ingr√©dients g√©n√©riques:
   1. Score: 0.348 | Jaccard: 0.000
      hockey puck potatoes...
   2. Score: 0.335 | Jaccard: 0.0

In [43]:
print("\nüöÄ TEST AVEC DONN√âES PREPROCESS√âES")
print("="*50)

# V√©rifier si nous avons des donn√©es preprocess√©es
if hasattr(preprocessor, 'recipes_df') and preprocessor.recipes_df is not None:
    processed_recipes = preprocessor.recipes_df
    print(f"‚úÖ Donn√©es preprocess√©es disponibles: {len(processed_recipes)} recettes")
    
    # V√©rifier les colonnes d'ingr√©dients
    if 'normalized_ingredients' in processed_recipes.columns:
        # Compter les recettes avec ingr√©dients normalis√©s
        has_norm = processed_recipes['normalized_ingredients'].notna().sum()
        print(f"‚úÖ Recettes avec ingr√©dients normalis√©s: {has_norm}")
        
        # Prendre un √©chantillon des donn√©es preprocess√©es
        sample_processed = processed_recipes.sample(min(500, len(processed_recipes)), random_state=42)
        
        # Test avec les donn√©es preprocess√©es
        user_ingredients = ["chicken", "onion", "garlic"]
        print(f"\nüéØ Test avec ingr√©dients: {user_ingredients}")
        
        try:
            final_recs = scorer.recommend(
                recipes_df=sample_processed,
                interactions_df=interactions_df,
                user_ingredients=user_ingredients,
                time_limit=60,
                top_n=5
            )
            
            print(f"\nüèÜ TOP 5 AVEC DONN√âES PREPROCESS√âES:")
            for i, (_, rec) in enumerate(final_recs.iterrows(), 1):
                print(f"   {i}. {rec['name'][:45]}...")
                print(f"      üéØ Score: {rec['score']:.3f} | ü•ï Jaccard: {rec['jaccard']:.3f}")
                if 'minutes' in rec and pd.notnull(rec['minutes']):
                    print(f"      ‚è±Ô∏è Temps: {rec['minutes']} min")
                
                # Afficher ingr√©dients normalis√©s
                if 'normalized_ingredients' in rec and pd.notnull(rec['normalized_ingredients']):
                    norm_ing = rec['normalized_ingredients']
                    if isinstance(norm_ing, list) and len(norm_ing) > 0:
                        common = set(user_ingredients) & set(norm_ing)
                        print(f"      ü§ù Ingr√©dients communs: {list(common)}")
                        print(f"      ü•Ñ Premiers ingr√©dients: {norm_ing[:5]}")
                print()
            
        except Exception as e:
            print(f"‚ùå Erreur: {e}")
            import traceback
            traceback.print_exc()
    else:
        print("‚ùå Pas de colonne 'normalized_ingredients' dans les donn√©es preprocess√©es")
else:
    print("‚ùå Pas de donn√©es preprocess√©es disponibles")
    print("üí° Il faut d'abord ex√©cuter le preprocessing complet")


üöÄ TEST AVEC DONN√âES PREPROCESS√âES
‚ùå Pas de donn√©es preprocess√©es disponibles
üí° Il faut d'abord ex√©cuter le preprocessing complet


In [44]:
print("üîß LANCEMENT DU PREPROCESSING COMPLET")
print("="*50)

try:
    # Prendre un √©chantillon pour le preprocessing (plus rapide pour le test)
    sample_size = 5000
    sample_recipes = recipes_df.sample(min(sample_size, len(recipes_df)), random_state=42)
    sample_interactions = interactions_df[interactions_df['recipe_id'].isin(sample_recipes['id'])]
    
    print(f"üìä √âchantillon pour preprocessing:")
    print(f"   - Recettes: {len(sample_recipes)}")
    print(f"   - Interactions: {len(sample_interactions)}")
    
    # Lancer le preprocessing sur l'√©chantillon
    print("\n‚öôÔ∏è Preprocessing en cours...")
    processed_data = preprocessor.preprocess_recipes(sample_recipes)
    
    print(f"‚úÖ Preprocessing termin√©: {len(processed_data)} recettes")
    
    # V√©rifier les ingr√©dients normalis√©s
    if 'normalized_ingredients' in processed_data.columns:
        has_norm = processed_data['normalized_ingredients'].notna().sum()
        print(f"‚úÖ Recettes avec ingr√©dients normalis√©s: {has_norm}")
        
        # Regarder quelques exemples
        sample_proc = processed_data[processed_data['normalized_ingredients'].notna()].head(3)
        print("\nüìã EXEMPLES D'INGR√âDIENTS NORMALIS√âS:")
        for i, (_, recipe) in enumerate(sample_proc.iterrows(), 1):
            print(f"\n{i}. {recipe['name'][:40]}...")
            
            # Ingr√©dients bruts
            if 'ingredients' in recipe and pd.notnull(recipe['ingredients']):
                raw_ing = recipe['ingredients']
                if isinstance(raw_ing, list):
                    print(f"   ü•Ñ Bruts: {raw_ing[:3]}...")
            
            # Ingr√©dients normalis√©s
            norm_ing = recipe['normalized_ingredients']
            if isinstance(norm_ing, list):
                print(f"   ‚úÖ Normalis√©s: {norm_ing[:3]}...")
        
        # Test de recommandation avec ingr√©dients normalis√©s
        print(f"\nüéØ TEST RECOMMANDATION AVEC INGR√âDIENTS NORMALIS√âS")
        user_ingredients = ["chicken", "onion", "garlic"]
        print(f"Ingr√©dients utilisateur: {user_ingredients}")
        
        final_recs = scorer.recommend(
            recipes_df=processed_data,
            interactions_df=sample_interactions,
            user_ingredients=user_ingredients,
            time_limit=90,
            top_n=5
        )
        
        print(f"\nüèÜ TOP 5 RECOMMANDATIONS FINALES:")
        for i, (_, rec) in enumerate(final_recs.iterrows(), 1):
            print(f"\n{i}. {rec['name'][:50]}...")
            print(f"   üéØ Score total: {rec['score']:.3f}")
            print(f"   ü•ï Jaccard: {rec['jaccard']:.3f}")
            print(f"   ‚≠ê Rating: {rec['mean_rating_norm']:.3f}")
            print(f"   üî• Popularit√©: {rec['popularity']:.3f}")
            
            if 'minutes' in rec and pd.notnull(rec['minutes']):
                print(f"   ‚è±Ô∏è Temps: {rec['minutes']} min")
            
            # Ingr√©dients communs
            if 'normalized_ingredients' in rec and pd.notnull(rec['normalized_ingredients']):
                norm_ing = rec['normalized_ingredients']
                if isinstance(norm_ing, list):
                    common = set(user_ingredients) & set(norm_ing)
                    print(f"   ü§ù Communs: {list(common)}")
                    print(f"   ü•Ñ Ingr√©dients: {norm_ing[:6]}...")
    
    else:
        print("‚ùå Erreur: Pas de colonne 'normalized_ingredients' apr√®s preprocessing")

except Exception as e:
    print(f"‚ùå Erreur durant le preprocessing: {e}")
    import traceback
    traceback.print_exc()

üîß LANCEMENT DU PREPROCESSING COMPLET
üìä √âchantillon pour preprocessing:
   - Recettes: 5000
   - Interactions: 23121

‚öôÔ∏è Preprocessing en cours...
‚ùå Erreur durant le preprocessing: 'RecipePreprocessor' object has no attribute 'preprocess_recipes'


Traceback (most recent call last):
  File "/tmp/ipykernel_40/2618704074.py", line 16, in <module>
    processed_data = preprocessor.preprocess_recipes(sample_recipes)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'RecipePreprocessor' object has no attribute 'preprocess_recipes'


In [45]:
# V√©rifier les m√©thodes disponibles du preprocessor
print("üîç M√âTHODES DISPONIBLES DU PREPROCESSOR:")
methods = [method for method in dir(preprocessor) if not method.startswith('_')]
print(f"   {methods}")

# Utiliser la bonne m√©thode
print(f"\nüîß PREPROCESSING AVEC LA BONNE M√âTHODE")
try:
    # Prendre un √©chantillon plus petit pour commencer
    sample_size = 1000
    sample_recipes = recipes_df.sample(min(sample_size, len(recipes_df)), random_state=42)
    sample_interactions = interactions_df[interactions_df['recipe_id'].isin(sample_recipes['id'])]
    
    print(f"üìä √âchantillon: {len(sample_recipes)} recettes, {len(sample_interactions)} interactions")
    
    # Tester la m√©thode preprocess
    print("\n‚öôÔ∏è Lancement du preprocessing...")
    processed_data = preprocessor.preprocess(sample_recipes)
    
    print(f"‚úÖ Preprocessing r√©ussi: {len(processed_data)} recettes")
    print(f"üìã Colonnes disponibles: {list(processed_data.columns)}")
    
    # V√©rifier les ingr√©dients normalis√©s
    if 'normalized_ingredients' in processed_data.columns:
        has_norm = processed_data['normalized_ingredients'].notna().sum()
        has_list_norm = processed_data['normalized_ingredients'].apply(
            lambda x: isinstance(x, list) and len(x) > 0
        ).sum()
        
        print(f"‚úÖ Recettes avec ingr√©dients normalis√©s: {has_norm}")
        print(f"‚úÖ Recettes avec listes d'ingr√©dients: {has_list_norm}")
        
        # Exemple d'ingr√©dients normalis√©s
        if has_list_norm > 0:
            example = processed_data[
                processed_data['normalized_ingredients'].apply(
                    lambda x: isinstance(x, list) and len(x) > 0
                )
            ].iloc[0]
            
            print(f"\nüìã EXEMPLE D'INGR√âDIENTS NORMALIS√âS:")
            print(f"   Recette: {example['name'][:40]}...")
            print(f"   Bruts: {example['ingredients'][:3] if isinstance(example['ingredients'], list) else 'N/A'}")
            print(f"   Normalis√©s: {example['normalized_ingredients'][:5]}")
            
            # Test final de recommandation
            print(f"\nüéØ TEST FINAL DE RECOMMANDATION")
            user_ingredients = ["chicken", "onion", "garlic"]
            
            recommendations = scorer.recommend(
                recipes_df=processed_data,
                interactions_df=sample_interactions,
                user_ingredients=user_ingredients,
                time_limit=60,
                top_n=5
            )
            
            print(f"\nüèÜ üéâ RECOMMANDATIONS FINALES üéâ üèÜ")
            for i, (_, rec) in enumerate(recommendations.iterrows(), 1):
                print(f"\nü•á {i}. {rec['name'][:45]}...")
                print(f"   üéØ Score: {rec['score']:.3f} | ü•ï Jaccard: {rec['jaccard']:.3f}")
                
                if rec['jaccard'] > 0:  # Afficher d√©tails si match trouv√©
                    norm_ing = rec['normalized_ingredients']
                    if isinstance(norm_ing, list):
                        common = set(user_ingredients) & set(norm_ing)
                        print(f"   ü§ù Ingr√©dients communs: {sorted(common)}")
                        print(f"   üìù Ingr√©dients recette: {norm_ing[:6]}...")
                
                if 'minutes' in rec and pd.notnull(rec['minutes']):
                    print(f"   ‚è±Ô∏è Temps: {rec['minutes']} min")
        
        if recommendations.empty:
            print("‚ùå Aucune recommandation g√©n√©r√©e")
        elif recommendations['jaccard'].sum() == 0:
            print("‚ö†Ô∏è Aucun match d'ingr√©dients trouv√© - v√©rifier la normalisation")
        else:
            max_jaccard = recommendations['jaccard'].max()
            print(f"\nüéä SYST√àME OP√âRATIONNEL ! Score Jaccard max: {max_jaccard:.3f}")
    
    else:
        print("‚ùå Pas de colonne 'normalized_ingredients' cr√©√©e")

except Exception as e:
    print(f"‚ùå Erreur: {e}")
    import traceback
    traceback.print_exc()

üîç M√âTHODES DISPONIBLES DU PREPROCESSOR:
   ['description_prep', 'ingredients_prep', 'nutrition_prep', 'preprocess_dataframe', 'preprocess_recipe', 'steps_prep', 'tags_prep']

üîß PREPROCESSING AVEC LA BONNE M√âTHODE
üìä √âchantillon: 1000 recettes, 4333 interactions

‚öôÔ∏è Lancement du preprocessing...
‚ùå Erreur: 'RecipePreprocessor' object has no attribute 'preprocess'


Traceback (most recent call last):
  File "/tmp/ipykernel_40/2943854384.py", line 18, in <module>
    processed_data = preprocessor.preprocess(sample_recipes)
                     ^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'RecipePreprocessor' object has no attribute 'preprocess'


In [46]:
print("üîß PREPROCESSING AVEC preprocess_dataframe")
print("="*50)

try:
    # √âchantillon pour test
    sample_size = 500  # Plus petit pour commencer
    sample_recipes = recipes_df.sample(min(sample_size, len(recipes_df)), random_state=42)
    sample_interactions = interactions_df[interactions_df['recipe_id'].isin(sample_recipes['id'])]
    
    print(f"üìä √âchantillon: {len(sample_recipes)} recettes, {len(sample_interactions)} interactions")
    
    # Utiliser la bonne m√©thode
    print("\n‚öôÔ∏è Preprocessing en cours...")
    processed_data = preprocessor.preprocess_dataframe(sample_recipes)
    
    print(f"‚úÖ Preprocessing termin√©: {len(processed_data)} recettes")
    print(f"üìã Colonnes: {list(processed_data.columns)}")
    
    # V√©rifier les ingr√©dients normalis√©s
    if 'normalized_ingredients' in processed_data.columns:
        has_norm = processed_data['normalized_ingredients'].notna().sum()
        has_list = processed_data['normalized_ingredients'].apply(
            lambda x: isinstance(x, list) and len(x) > 0
        ).sum()
        
        print(f"‚úÖ Recettes avec ingr√©dients normalis√©s: {has_norm}/{len(processed_data)}")
        print(f"‚úÖ Recettes avec listes d'ingr√©dients: {has_list}")
        
        if has_list > 0:
            # Exemple
            example = processed_data[
                processed_data['normalized_ingredients'].apply(
                    lambda x: isinstance(x, list) and len(x) > 0
                )
            ].iloc[0]
            
            print(f"\nüìã EXEMPLE:")
            print(f"   Recette: {example['name']}")
            print(f"   Normalis√©s: {example['normalized_ingredients'][:5]}")
            
            # TEST FINAL üéØ
            print(f"\nüéØ üéâ TEST RECOMMANDATION FINALE üéâ")
            user_ingredients = ["chicken", "onion", "garlic", "salt"]
            print(f"Ingr√©dients utilisateur: {user_ingredients}")
            
            final_recommendations = scorer.recommend(
                recipes_df=processed_data,
                interactions_df=sample_interactions,
                user_ingredients=user_ingredients,
                time_limit=90,
                top_n=5
            )
            
            print(f"\nüèÜ TOP 5 RECOMMANDATIONS:")
            
            success = False
            for i, (_, rec) in enumerate(final_recommendations.iterrows(), 1):
                jaccard_score = rec['jaccard']
                total_score = rec['score']
                
                print(f"\nü•á {i}. {rec['name'][:50]}...")
                print(f"   üéØ Score total: {total_score:.3f}")
                print(f"   ü•ï Jaccard: {jaccard_score:.3f}")
                print(f"   ‚≠ê Rating norm: {rec.get('mean_rating_norm', 'N/A'):.3f}")
                print(f"   üî• Popularit√©: {rec.get('popularity', 'N/A'):.3f}")
                
                if 'minutes' in rec and pd.notnull(rec['minutes']):
                    print(f"   ‚è±Ô∏è Temps: {rec['minutes']} min")
                
                # D√©tails des ingr√©dients si match
                if jaccard_score > 0 and 'normalized_ingredients' in rec:
                    norm_ing = rec['normalized_ingredients']
                    if isinstance(norm_ing, list):
                        common = set(user_ingredients) & set(norm_ing)
                        print(f"   ü§ù Ingr√©dients communs: {sorted(common)}")
                        print(f"   üìù Tous les ingr√©dients: {norm_ing}")
                        success = True
                elif isinstance(rec.get('normalized_ingredients'), list):
                    print(f"   üìù Ingr√©dients de la recette: {rec['normalized_ingredients'][:5]}...")
            
            # R√©sum√© final
            max_jaccard = final_recommendations['jaccard'].max()
            has_matches = (final_recommendations['jaccard'] > 0).sum()
            
            print(f"\nüéä R√âSULTAT FINAL:")
            print(f"   ‚úÖ Syst√®me op√©rationnel: OUI")
            print(f"   üìä Recommandations g√©n√©r√©es: {len(final_recommendations)}")
            print(f"   ü•ï Score Jaccard maximum: {max_jaccard:.3f}")
            print(f"   ü§ù Recettes avec ingr√©dients communs: {has_matches}")
            
            if max_jaccard > 0:
                print(f"   üéâ SUCCESS: Le syst√®me trouve des correspondances d'ingr√©dients!")
            else:
                print(f"   ‚ö†Ô∏è  INFO: Pas de correspondance exacte, mais le syst√®me fonctionne")
        
        else:
            print("‚ùå Aucune recette avec des ingr√©dients normalis√©s sous forme de liste")
    
    else:
        print("‚ùå Colonne 'normalized_ingredients' manquante")

except Exception as e:
    print(f"‚ùå Erreur finale: {e}")
    import traceback
    traceback.print_exc()

INFO:data_prepro:D√©but du pr√©traitement de 500 recettes
INFO:data_prepro:Pr√©traitement termin√©: 500 recettes trait√©es


üîß PREPROCESSING AVEC preprocess_dataframe
üìä √âchantillon: 500 recettes, 2212 interactions

‚öôÔ∏è Preprocessing en cours...
‚úÖ Preprocessing termin√©: 500 recettes
üìã Colonnes: ['recipe_id', 'ingredients', 'ingredient_categories', 'normalized_ingredients_list', 'nutrition_dict', 'tags', 'meal_type', 'dietary_restrictions', 'cuisine_type', 'n_steps', 'effort_score', 'cooking_techniques', 'description_keywords']
‚ùå Colonne 'normalized_ingredients' manquante


In [47]:
print("üîÑ CORRECTION: Utiliser normalized_ingredients_list")
print("="*55)

# V√©rifier les donn√©es preprocess√©es
print(f"‚úÖ Colonnes disponibles: {list(processed_data.columns)}")

# V√©rifier la colonne normalized_ingredients_list  
if 'normalized_ingredients_list' in processed_data.columns:
    has_norm = processed_data['normalized_ingredients_list'].notna().sum()
    has_list = processed_data['normalized_ingredients_list'].apply(
        lambda x: isinstance(x, list) and len(x) > 0
    ).sum()
    
    print(f"‚úÖ Recettes avec ingr√©dients normalis√©s: {has_norm}/{len(processed_data)}")
    print(f"‚úÖ Recettes avec listes d'ingr√©dients: {has_list}")
    
    if has_list > 0:
        # Regarder un exemple
        example = processed_data[
            processed_data['normalized_ingredients_list'].apply(
                lambda x: isinstance(x, list) and len(x) > 0
            )
        ].iloc[0]
        
        print(f"\nüìã EXEMPLE D'INGR√âDIENTS NORMALIS√âS:")
        print(f"   Recipe ID: {example['recipe_id']}")
        print(f"   Ingr√©dients bruts: {example.get('ingredients', 'N/A')}")
        print(f"   Ingr√©dients normalis√©s: {example['normalized_ingredients_list'][:5]}...")
        
        # Cr√©er une copie avec le bon nom de colonne pour le scorer
        processed_for_scorer = processed_data.copy()
        processed_for_scorer['normalized_ingredients'] = processed_data['normalized_ingredients_list']
        
        # Il faut aussi les autres colonnes (id, name, minutes)
        # Merger avec les donn√©es originales
        processed_with_orig = processed_for_scorer.merge(
            sample_recipes[['id', 'name', 'minutes']].rename(columns={'id': 'recipe_id'}),
            on='recipe_id',
            how='left'
        )
        processed_with_orig['id'] = processed_with_orig['recipe_id']
        
        print(f"\nüìä Donn√©es pr√©par√©es pour le scorer:")
        print(f"   Colonnes: {list(processed_with_orig.columns)}")
        print(f"   Taille: {len(processed_with_orig)} recettes")
        
        # TEST FINAL DE RECOMMANDATION üéØ
        print(f"\nüéØ üéâ TEST RECOMMANDATION FINAL üéâ")
        user_ingredients = ["chicken", "onion", "garlic", "salt"]
        print(f"Ingr√©dients utilisateur: {user_ingredients}")
        
        try:
            final_recs = scorer.recommend(
                recipes_df=processed_with_orig,
                interactions_df=sample_interactions,
                user_ingredients=user_ingredients,
                time_limit=120,
                top_n=5
            )
            
            print(f"\nüèÜ TOP 5 RECOMMANDATIONS FINALES:")
            
            for i, (_, rec) in enumerate(final_recs.iterrows(), 1):
                jaccard_score = rec['jaccard']
                total_score = rec['score']
                
                print(f"\nü•á {i}. {rec['name'][:50]}...")
                print(f"   üéØ Score total: {total_score:.3f}")
                print(f"   ü•ï Jaccard: {jaccard_score:.3f}")
                print(f"   ‚≠ê Rating: {rec.get('mean_rating_norm', 0):.3f}")
                print(f"   üî• Popularit√©: {rec.get('popularity', 0):.3f}")
                
                if 'minutes' in rec and pd.notnull(rec['minutes']):
                    print(f"   ‚è±Ô∏è Temps: {rec['minutes']} min")
                
                # Afficher les correspondances d'ingr√©dients
                if jaccard_score > 0 and 'normalized_ingredients' in rec:
                    norm_ing = rec['normalized_ingredients']
                    if isinstance(norm_ing, list):
                        common = set(user_ingredients) & set(norm_ing)
                        print(f"   ü§ù Ingr√©dients communs: {sorted(common)} ({len(common)}/{len(norm_ing)})")
                        print(f"   üìù Ingr√©dients recette: {norm_ing}")
                
                elif 'normalized_ingredients' in rec and isinstance(rec['normalized_ingredients'], list):
                    print(f"   üìù Ingr√©dients: {rec['normalized_ingredients'][:6]}...")
            
            # Statistiques finales
            max_jaccard = final_recs['jaccard'].max()
            avg_jaccard = final_recs['jaccard'].mean()
            has_matches = (final_recs['jaccard'] > 0).sum()
            
            print(f"\nüéä üéä SYST√àME OP√âRATIONNEL ! üéä üéä")
            print(f"   ‚úÖ Recommandations: {len(final_recs)}")
            print(f"   ü•ï Jaccard max: {max_jaccard:.3f}")
            print(f"   üìä Jaccard moyen: {avg_jaccard:.3f}")
            print(f"   ü§ù Recettes avec matches: {has_matches}/5")
            
            if max_jaccard > 0:
                print(f"   üéâ PARFAIT: Le syst√®me trouve des correspondances d'ingr√©dients!")
            else:
                print(f"   üí° INFO: Syst√®me fonctionnel, optimiser la normalisation des ingr√©dients")
            
            print(f"\n‚úÖ ‚úÖ ‚úÖ TOUTES LES CORRECTIONS APPLIQU√âES AVEC SUCC√àS ! ‚úÖ ‚úÖ ‚úÖ")
        
        except Exception as e:
            print(f"‚ùå Erreur recommandation: {e}")
            import traceback
            traceback.print_exc()
    
    else:
        print("‚ùå Pas d'ingr√©dients normalis√©s sous forme de liste")
else:
    print("‚ùå Colonne normalized_ingredients_list introuvable")

üîÑ CORRECTION: Utiliser normalized_ingredients_list
‚úÖ Colonnes disponibles: ['recipe_id', 'ingredients', 'ingredient_categories', 'normalized_ingredients_list', 'nutrition_dict', 'tags', 'meal_type', 'dietary_restrictions', 'cuisine_type', 'n_steps', 'effort_score', 'cooking_techniques', 'description_keywords']
‚úÖ Recettes avec ingr√©dients normalis√©s: 500/500
‚úÖ Recettes avec listes d'ingr√©dients: 500

üìã EXEMPLE D'INGR√âDIENTS NORMALIS√âS:
   Recipe ID: 94947
   Ingr√©dients bruts: {'sweet and sour sauce', 'scallion', 'crabmeat', 'refrigerated crescent dinner roll', 'egg yolk', 'sesame seed', 'cream cheese', 'garlic salt', 'water'}
   Ingr√©dients normalis√©s: ['scallion', 'sweet and sour sauce', 'refrigerated crescent dinner roll', 'crabmeat', 'egg yolk']...

üìä Donn√©es pr√©par√©es pour le scorer:
   Colonnes: ['recipe_id', 'ingredients', 'ingredient_categories', 'normalized_ingredients_list', 'nutrition_dict', 'tags', 'meal_type', 'dietary_restrictions', 'cuisine_type'

Traceback (most recent call last):
  File "/tmp/ipykernel_40/2135436364.py", line 53, in <module>
    final_recs = scorer.recommend(
                 ^^^^^^^^^^^^^^^^^
  File "/app/reco_score.py", line 105, in recommend
    df["jaccard"] = df[ingredient_col].apply(
                    ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/poetry_cache/virtualenvs/preprocessing-9TtSrW0h-py3.11/lib/python3.11/site-packages/pandas/core/series.py", line 4943, in apply
    ).apply()
      ^^^^^^^
  File "/tmp/poetry_cache/virtualenvs/preprocessing-9TtSrW0h-py3.11/lib/python3.11/site-packages/pandas/core/apply.py", line 1422, in apply
    return self.apply_standard()
           ^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/poetry_cache/virtualenvs/preprocessing-9TtSrW0h-py3.11/lib/python3.11/site-packages/pandas/core/apply.py", line 1502, in apply_standard
    mapped = obj._map_values(
             ^^^^^^^^^^^^^^^^
  File "/tmp/poetry_cache/virtualenvs/preprocessing-9TtSrW0h-py3.11/lib/python3.11/site-packages/pandas/

In [48]:
# Recharger le module corrig√©
import importlib
importlib.reload(reco_score)
scorer = reco_score.RecipScorer()

print("üîÑ Scorer recharg√© avec correction des listes")

# Relancer le test
print(f"\nüéØ TEST RECOMMANDATION CORRIG√â")
user_ingredients = ["chicken", "onion", "garlic", "salt"]

try:
    final_recs = scorer.recommend(
        recipes_df=processed_with_orig,
        interactions_df=sample_interactions,
        user_ingredients=user_ingredients,
        time_limit=120,
        top_n=5
    )
    
    print(f"\nüèÜ üéâ TOP 5 RECOMMANDATIONS FINALES üéâ üèÜ")
    
    for i, (_, rec) in enumerate(final_recs.iterrows(), 1):
        jaccard_score = rec['jaccard']
        total_score = rec['score']
        
        print(f"\nü•á RANG {i}: {rec['name']}")
        print(f"   üéØ Score total: {total_score:.4f}")
        print(f"   ü•ï Jaccard: {jaccard_score:.4f}")
        print(f"   ‚≠ê Rating: {rec.get('mean_rating_norm', 0):.3f}")
        print(f"   üî• Popularit√©: {rec.get('popularity', 0):.3f}")
        
        if 'minutes' in rec and pd.notnull(rec['minutes']):
            print(f"   ‚è±Ô∏è Temps: {rec['minutes']} min")
        
        # D√©tails des ingr√©dients
        if jaccard_score > 0 and 'normalized_ingredients' in rec:
            norm_ing = rec['normalized_ingredients']
            if isinstance(norm_ing, list):
                common = set(user_ingredients) & set(norm_ing)
                print(f"   ü§ù INGR√âDIENTS COMMUNS: {sorted(common)}")
                print(f"   üìù Recette contient: {norm_ing[:8]}...")
                print(f"   üìä Match: {len(common)}/{len(norm_ing)} ingr√©dients")
        
        elif 'normalized_ingredients' in rec and isinstance(rec['normalized_ingredients'], list):
            ing = rec['normalized_ingredients'][:5]
            print(f"   üìù Ingr√©dients: {ing}...")
    
    # üéä STATISTIQUES FINALES
    max_jaccard = final_recs['jaccard'].max()
    avg_jaccard = final_recs['jaccard'].mean()
    matches = (final_recs['jaccard'] > 0).sum()
    best_score = final_recs['score'].max()
    
    print(f"\n" + "="*60)
    print(f"üéä üéä üéä R√âSUM√â FINAL DU SYST√àME üéä üéä üéä")
    print(f"="*60)
    print(f"‚úÖ Syst√®me de recommandation: OP√âRATIONNEL")
    print(f"üìä Recommandations g√©n√©r√©es: {len(final_recs)}")
    print(f"ü•ï Score Jaccard maximum: {max_jaccard:.4f}")
    print(f"üìà Score Jaccard moyen: {avg_jaccard:.4f}")
    print(f"ü§ù Recettes avec correspondances: {matches}/5")
    print(f"üèÜ Meilleur score global: {best_score:.4f}")
    
    if max_jaccard > 0:
        print(f"üéâ SUCC√àS COMPLET: Correspondances d'ingr√©dients trouv√©es!")
    else:
        print(f"‚ö†Ô∏è  Syst√®me fonctionnel - Am√©liorer la normalisation des ingr√©dients")
    
    print(f"\nüöÄ TOUTES LES CORRECTIONS ONT √âT√â APPLIQU√âES AVEC SUCC√àS!")
    print(f"üìù Le projet MangeTaMain est maintenant enti√®rement op√©rationnel.")

except Exception as e:
    print(f"‚ùå Erreur lors de la recommandation: {e}")
    import traceback
    traceback.print_exc()

üîÑ Scorer recharg√© avec correction des listes

üéØ TEST RECOMMANDATION CORRIG√â
‚è±Ô∏è Filtrage temps: 500 ‚Üí 436 recettes
ü•ï Utilisation de la colonne: normalized_ingredients
üìä Stats calcul√©es pour 500 recettes
üîó Apr√®s fusion: 436 recettes
üèÜ Retour de 5 recommandations

üèÜ üéâ TOP 5 RECOMMANDATIONS FINALES üéâ üèÜ

ü•á RANG 1: chicken souvlaki marinade
   üéØ Score total: 0.5276
   ü•ï Jaccard: 0.1000
   ‚≠ê Rating: 0.925
   üî• Popularit√©: 1.000
   ‚è±Ô∏è Temps: 20 min
   ü§ù INGR√âDIENTS COMMUNS: ['salt']
   üìù Recette contient: ['chicken breast', 'minced garlic clove', 'lemon juice', 'salt', 'olive oil', 'dried oregano', 'black pepper']...
   üìä Match: 1/7 ingr√©dients

ü•á RANG 2: breaded  n baked zucchini chips
   üéØ Score total: 0.5028
   ü•ï Jaccard: 0.0909
   ‚≠ê Rating: 0.881
   üî• Popularit√©: 0.966
   ‚è±Ô∏è Temps: 30 min
   ü§ù INGR√âDIENTS COMMUNS: ['garlic']
   üìù Recette contient: ['egg', 'garlic', 'fresh parsley', 'italian brea

In [40]:
# Recharger le module reco_score avec les corrections
import sys
import importlib
if 'reco_score' in sys.modules:
    importlib.reload(reco_score)
else:
    import reco_score

# Cr√©er un nouveau scorer avec la version corrig√©e
scorer = reco_score.RecipScorer()
print("üîÑ Module reco_score recharg√© avec les corrections")

üîÑ Module reco_score recharg√© avec les corrections
