In [38]:
### FILTRAGE COLLABORATIF

import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import TruncatedSVD

In [39]:
# Charger/traiter les données
data = pd.read_csv('../data/raw/raw_recipes.csv')
data = data.drop_duplicates().dropna()
users = pd.read_csv('../data/raw/PP_users.csv')
users = users.drop_duplicates().dropna()
interactions = pd.read_csv('../data/raw/RAW_interactions.csv')
interactions = interactions.drop_duplicates().dropna()

# Filtrer les utilisateurs avec au moins 5 interactions
active_users = interactions['user_id'].value_counts()
filtered_users = active_users[active_users >= 5].index
interactions = interactions[interactions['user_id'].isin(filtered_users)]

# Filtrer les recettes avec au moins 5 interactions
popular_recipes = interactions['recipe_id'].value_counts()
filtered_recipes = popular_recipes[popular_recipes >= 5].index
interactions = interactions[interactions['recipe_id'].isin(filtered_recipes)]

In [40]:
# Créer un utilisateur fictif et lui attribuer des notes à 10 recettes
best_recipes = (
    interactions.groupby('recipe_id')['rating']
    .mean()
    .sort_values(ascending=False)
    .head(10)
    .index
)

new_user_id = users['u'].max() + 1  # Générer un nouvel ID
new_user_ratings = pd.DataFrame({
    'user_id': [new_user_id] * 10,
    'recipe_id': best_recipes,
    #'rating': [4, 3, 5, 2, 5, 1, 3, 4, 5, 5]
    'rating': [1, 5, 2, 5, 3, 5, 3, 2, 4, 2]
})

# Ajouter l'utilisateur fictif aux données d'interactions
interactions = pd.concat([interactions, new_user_ratings], ignore_index=True)

# Créer des mappings pour les utilisateurs et les recettes
user_mapping = {user_id: idx for idx, user_id in enumerate(interactions['user_id'].unique())}
recipe_mapping = {recipe_id: idx for idx, recipe_id in enumerate(interactions['recipe_id'].unique())}

# Mettre à jour les IDs dans les interactions
interactions['user_id'] = interactions['user_id'].map(user_mapping)
interactions['recipe_id'] = interactions['recipe_id'].map(recipe_mapping)

# Construire la matrice creuse
sparse_matrix = csr_matrix((
    interactions['rating'], 
    (interactions['user_id'], interactions['recipe_id'])
))

# Entraîner le modèle k-NN
knn_model = NearestNeighbors(metric='cosine', algorithm='brute')
knn_model.fit(sparse_matrix)

In [41]:
def get_recommendations_knn(user_id, nResults, n_neighbors):
    user_idx = user_mapping[user_id]
    try:
        # Recherche des voisins
        distances, indices = knn_model.kneighbors(sparse_matrix[user_idx], n_neighbors=n_neighbors)
    except ValueError:
        print("Aucune donnée disponible. Utilisation des recettes les mieux notées globalement.")
        top_recipes = (
            interactions.groupby('recipe_id')['rating']
            .mean()
            .sort_values(ascending=False)
            .head(nResults)
        )
        top_recipe_ids = [key for key, value in recipe_mapping.items() if value in top_recipes.index]
        return data[data['id'].isin(top_recipe_ids)]
    
    # Identifier les utilisateurs similaires
    similar_users = [list(user_mapping.keys())[i] for i in indices.flatten()]
    similar_user_ratings = interactions[interactions['user_id'].isin(similar_users)]
    
    if similar_user_ratings.empty:
        print("Aucun utilisateur similaire trouvé. Utilisation des recettes les mieux notées globalement.")
        top_recipes = (
            interactions.groupby('recipe_id')['rating']
            .mean()
            .sort_values(ascending=False)
            .head(nResults)
        )
        top_recipe_ids = [key for key, value in recipe_mapping.items() if value in top_recipes.index]
        return data[data['id'].isin(top_recipe_ids)]
    
    # Calculer les scores des recettes basés sur les utilisateurs similaires
    top_recipes = similar_user_ratings.groupby('recipe_id')['rating'].mean().sort_values(ascending=False).head(nResults)
    top_recipe_ids = [key for key, value in recipe_mapping.items() if value in top_recipes.index]
    recommended_recipes = data[data['id'].isin(top_recipe_ids)].copy()
    
    # Calculer la fiabilité pour chaque recette
    recipe_reliability = []
    for recipe_id in top_recipes.index:
        # Récupérer les distances des voisins ayant noté cette recette
        user_ids = similar_user_ratings[similar_user_ratings['recipe_id'] == recipe_id]['user_id']
        neighbor_distances = [
            distances[0][i] for i, user in enumerate(similar_users) if user in user_ids.values
        ]
        
        # Fiabilité : inversement proportionnelle à la distance moyenne
        if neighbor_distances:
            mean_distance = sum(neighbor_distances) / len(neighbor_distances)
            reliability = 1 - mean_distance / distances.max()
        else:
            reliability = 0  # Pas de fiabilité si aucun voisin pertinent
        
        recipe_reliability.append(reliability)
    
    # Ajouter les fiabilités aux recommandations
    recommended_recipes['reliability'] = recipe_reliability
    
    return recommended_recipes


In [42]:
# Application de la fonction de recommandation avec k-NN
nRecipes = 1000
nNeighbors = 100
recommendations_knn = get_recommendations_knn(new_user_id, nRecipes, nNeighbors)
print("Nombre de recommandations avec knn: ", recommendations_knn.shape[0])
print(f"Recommandations pour l'utilisateur {new_user_id}:\n")
recommendations_knn.head(nRecipes)

Nombre de recommandations avec knn:  124
Recommandations pour l'utilisateur 25076:



Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,reliability
7289,apple sour cream cake,57771,55,61660,2003-04-03,"['60-minutes-or-less', 'time-to-make', 'course...","[275.2, 19.0, 92.0, 10.0, 7.0, 32.0, 12.0]",11,"['preheat oven to 350', 'grease a 13 x 9 x 2 b...","love this cake! it is very moist, but not over...","['butter', 'sugar', 'eggs', 'vanilla', 'flour'...",13,0.019258
10873,authentic watergate salad,120800,70,170959,2005-05-06,"['time-to-make', 'course', 'preparation', '5-i...","[165.4, 13.0, 72.0, 2.0, 4.0, 20.0, 7.0]",3,"['stir pudding mix , pineapple with juice , ma...","i checked all the other recipes here, and each...","['instant pistachio pudding mix', 'crushed pin...",5,0.056567
13714,baked cheese omelet,137324,20,209747,2005-09-13,"['30-minutes-or-less', 'time-to-make', 'course...","[549.0, 61.0, 9.0, 24.0, 77.0, 112.0, 2.0]",8,"['preheat oven to 400f', 'beat eggs , water , ...",easy denver type omelet that is baked in the o...,"['eggs', 'water', 'salt', 'butter', 'green oni...",8,0.019258
14344,baked ham and cheese omelet roll,28648,30,37305,2002-05-16,"['ham', '30-minutes-or-less', 'time-to-make', ...","[409.5, 38.0, 1.0, 25.0, 58.0, 59.0, 5.0]",11,"['preheat oven to 450 degrees', 'beat eggs and...",this is an easy way to serve omelets. in our h...,"['eggs', 'milk', 'all-purpose flour', 'salt', ...",7,0.019258
14540,baked macaroni with cheese and tomatoes,20147,75,20371,2002-02-19,"['weeknight', 'time-to-make', 'course', 'main-...","[304.1, 22.0, 14.0, 15.0, 31.0, 43.0, 9.0]",9,"['preheat oven to 350f degrees', 'cook macaron...","i haven't made this in years, so was very plea...","['elbow macaroni', 'savory', 'white sugar', 'g...",10,0.019258
...,...,...,...,...,...,...,...,...,...,...,...,...,...
225805,whiskey marinade for chicken pork or steak,164519,10,142386,2006-04-16,"['15-minutes-or-less', 'time-to-make', 'course...","[312.5, 20.0, 108.0, 114.0, 4.0, 9.0, 9.0]",10,"['place brown sugar in bowl', 'add whiskey to ...",i used this marinade on ribeye steaks cooked o...,"['whiskey', 'brown sugar', 'olive oil', 'garli...",7,0.013864
226184,white chocolate chip cranberry oatmeal cookies,112683,30,39547,2005-03-04,"['30-minutes-or-less', 'time-to-make', 'course...","[98.6, 6.0, 34.0, 2.0, 2.0, 13.0, 4.0]",8,"['preheat oven to 375f', 'in a large bowl usin...",i originally made these at the request of my o...,"['sugar', 'brown sugar', 'butter', 'egg', 'van...",12,0.056567
226962,whole wheat english muffins,63868,145,3288,2003-06-05,"['weeknight', 'time-to-make', 'course', 'cuisi...","[233.3, 9.0, 10.0, 5.0, 14.0, 6.0, 13.0]",10,"['in a large bowl , dissolve the yeast in the ...",posted in response to request. i love english ...,"['dry yeast', 'warm water', 'sugar', 'salt', '...",9,0.056567
228307,wrap up lunch,52620,15,5060,2003-01-28,"['15-minutes-or-less', 'time-to-make', 'course...","[297.5, 22.0, 20.0, 27.0, 18.0, 28.0, 11.0]",14,"['combine cream cheese , grated carrot and cum...",entry for the ready set cook #3 contest. this ...,"['cream cheese spread', 'carrot', 'cumin', 'wh...",9,0.013864


In [43]:
def filter_recommendations(tabRecommendation):
    # Séparer les valeurs de la colonne 'nutrition' en plusieurs colonnes
    nutrition_columns = ['calories', 'total fat (PDV)', 'sugar (PDV)', 'sodium (PDV)', 'protein (PDV)', 'saturated fat (PDV)', 'carbohydrates (PDV)']
    nutrition_data = tabRecommendation['nutrition'].str.strip('[]').str.split(',', expand=True)
    nutrition_data.columns = nutrition_columns

    # Ajouter les nouvelles colonnes au DataFrame original
    tabRecommendation = tabRecommendation.drop(columns=['nutrition'])
    tabRecommendation = pd.concat([tabRecommendation, nutrition_data], axis=1)

    for col in ['calories', 'total fat (PDV)', 'protein (PDV)', 'carbohydrates (PDV)']:
        tabRecommendation[col] = pd.to_numeric(tabRecommendation[col], errors='coerce')

    # Appliquer les contraintes manuellement pour ce premier exemple
    filtered_recommendations = tabRecommendation[
        (tabRecommendation['calories'] <= 700) &
        (tabRecommendation['protein (PDV)'] >= 20) &
        (tabRecommendation['total fat (PDV)'] <= 30) &
        (tabRecommendation['minutes'] <= 40)
    ]

    return filtered_recommendations;

In [44]:
# Filtrer les recommandations k-NN avec des contraintes
result = filter_recommendations(recommendations_knn)
print("Nombre de recommandations kNN filtrées : ", result.shape[0])
print(f"Recommandations pour l'utilisateur {new_user_id}:\n")
result.head(nRecipes)

Nombre de recommandations kNN filtrées :  6
Recommandations pour l'utilisateur 25076:



Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,n_steps,steps,description,ingredients,n_ingredients,reliability,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV)
68624,curried poached eggs,24685,10,5060,2002-04-08,"['15-minutes-or-less', 'time-to-make', 'course...",10,['fill medium saucepan with about 3 inches of ...,wonderful taste to start your day! from reader...,"['ground coriander', 'curry powder', 'ground c...",8,0.013864,389.6,23.0,7.0,46.0,41.0,22.0,14.0
102052,hamburger helper soup,21813,40,29014,2002-03-09,"['60-minutes-or-less', 'time-to-make', 'course...",6,['brown ground beef and onion in dutch oven un...,ok - go ahead and laugh! this comforting soup ...,"['ground beef', 'onion', 'hamburger helper mix...",10,0.019258,300.6,26.0,17.0,24.0,47.0,33.0,3.0
118323,kittencal s tuna melt sandwich,169085,15,89831,2006-05-22,"['15-minutes-or-less', 'time-to-make', 'course...",8,"['set oven to broiler heat', 'in a bowl mix tu...",trust me you will be craving this often once y...,"['solid white tuna packed in water', 'low-fat ...",11,0.024536,304.9,17.0,6.0,25.0,61.0,29.0,6.0
167376,pumpkin risotto,23113,30,30716,2002-03-21,"['30-minutes-or-less', 'time-to-make', 'course...",14,['heat butter and oil together in a large sauc...,my ultimate comfort food. i make this so often...,"['butter', 'olive oil', 'onion', 'garlic', 'mu...",12,0.019258,405.6,16.0,8.0,9.0,22.0,23.0,21.0
210777,thai rice noodles with chicken and asparagus,179966,40,306669,2006-07-31,"['60-minutes-or-less', 'time-to-make', 'course...",11,['pour noodles into bowl and cover with very h...,oooooooh this is good! my husband said he now ...,"['rice noodles', 'oil', 'garlic cloves', 'bone...",9,0.019258,435.7,15.0,17.0,45.0,57.0,8.0,18.0
212718,the ultimate salmon fillets,53198,30,45524,2003-02-03,"['30-minutes-or-less', 'time-to-make', 'course...",10,"['preheat oven to 450 degrees', 'if frozen , p...","living on the west coast, we have eaten a grea...","['salmon fillet', 'sour cream', 'prepared must...",9,0.013864,318.3,27.0,7.0,13.0,66.0,27.0,1.0


In [45]:
# Préparation des données pour SVD
# Réduction de dimension avec SVD
svd = TruncatedSVD(n_components=50, random_state=42)
svd_model = svd.fit_transform(sparse_matrix)  # Matrice U (représentation des utilisateurs)
svd_components = svd.components_  # Matrice V^T (représentation des recettes)

In [46]:
def get_recommendations_svd(user_id, nResults):
    # Trouver l'index utilisateur
    user_idx = user_mapping[user_id]
    user_ratings = svd_model[user_idx]  # Représentation de l'utilisateur dans l'espace SVD
    
    # Reformater user_ratings en 2D (1, 50) pour l'alignement des dimensions
    user_ratings = user_ratings.reshape(1, -1)
    
    # Prédictions des scores pour toutes les recettes
    predictions = user_ratings.dot(svd_components)  # dimensions alignées : (1, 50) dot (50, n_recipes)
    predictions = predictions.flatten()  # Convertir en vecteur 1D
    
    # Construire les scores pour chaque recette
    recipe_scores = pd.DataFrame({
        'recipe_id': range(len(predictions)),
        'predicted_rating': predictions
    })
    
    # Calculer la fiabilité pour chaque recette : normaliser les prédictions
    max_pred = predictions.max()
    std_pred = predictions.std()
    if std_pred > 0:  # Éviter une division par zéro
        recipe_scores['reliability'] = 1 - (abs(predictions - predictions.mean()) / std_pred)
    else:
        recipe_scores['reliability'] = 1.0  # Si pas de variance, toutes les fiabilités sont maximales
    recipe_scores['reliability'] = recipe_scores['reliability'].clip(lower=0, upper=1)  # Fiabilité entre 0 et 1
    
    # Trier les recettes par score prédictif
    top_recipes = recipe_scores.sort_values(by='predicted_rating', ascending=False).head(nResults)
    
    # Mapper les IDs des recettes
    top_recipe_ids = [key for key, value in recipe_mapping.items() if value in top_recipes['recipe_id'].values]
    recommended_recipes = data[data['id'].isin(top_recipe_ids)].copy()
    
    # Assurez-vous que toutes les recommandations ont une correspondance dans recipe_scores
    recommended_recipes = recommended_recipes.merge(
        recipe_scores[['recipe_id', 'reliability']],
        left_on='id',
        right_on='recipe_id',
        how='left'
    )
    
    # Remplir les NaN dans la colonne de fiabilité (par sécurité)
    recommended_recipes['reliability'] = recommended_recipes['reliability'].fillna(0.0)
    
    return recommended_recipes


In [47]:
# Application de la fonction de recommandation avec SVD
recommendations_svd = get_recommendations_svd(new_user_id, nRecipes)
print("Nombre de recommandations avec svd : ", recommendations_svd.shape[0])
print(f"Recommandations pour l'utilisateur {new_user_id}:\n")
recommendations_svd.head(nRecipes)

Nombre de recommandations avec svd :  987
Recommandations pour l'utilisateur 25076:



Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,recipe_id,reliability
0,not your every day chicken salad chicken salad,164291,245,295591,2006-04-14,"['time-to-make', 'course', 'main-ingredient', ...","[258.6, 20.0, 9.0, 17.0, 56.0, 16.0, 1.0]",4,"['blend mayonnaise , vinegar , garlic and dill...",a friend gave this to me with out me ever try...,"['light mayonnaise', 'cider vinegar', 'garlic ...",7,,0.000000
1,almost boston market creamed spinach,77206,50,89831,2003-11-21,"['60-minutes-or-less', 'time-to-make', 'course...","[337.9, 43.0, 9.0, 24.0, 18.0, 84.0, 5.0]",14,['in a saucepan melt butter over medium heat u...,this is quite similar to the creamed spinach a...,"['butter', 'flour', 'salt', 'half-and-half', '...",10,,0.000000
2,1 hour ham and bean soup,47924,55,62086,2002-12-08,"['ham', '60-minutes-or-less', 'time-to-make', ...","[401.7, 18.0, 16.0, 48.0, 61.0, 12.0, 14.0]",13,"['heat vegetable oil in a soup pot', 'add carr...","made this one up myself, from a mix of differe...","['vegetable oil', 'carrot', 'celery', 'onion',...",10,,0.000000
3,10 minute cheesy mexican rice,132955,10,110078,2005-08-09,"['15-minutes-or-less', 'time-to-make', 'course...","[308.5, 11.0, 18.0, 55.0, 25.0, 21.0, 15.0]",7,"['mix broth and salsa in medium saucepan', 'br...",a quick and easy recipe i got from kraft.,"['condensed chicken broth', 'salsa', 'minute r...",4,,0.000000
4,24k carrots,152441,15,209747,2006-01-19,"['15-minutes-or-less', 'time-to-make', 'course...","[124.4, 9.0, 48.0, 5.0, 2.0, 18.0, 5.0]",3,['cook carrots until tender- or to desired don...,"carrots coated with a yummy butter, brown suga...","['carrots', 'brown sugar', 'butter', 'ground g...",4,,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
982,ziti with garlic clam sauce,323896,30,107583,2008-09-08,"['30-minutes-or-less', 'time-to-make', 'course...","[456.1, 24.0, 10.0, 31.0, 53.0, 16.0, 16.0]",13,"['in large skillet , over medium heat , heat o...",a quick tasty main dish. serve with a crusty ...,"['canola oil', 'onion', 'garlic cloves', 'clam...",10,,0.000000
983,zucchini and cucumber salad,159002,10,37449,2006-03-08,"['15-minutes-or-less', 'time-to-make', 'course...","[192.4, 24.0, 25.0, 14.0, 4.0, 11.0, 3.0]",7,"['in a large bowl , combine zucchini , cucumbe...",mmmmm....zucchini and cucumber marinated in an...,"['zucchini', 'english cucumber', 'sweet onion'...",14,,0.000000
984,zucchini and mushroom skillet,121202,35,35526,2005-05-09,"['60-minutes-or-less', 'time-to-make', 'course...","[43.2, 0.0, 19.0, 0.0, 6.0, 0.0, 2.0]",5,['cut zucchini in half and then into 1 inch pi...,"low carb, delicious and simple to make.","['zucchini', 'fresh mushrooms', 'onion', 'oliv...",7,,0.000000
985,zucchini gratin,15309,50,9869,2001-12-05,"['60-minutes-or-less', 'time-to-make', 'course...","[400.6, 46.0, 31.0, 25.0, 41.0, 88.0, 4.0]",14,"['preheat oven to 350f degrees', 'butter 9 inc...",creamy vegetable dish,"['butter', 'zucchini', 'fresh tarragon', 'parm...",8,15309.0,0.790662


In [48]:
# Filtrer les recommandations SVD avec des contraintes
result = filter_recommendations(recommendations_svd)
print("Nombre de recommandations SVD filtrées : ", result.shape[0])
print(f"Recommandations pour l'utilisateur {new_user_id}:\n")
result.head(nRecipes)

Nombre de recommandations SVD filtrées :  51
Recommandations pour l'utilisateur 25076:



Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,n_steps,steps,description,ingredients,n_ingredients,recipe_id,reliability,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV)
3,10 minute cheesy mexican rice,132955,10,110078,2005-08-09,"['15-minutes-or-less', 'time-to-make', 'course...",7,"['mix broth and salsa in medium saucepan', 'br...",a quick and easy recipe i got from kraft.,"['condensed chicken broth', 'salsa', 'minute r...",4,,0.0,308.5,11.0,18.0,55.0,25.0,21.0,15.0
77,balsamic salmon,83719,16,54678,2004-02-12,"['30-minutes-or-less', 'time-to-make', 'course...",8,"['preheat oven to 450f', 'place salmon , skin-...",chatelaine - april 2001 submitted by dan mathe...,"['salmon fillets', 'balsamic vinegar', 'liquid...",6,,0.0,466.5,25.0,39.0,12.0,131.0,14.0,3.0
88,beef noodle skillet,357999,30,65502,2009-02-27,"['30-minutes-or-less', 'time-to-make', 'course...",10,"['cook noodles as directed on package', 'meanw...",a fast meal that's tasty and easy on the budge...,"['broad egg noodles', 'extra lean ground beef'...",9,,0.0,303.9,11.0,23.0,10.0,40.0,13.0,13.0
102,beverage gang dedication shrimp pasta and as...,292731,30,233583,2008-03-19,"['30-minutes-or-less', 'time-to-make', 'course...",13,"['in large skillet , heat oil and 2 tbsp butte...",this is dedicated to the beverage gang as they...,"['fusilli', 'asparagus', 'shrimp', 'white wine...",12,,0.0,474.3,22.0,18.0,34.0,44.0,32.0,17.0
104,black bean and pineapple quesadilla,261471,25,174096,2007-10-25,"['30-minutes-or-less', 'time-to-make', 'course...",8,"['spray pan with non-stick cooking spray', 'st...",i recreated this dish from a favorite at a loc...,"['flour tortilla', 'green chili', 'onion', 'pi...",7,,0.0,361.6,20.0,39.0,44.0,33.0,35.0,15.0
135,brown sugar roasted salmon with maple mustard ...,428310,30,526666,2010-06-02,"['30-minutes-or-less', 'time-to-make', 'course...",11,"['preheat oven to 400f', 'combine brown sugar ...",simple roasted salmon coated in brown sugar an...,"['salmon fillets', 'brown sugar', 'black peppe...",8,,0.0,356.1,20.0,101.0,24.0,59.0,10.0,9.0
193,chickpea olive salad,360800,7,621626,2009-03-14,"['15-minutes-or-less', 'time-to-make', 'course...",2,['mix all the ingredients together in a glass ...,this salad is popular in the middle east.,"['chickpeas', 'black olives', 'scallion', 'fre...",10,,0.0,430.6,20.0,3.0,43.0,28.0,8.0,21.0
194,chickpea salad,185331,15,199848,2006-09-08,"['15-minutes-or-less', 'time-to-make', 'course...",3,"['in a large bowl , toss together the rinsed a...","easy, light and healthy source of protein and ...","['chickpeas', 'red onion', 'red bell pepper', ...",12,,0.0,359.2,17.0,11.0,30.0,22.0,7.0,18.0
253,creamy corned zucchini and peppers,432934,15,1157312,2010-07-21,"['15-minutes-or-less', 'time-to-make', 'course...",5,"['clean and small dice zucchini , onion , and ...",a really nice recipe for summer ingredients. ...,"['zucchini', 'corn', 'white onion', 'canola oi...",10,,0.0,313.9,30.0,37.0,14.0,21.0,34.0,9.0
255,creamy dijon cheesy cauliflower,69095,30,96896,2003-08-15,"['30-minutes-or-less', 'time-to-make', 'course...",13,"['grease a shallow baking dish', 'cook the cau...",this recipe is absolutely delicious. the dijon...,"['cauliflower', 'butter', 'all-purpose flour',...",8,,0.0,309.9,30.0,11.0,21.0,32.0,60.0,6.0


In [None]:
### CODE POUR LE FILTRAGE PAR CONTENU 

import pandas as pd
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

recipes_df = pd.read_csv('/content/recipes/RAW_recipes.csv', on_bad_lines='skip')
recipes_df.info()
recipes_df = recipes_df.drop(
    ['description', 'submitted', 'contributor_id', 'tags', 'nutrition', 'n_steps', 'n_ingredients', 'steps'], axis=1)
recipes_df.head()
recipes_df = recipes_df.head(10000)
recipes_df = recipes_df.drop_duplicates()
recipes_df = recipes_df.dropna()

def text_cleaning(text):
    text = "".join([char for char in text if char not in string.punctuation])
    return text

recipes_df['ingredients'] = recipes_df['ingredients'].apply(lambda x: text_cleaning(x))
recipes_df.duplicated().sum()
recipes_df.isnull().sum()
tfidf = TfidfVectorizer(stop_words='english', max_features=30000)
tfidf_matrix = tfidf.fit_transform(recipes_df['ingredients'])
tfidf_matrix.shape
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
cosine_sim
indices = pd.Series(recipes_df.index, index=recipes_df['name']).drop_duplicates()
indices

def get_recommendations(title, cosine_sim=cosine_sim):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 5 most similar food
    sim_scores = sim_scores[1:6]

    food_indices = [i[0] for i in sim_scores]

    df_reco = pd.DataFrame(recipes_df['name'].iloc[food_indices])
    df_reco['ingredients'] = recipes_df['ingredients'].iloc[food_indices]
    return df_reco

print(get_recommendations('asparagus with mustard dill sauce'))