In [106]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import TruncatedSVD

In [107]:
# Charger/traiter les données
data = pd.read_csv('../data/raw/raw_recipes.csv')
data = data.drop_duplicates().dropna()
users = pd.read_csv('../data/raw/PP_users.csv')
users = users.drop_duplicates().dropna()
interactions = pd.read_csv('../data/raw/RAW_interactions.csv')
interactions = interactions.drop_duplicates().dropna()

# Garder uniquement les recettes notées
noted_recipe_ids = interactions['recipe_id'].unique()
data = data[data['id'].isin(noted_recipe_ids)]

In [108]:
# Créer un utilisateur fictif et lui attribuer des notes à 10 recettes
best_recipes = (
    interactions.groupby('recipe_id')['rating']
    .mean()
    .sort_values(ascending=False)
    .head(10)
    .index
)

new_user_id = users['u'].max() + 1  # Générer un nouvel ID
new_user_ratings = pd.DataFrame({
    'user_id': [new_user_id] * 10,
    'recipe_id': best_recipes,
    'rating': [4, 3, 5, 2, 5, 1, 3, 4, 5, 5]
    #'rating': [1, 5, 2, 5, 3, 5, 3, 2, 4, 2]
})

# Ajouter l'utilisateur fictif aux données d'interactions
interactions = pd.concat([interactions, new_user_ratings], ignore_index=True)

# Créer des mappings pour les utilisateurs et les recettes
user_mapping = {user_id: idx for idx, user_id in enumerate(interactions['user_id'].unique())}
recipe_mapping = {recipe_id: idx for idx, recipe_id in enumerate(interactions['recipe_id'].unique())}

# Mettre à jour les IDs dans les interactions
interactions['user_id'] = interactions['user_id'].map(user_mapping)
interactions['recipe_id'] = interactions['recipe_id'].map(recipe_mapping)

# Construire la matrice creuse
sparse_matrix = csr_matrix((
    interactions['rating'], 
    (interactions['user_id'], interactions['recipe_id'])
))

# Entraîner le modèle k-NN
knn_model = NearestNeighbors(metric='cosine', algorithm='brute')
knn_model.fit(sparse_matrix)

In [109]:
# Fonction pour obtenir des recommandations avec k-NN
def get_recommendations(user_id, nResults, n_neighbors):
    user_idx = user_mapping[user_id]
    try:
        distances, indices = knn_model.kneighbors(sparse_matrix[user_idx], n_neighbors=n_neighbors)
    except ValueError:
        print("Aucune données disponible. Utilisation des recettes les mieux notées globalement.")
        top_recipes = (
            interactions.groupby('recipe_id')['rating']
            .mean()
            .sort_values(ascending=False)
            .head(nResults)
        )
        top_recipe_ids = [key for key, value in recipe_mapping.items() if value in top_recipes.index]
        return data[data['id'].isin(top_recipe_ids)]
    
    similar_users = [list(user_mapping.keys())[i] for i in indices.flatten()]
    similar_user_ratings = interactions[interactions['user_id'].isin(similar_users)]
    
    if similar_user_ratings.empty:
        print("Aucun utilisateur similaire trouvé. Utilisation des recettes les mieux notées globalement.")
        top_recipes = (
            interactions.groupby('recipe_id')['rating']
            .mean()
            .sort_values(ascending=False)
            .head(nResults)
        )
        top_recipe_ids = [key for key, value in recipe_mapping.items() if value in top_recipes.index]
        return data[data['id'].isin(top_recipe_ids)]
    
    top_recipes = similar_user_ratings.groupby('recipe_id')['rating'].mean().sort_values(ascending=False).head(nResults)
    top_recipe_ids = [key for key, value in recipe_mapping.items() if value in top_recipes.index]
    recommended_recipes = data[data['id'].isin(top_recipe_ids)]
    return recommended_recipes

In [110]:
# Application de la fonction de recommandation avec k-NN
nRecipes = 1000
nNeighbors = 100
recommendations_knn = get_recommendations(new_user_id, nRecipes, nNeighbors)
print("Nombre de recommandations avec knn: ", recommendations_knn.shape[0])
print(f"Recommandations pour l'utilisateur {new_user_id}:\n")
recommendations_knn.head(nRecipes)

Nombre de recommandations avec knn:  166
Recommandations pour l'utilisateur 25076:



Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
122,100 broccoli casserole,116679,50,107334,2005-04-12,"['60-minutes-or-less', 'time-to-make', 'course...","[212.0, 25.0, 11.0, 19.0, 11.0, 50.0, 4.0]",5,['cook broccoli until tender in the amount of ...,this is a nice change of pace from the cheez w...,"['frozen broccoli', 'cream cheese', 'dry onion...",7
2546,addictive chicken tenders one taste and you r...,69990,50,48263,2003-08-28,"['60-minutes-or-less', 'time-to-make', 'course...","[450.4, 51.0, 2.0, 54.0, 69.0, 98.0, 1.0]",6,"['preheat oven to 350', 'combine all dry ingre...","i found this recipe on a low-carb site, (dh is...","['chicken tenders', 'parmesan cheese', 'dried ...",8
3975,almond anise biscotti,78048,60,49879,2003-12-06,"['60-minutes-or-less', 'time-to-make', 'course...","[138.9, 9.0, 42.0, 3.0, 5.0, 13.0, 6.0]",17,"['cream butter and 1 cup sugar', 'add eggs one...",one of my most requested recipes.,"['butter', 'sugar', 'eggs', 'almond extract', ...",10
4182,aloo matar,220883,40,450571,2007-04-05,"['60-minutes-or-less', 'time-to-make', 'course...","[184.3, 14.0, 22.0, 11.0, 9.0, 6.0, 7.0]",10,"['heat oil over medium heat', 'add cumin seed ...",this is a recipe for aloo matar that i got off...,"['potatoes', 'frozen peas', 'green chili', 'wh...",14
4381,amanda s thai peanut,175230,25,236462,2006-06-28,"['30-minutes-or-less', 'time-to-make', 'course...","[480.6, 22.0, 54.0, 54.0, 42.0, 13.0, 23.0]",12,"['in a large pot , cook fettucini as directed'...",by far the best and healthiest thai peanut i'v...,"['fettuccine', 'broccoli', 'carrot', 'garlic',...",10
...,...,...,...,...,...,...,...,...,...,...,...,...
222239,vegetarian pho vietnamese noodle soup,201382,40,334301,2006-12-21,"['60-minutes-or-less', 'time-to-make', 'course...","[304.3, 0.0, 9.0, 5.0, 7.0, 0.0, 23.0]",22,"['tear greens into bite-sized pieces', 'use bo...",from sally bernstien's article on vietnamese f...,"['rice noodles', 'seitan', 'bean sprouts', 'na...",21
225975,white bean zucchini basil salad,125580,20,16975,2005-06-11,"['30-minutes-or-less', 'time-to-make', 'course...","[139.5, 9.0, 6.0, 10.0, 12.0, 4.0, 5.0]",8,['rinse beans in colander and place in a large...,a delicious summer salad perfect for picnics a...,"['zucchini', 'roma tomatoes', 'great northern ...",11
227203,whoopie pies the real deal lancaster co r...,54787,38,41706,2003-02-24,"['60-minutes-or-less', 'time-to-make', 'course...","[518.7, 46.0, 169.0, 9.0, 8.0, 32.0, 19.0]",14,"['cream shortening , sugar and eggs', 'add van...",mmmmmmmmmmmmmmmmm! if you've never had a whoop...,"['oil', 'brown sugar', 'eggs', 'flour', 'salt'...",12
228032,wonderful curried sweet potato soup,161324,30,225021,2006-03-24,"['30-minutes-or-less', 'time-to-make', 'course...","[283.8, 13.0, 45.0, 16.0, 16.0, 12.0, 13.0]",16,"['soup:', 'in a soup pot , heat oil over mediu...",this is a wonderful soup. and look at the list...,"['oil', 'onion', 'garlic clove', 'curry paste'...",12


In [111]:
def filter_recommendations(tabRecommendation):
    # Séparer les valeurs de la colonne 'nutrition' en plusieurs colonnes
    nutrition_columns = ['calories', 'total fat (PDV)', 'sugar (PDV)', 'sodium (PDV)', 'protein (PDV)', 'saturated fat (PDV)', 'carbohydrates (PDV)']
    nutrition_data = tabRecommendation['nutrition'].str.strip('[]').str.split(',', expand=True)
    nutrition_data.columns = nutrition_columns

    # Ajouter les nouvelles colonnes au DataFrame original
    tabRecommendation = tabRecommendation.drop(columns=['nutrition'])
    tabRecommendation = pd.concat([tabRecommendation, nutrition_data], axis=1)

    for col in ['calories', 'total fat (PDV)', 'protein (PDV)', 'carbohydrates (PDV)']:
        tabRecommendation[col] = pd.to_numeric(tabRecommendation[col], errors='coerce')

    # Appliquer les contraintes manuellement pour ce premier exemple
    filtered_recommendations = tabRecommendation[
        (tabRecommendation['calories'] >= 300) &
        (tabRecommendation['calories'] <= 700) &
        (tabRecommendation['protein (PDV)'] >= 20) &
        (tabRecommendation['total fat (PDV)'] <= 30) &
        (tabRecommendation['minutes'] <= 40)
    ]

    return filtered_recommendations;

In [112]:
# Filtrer les recommandations k-NN avec des contraintes
result = filter_recommendations(recommendations_knn)
print("Nombre de recommandations kNN filtrées : ", result.shape[0])
print(f"Recommandations pour l'utilisateur {new_user_id}:\n")
result.head(nRecipes)

Nombre de recommandations kNN filtrées :  14
Recommandations pour l'utilisateur 25076:



Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV)
4381,amanda s thai peanut,175230,25,236462,2006-06-28,"['30-minutes-or-less', 'time-to-make', 'course...",12,"['in a large pot , cook fettucini as directed'...",by far the best and healthiest thai peanut i'v...,"['fettuccine', 'broccoli', 'carrot', 'garlic',...",10,480.6,22.0,54.0,54.0,42.0,13.0,23.0
20793,beef taquitos,217324,30,141293,2007-03-18,"['30-minutes-or-less', 'time-to-make', 'course...",13,"['heat the oven to 400', 'heat the oil in a la...","like tacos, only neater to eat. in mexico, the...","['vegetable oil', 'onion', 'garlic clove', 'le...",11,471.4,30.0,13.0,43.0,48.0,35.0,15.0
26917,boneless hoisin country ribs,313039,25,37722,2008-07-09,"['30-minutes-or-less', 'time-to-make', 'course...",8,"['preheat oven to 400 degrees f', 'line a larg...",a very nice combination of flavors for a quint...,"['garlic cloves', 'hoisin sauce', 'brown sugar...",7,427.3,16.0,143.0,68.0,76.0,18.0,14.0
79857,egyptian red snapper in red pepper mint sauce,133946,20,183964,2005-08-18,"['30-minutes-or-less', 'time-to-make', 'course...",4,"['saute onion in olive oil until golden', 'add...",this recipe is posted for the zaar world tour ...,"['red snapper fillets', 'olive oil', 'red onio...",9,328.6,9.0,7.0,5.0,120.0,5.0,1.0
85488,fontina topped chicken cutlets with couscous,129527,35,185293,2005-07-12,"['60-minutes-or-less', 'time-to-make', 'course...",17,"['heat oven to 375f', 'in large nonstick skill...",this is so easy and very good!! people think ...,"['olive oil', 'garlic clove', 'baby spinach', ...",9,637.0,30.0,4.0,83.0,103.0,46.0,20.0
100567,ground turkey with creamy squash sauce over n...,192361,30,58104,2006-10-26,"['curries', '30-minutes-or-less', 'time-to-mak...",9,"['heat oil in a large pan', 'add onions and ga...",low fat made with a puree of winter squash be ...,"['oil', 'garlic cloves', 'onion', 'ground turk...",17,387.7,15.0,18.0,9.0,49.0,13.0,16.0
133847,mexicali casserole,16328,40,27371,2001-12-28,"['60-minutes-or-less', 'time-to-make', 'course...",11,['start browning meat in a large skillet addin...,i like this recipe because it is not only low-...,"['ground turkey', 'fat free cheese', 'onion', ...",10,443.4,17.0,71.0,80.0,69.0,17.0,18.0
148006,one pot spaghetti supper,413237,25,316094,2010-02-16,"['30-minutes-or-less', 'time-to-make', 'main-i...",9,['cook the pasta following package directions'...,this recipe was featured in the 1-2-3 eats col...,"['spaghetti', 'olive oil', 'onion', 'pasta sau...",9,366.1,13.0,34.0,24.0,35.0,7.0,17.0
150647,oven baked chicken parmesan,42972,40,53510,2002-10-14,"['60-minutes-or-less', 'time-to-make', 'main-i...",7,"['preheat oven to 400 degrees', 'dip chicken i...",an easy recipe for chicken parmesan!,"['boneless skinless chicken breast halves', 'e...",5,484.1,25.0,74.0,52.0,77.0,31.0,13.0
181994,savory rice pilaf with lavender apricots,66672,35,92700,2003-07-14,"['60-minutes-or-less', 'time-to-make', 'course...",13,"['melt butter in medium saucepan', 'gradually ...",i got this recipe from the herb gardeners at f...,"['butter', 'onion', 'celery', 'garlic clove', ...",15,460.7,29.0,70.0,47.0,23.0,32.0,21.0


In [113]:
# Préparation des données pour SVD
# Réduction de dimension avec SVD
svd = TruncatedSVD(n_components=50, random_state=42)
matrix_svd = svd.fit_transform(sparse_matrix)

In [114]:
# Function to get recommendations with SVD
def get_svd_recommendations(user_id, nResults):
    user_idx = user_mapping[user_id]
    user_vector = matrix_svd[user_idx]
    scores = matrix_svd.dot(user_vector)
    top_indices = scores.argsort()[-nResults:][::-1]
    
    # Handle missing keys gracefully
    top_recipe_ids = [recipe_mapping.get(i, None) for i in top_indices if i in recipe_mapping]
    top_recipe_ids = [id for id in top_recipe_ids if id is not None]
    
    return data[data['id'].isin(top_recipe_ids)]

In [115]:
# Application de la fonction de recommandation avec SVD
recommendations_svd = get_svd_recommendations(new_user_id, nRecipes)
print("Nombre de recommandations avec svd : ", recommendations_svd.shape[0])
print(f"Recommandations pour l'utilisateur {new_user_id}:\n")
recommendations_svd.head(nRecipes)

Nombre de recommandations avec svd :  241
Recommandations pour l'utilisateur 25076:



Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
487,old reliable french bread for kitchen aid mi...,52289,170,39949,2003-01-27,"['weeknight', 'time-to-make', 'course', 'main-...","[113.5, 1.0, 0.0, 10.0, 6.0, 0.0, 7.0]",18,['dissolve yeast in warm water in warmed mixer...,"this recipe came with my kitchen aid mixer, an...","['active dry yeast', 'water', 'salt', 'margari...",7
559,three cheese cheese ball,77380,375,37636,2003-11-26,"['weeknight', 'time-to-make', 'course', 'main-...","[1846.7, 243.0, 26.0, 130.0, 173.0, 436.0, 9.0]",5,"['mix all ingredients thoroughly', 'divide mix...",great for parties and superbowl get-togethers....,"['cream cheese', 'cheddar cheese', 'milk', 'bl...",8
1101,2 baby potatoes with rosemary,162657,60,210188,2006-04-03,"['lactose', '60-minutes-or-less', 'time-to-mak...","[207.3, 8.0, 10.0, 15.0, 7.0, 4.0, 11.0]",6,"['preheat oven to 425f', 'cut each potato in h...",this is the starch for the easter dinner from ...,"['small potatoes', 'olive oil', 'fresh rosemar...",6
1608,5 spice sugar,101898,5,128047,2004-10-13,"['lactose', '15-minutes-or-less', 'time-to-mak...","[52.3, 0.0, 50.0, 0.0, 0.0, 0.0, 4.0]",2,"['for a gift: layer in a 1 / 3-1 / 2 c clear ,...",found this in sunset magazine around christmas...,"['sugar', 'ground cinnamon', 'ground nutmeg', ...",6
1807,8 vegetable ragout,136399,30,235262,2005-09-06,"['30-minutes-or-less', 'time-to-make', 'course...","[156.2, 6.0, 36.0, 27.0, 10.0, 3.0, 9.0]",6,"['saute onion , carrot , garlic in vegetable f...",a wonderful vegetable dish for everyday or com...,"['onion', 'carrot', 'minced garlic cloves', 'v...",12
...,...,...,...,...,...,...,...,...,...,...,...,...
227678,wine punch,95943,5,147217,2004-07-20,"['15-minutes-or-less', 'time-to-make', 'course...","[55.2, 0.0, 33.0, 0.0, 0.0, 0.0, 3.0]",3,"['combine all ingredients in a punch bowl', 'a...",good party punch,"['rose wine', '7-up soda', 'orange juice conce...",4
228298,wounded dragon,16047,5,10649,2001-12-20,"['15-minutes-or-less', 'time-to-make', 'course...","[58.2, 0.0, 25.0, 0.0, 0.0, 0.0, 3.0]",3,['layer in the order given above into a shot g...,this is a fantastic name for a drink. i love t...,"['grenadine', 'bacardi 151 rum', 'midori melon...",3
229249,yemiser selatta ethiopian lentil salad with ...,133099,70,27678,2005-08-10,"['time-to-make', 'course', 'main-ingredient', ...","[309.7, 11.0, 9.0, 24.0, 33.0, 4.0, 14.0]",7,['place the lentils in a sieve and wash them u...,this is traditionally served during lent eithe...,"['dried lentils', 'red wine vinegar', 'vegetab...",7
230492,zesty italian peasant bread appetizer,95069,22,61121,2004-07-05,"['30-minutes-or-less', 'time-to-make', 'course...","[79.8, 4.0, 1.0, 7.0, 5.0, 5.0, 3.0]",9,"['preheat oven to 400 degrees f', 'place bread...","this came from the bhg website. quick, easy an...","['italian bread', 'olive oil', 'garlic clove',...",7


In [116]:
# Filtrer les recommandations SVD avec des contraintes
result = filter_recommendations(recommendations_svd)
print("Nombre de recommandations SVD filtrées : ", result.shape[0])
print(f"Recommandations pour l'utilisateur {new_user_id}:\n")
result.head(nRecipes)

Nombre de recommandations SVD filtrées :  15
Recommandations pour l'utilisateur 25076:



Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV)
28307,breaded garlic and dill fish low fat,82678,35,41409,2004-01-31,"['60-minutes-or-less', 'time-to-make', 'course...",11,['toast the panko bread crumbs on an unoiled b...,"this is a wonderful, healthy dish that's reall...","['white fish fillets', 'panko breadcrumbs', 't...",9,329.0,8.0,5.0,12.0,82.0,5.0,9.0
42597,chicken and broccoli mornay,99372,30,158304,2004-09-07,"['30-minutes-or-less', 'time-to-make', 'course...",5,"['for sauce , combine first 4 ingredients in s...","chicken, broccoli, and a cheesy gravy. who can...","['chicken gravy', 'swiss cheese', 'parmesan ch...",7,363.4,29.0,8.0,32.0,72.0,47.0,3.0
43342,chicken breast with portabella mushrooms,92571,40,142004,2004-06-03,"['60-minutes-or-less', 'time-to-make', 'course...",16,"['remove any unwanted fat from chicken', 'mix ...","i got tired of the typical chicken dinners, so...","['boneless skinless chicken breast halves', 's...",9,344.4,26.0,11.0,23.0,73.0,32.0,3.0
77167,easy microwave pouring custard,133138,7,11297,2005-08-10,"['15-minutes-or-less', 'time-to-make', 'course...",5,['in a microwave safe jug heat the milk on med...,**please note this was made in a 700wt microwa...,"['milk', 'egg yolks', 'sugar']",3,449.4,25.0,213.0,4.0,24.0,39.0,21.0
87637,fried egg sandwiches,43027,40,37779,2002-10-14,"['bacon', '60-minutes-or-less', 'time-to-make'...",12,['spread one side of each slice of toast with ...,this is something my grandmother used to make ...,"['bread', 'honey mustard', 'bacon', 'butter', ...",6,334.3,27.0,20.0,25.0,24.0,31.0,10.0
96287,greek chicken breasts,84147,35,87877,2004-02-16,"['60-minutes-or-less', 'time-to-make', 'course...",11,"['combine flour , salt , pepper , and oregano ...",this is another recipe i have had for years an...,"['flour', 'salt', 'fresh ground black pepper',...",13,376.9,29.0,10.0,37.0,59.0,25.0,5.0
111029,indian lentils and rice soup,22749,30,3288,2002-03-16,"['30-minutes-or-less', 'time-to-make', 'course...",8,['spray 3-quart saucepan with nonstick cooking...,"from start to finish, you can have lentil soup...","['green onions', 'gingerroot', 'crushed red pe...",13,364.4,5.0,28.0,10.0,38.0,11.0,21.0
115527,jolean s k sandwiches exceptional tuna melts,121324,25,185105,2005-05-09,"['30-minutes-or-less', 'time-to-make', 'course...",7,"['preheat oven to 325 degrees', 'mix all ingre...",don't know why my mom called these k-sandwiche...,"['tuna', 'velveeta cheese', 'hard-boiled eggs'...",8,427.6,28.0,35.0,37.0,39.0,28.0,15.0
160579,pineapple sesame chicken,207762,35,378710,2007-01-26,"['60-minutes-or-less', 'time-to-make', 'course...",13,"['cut chicken into bitesize chucks', 'drain pi...",this is a very easy dish to make in one skille...,"['boneless skinless chicken breasts', 'crushed...",12,426.3,27.0,85.0,52.0,58.0,41.0,13.0
163101,pork chops in soy sauce,105484,40,178271,2004-12-06,"['60-minutes-or-less', 'time-to-make', 'course...",3,"['season pork chops with salt , pepper , and g...","it's a recipe my mom always made growing up, a...","['pork chops', 'salt', 'pepper', 'garlic powde...",9,516.4,22.0,7.0,170.0,69.0,25.0,19.0
