In [30]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [31]:
# Charger/traiter les données
data = pd.read_csv('../data/raw/raw_recipes.csv')
data = data.drop_duplicates().dropna()
users = pd.read_csv('../data/raw/PP_users.csv')
users = users.drop_duplicates().dropna()
interactions = pd.read_csv('../data/raw/RAW_interactions.csv')
interactions = interactions.drop_duplicates().dropna()

In [32]:
# Créer un utilisateur fictif et lui attribuer des notes à 10 recettes
best_recipes = (
    interactions.groupby('recipe_id')['rating']
    .mean()
    .sort_values(ascending=False)
    .head(10)
    .index
)

new_user_id = users['u'].max() + 1  # Générer un nouvel ID
new_user_ratings = pd.DataFrame({
    'user_id': [new_user_id] * 10,
    'recipe_id': best_recipes,
    'rating': [4, 3, 5, 2, 5, 1, 3, 4, 5, 5]
})

# Ajouter l'utilisateur fictif aux données d'interactions
interactions = pd.concat([interactions, new_user_ratings], ignore_index=True)

# Créer des mappings pour les utilisateurs et les recettes
user_mapping = {user_id: idx for idx, user_id in enumerate(interactions['user_id'].unique())}
recipe_mapping = {recipe_id: idx for idx, recipe_id in enumerate(interactions['recipe_id'].unique())}

# Mettre à jour les IDs dans les interactions
interactions['user_id'] = interactions['user_id'].map(user_mapping)
interactions['recipe_id'] = interactions['recipe_id'].map(recipe_mapping)

# Construire la matrice creuse
sparse_matrix = csr_matrix((
    interactions['rating'], 
    (interactions['user_id'], interactions['recipe_id'])
))

# Entraîner le modèle k-NN
knn_model = NearestNeighbors(metric='cosine', algorithm='brute')
knn_model.fit(sparse_matrix)

In [33]:
# Fonction pour obtenir des recommandations
def get_recommendations(user_id, nResults, n_neighbors):
    user_idx = user_mapping[user_id]
    try:
        distances, indices = knn_model.kneighbors(sparse_matrix[user_idx], n_neighbors=n_neighbors)
    except ValueError:
        print("Aucune données disponible. Utilisation des recettes les mieux notées globalement.")
        top_recipes = (
            interactions.groupby('recipe_id')['rating']
            .mean()
            .sort_values(ascending=False)
            .head(nResults)
        )
        top_recipe_ids = [key for key, value in recipe_mapping.items() if value in top_recipes.index]
        return data[data['id'].isin(top_recipe_ids)]
    
    similar_users = [list(user_mapping.keys())[i] for i in indices.flatten()]
    similar_user_ratings = interactions[interactions['user_id'].isin(similar_users)]
    
    if similar_user_ratings.empty:
        print("Aucun utilisateur similaire trouvé. Utilisation des recettes les mieux notées globalement.")
        top_recipes = (
            interactions.groupby('recipe_id')['rating']
            .mean()
            .sort_values(ascending=False)
            .head(nResults)
        )
        top_recipe_ids = [key for key, value in recipe_mapping.items() if value in top_recipes.index]
        return data[data['id'].isin(top_recipe_ids)]
    
    top_recipes = similar_user_ratings.groupby('recipe_id')['rating'].mean().sort_values(ascending=False).head(nResults)
    top_recipe_ids = [key for key, value in recipe_mapping.items() if value in top_recipes.index]
    recommended_recipes = data[data['id'].isin(top_recipe_ids)]
    return recommended_recipes

In [34]:
# Application
nRecipes = 1000
nNeighbors = 100
recommendations = get_recommendations(new_user_id, nRecipes, nNeighbors)
print("Nombre de recommandations : ", recommendations.shape[0])
print(f"Recommandations pour l'utilisateur {new_user_id}:\n")
recommendations.head(nRecipes)

Nombre de recommandations :  166
Recommandations pour l'utilisateur 25076:



Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
122,100 broccoli casserole,116679,50,107334,2005-04-12,"['60-minutes-or-less', 'time-to-make', 'course...","[212.0, 25.0, 11.0, 19.0, 11.0, 50.0, 4.0]",5,['cook broccoli until tender in the amount of ...,this is a nice change of pace from the cheez w...,"['frozen broccoli', 'cream cheese', 'dry onion...",7
2546,addictive chicken tenders one taste and you r...,69990,50,48263,2003-08-28,"['60-minutes-or-less', 'time-to-make', 'course...","[450.4, 51.0, 2.0, 54.0, 69.0, 98.0, 1.0]",6,"['preheat oven to 350', 'combine all dry ingre...","i found this recipe on a low-carb site, (dh is...","['chicken tenders', 'parmesan cheese', 'dried ...",8
3975,almond anise biscotti,78048,60,49879,2003-12-06,"['60-minutes-or-less', 'time-to-make', 'course...","[138.9, 9.0, 42.0, 3.0, 5.0, 13.0, 6.0]",17,"['cream butter and 1 cup sugar', 'add eggs one...",one of my most requested recipes.,"['butter', 'sugar', 'eggs', 'almond extract', ...",10
4182,aloo matar,220883,40,450571,2007-04-05,"['60-minutes-or-less', 'time-to-make', 'course...","[184.3, 14.0, 22.0, 11.0, 9.0, 6.0, 7.0]",10,"['heat oil over medium heat', 'add cumin seed ...",this is a recipe for aloo matar that i got off...,"['potatoes', 'frozen peas', 'green chili', 'wh...",14
4381,amanda s thai peanut,175230,25,236462,2006-06-28,"['30-minutes-or-less', 'time-to-make', 'course...","[480.6, 22.0, 54.0, 54.0, 42.0, 13.0, 23.0]",12,"['in a large pot , cook fettucini as directed'...",by far the best and healthiest thai peanut i'v...,"['fettuccine', 'broccoli', 'carrot', 'garlic',...",10
...,...,...,...,...,...,...,...,...,...,...,...,...
222239,vegetarian pho vietnamese noodle soup,201382,40,334301,2006-12-21,"['60-minutes-or-less', 'time-to-make', 'course...","[304.3, 0.0, 9.0, 5.0, 7.0, 0.0, 23.0]",22,"['tear greens into bite-sized pieces', 'use bo...",from sally bernstien's article on vietnamese f...,"['rice noodles', 'seitan', 'bean sprouts', 'na...",21
225975,white bean zucchini basil salad,125580,20,16975,2005-06-11,"['30-minutes-or-less', 'time-to-make', 'course...","[139.5, 9.0, 6.0, 10.0, 12.0, 4.0, 5.0]",8,['rinse beans in colander and place in a large...,a delicious summer salad perfect for picnics a...,"['zucchini', 'roma tomatoes', 'great northern ...",11
227203,whoopie pies the real deal lancaster co r...,54787,38,41706,2003-02-24,"['60-minutes-or-less', 'time-to-make', 'course...","[518.7, 46.0, 169.0, 9.0, 8.0, 32.0, 19.0]",14,"['cream shortening , sugar and eggs', 'add van...",mmmmmmmmmmmmmmmmm! if you've never had a whoop...,"['oil', 'brown sugar', 'eggs', 'flour', 'salt'...",12
228032,wonderful curried sweet potato soup,161324,30,225021,2006-03-24,"['30-minutes-or-less', 'time-to-make', 'course...","[283.8, 13.0, 45.0, 16.0, 16.0, 12.0, 13.0]",16,"['soup:', 'in a soup pot , heat oil over mediu...",this is a wonderful soup. and look at the list...,"['oil', 'onion', 'garlic clove', 'curry paste'...",12


In [35]:
# Ajout du tri par contraintes

# Séparer les valeurs de la colonne 'nutrition' en plusieurs colonnes
nutrition_columns = ['calories','total fat (PDV)','sugar (PDV)','sodium (PDV)','protein (PDV)','saturated fat (PDV)','carbohydrates (PDV)']
nutrition_data = recommendations['nutrition'].str.strip('[]').str.split(',', expand=True)
nutrition_data.columns = nutrition_columns

# Ajouter les nouvelles colonnes au DataFrame original
recommendations = recommendations.drop(columns=['nutrition'])
recommendations = pd.concat([recommendations, nutrition_data], axis=1)

for col in ['calories', 'total fat (PDV)', 'protein (PDV)', 'carbohydrates (PDV)']:
    recommendations[col] = pd.to_numeric(recommendations[col], errors='coerce')
    
# Appliquer les contraintes manuellement pour ce premier exemple
filtered_recommendations = recommendations[
    (recommendations['calories'] >= 300) &
    (recommendations['calories'] <= 700) &
    (recommendations['protein (PDV)'] >= 20) &
    (recommendations['total fat (PDV)'] <= 30) &
    (recommendations['minutes'] <= 40)
    ]

print("Nombre de recommandations filtrées : ", filtered_recommendations.shape[0])
print(f"Recommandations pour l'utilisateur {new_user_id}:\n")
filtered_recommendations.head(nRecipes)

Nombre de recommandations filtrées :  14
Recommandations pour l'utilisateur 25076:

