In [16]:
import pandas as pd
from scipy.sparse import csr_matrix
import numpy as np
from sklearn.neighbors import NearestNeighbors
import requests
from bs4 import BeautifulSoup
from sklearn.neighbors import NearestNeighbors

In [17]:
def create_ratings_df(n_vote_user, n_vote_recipe, path="../src/recipesDB/RAW_interactions.csv"):
    """
    Load the interactions CSV and keep only the ratings made by sufficiently
    active users on sufficiently rated recipes.

    Each row of the result is one user-recipe interaction. Both activity
    thresholds are evaluated on the unfiltered file and then applied together,
    so after filtering a retained user (or recipe) may have fewer remaining
    rows than its threshold.

    :param n_vote_user: Keep users with strictly more than this many rows in the file
    :param n_vote_recipe: Keep recipes with strictly more than this many rows in the file
    :param path: Location of the interactions CSV; it must contain the columns
        "user_id", "recipe_id", "date" and "review"
    :return: The filtered rows of the file, without the "date" and "review" columns
    """
    df = pd.read_csv(path)

    # Work on the value_counts() Series directly. Wrapping it in a DataFrame and
    # selecting the column by the original name breaks on pandas >= 2.0, where
    # the counts Series is named "count" instead of "user_id" / "recipe_id".
    user_counts = df["user_id"].value_counts()
    recipe_counts = df["recipe_id"].value_counts()

    active_users = user_counts[user_counts > n_vote_user].index
    popular_recipes = recipe_counts[recipe_counts > n_vote_recipe].index

    keep = df["user_id"].isin(active_users) & df["recipe_id"].isin(popular_recipes)
    return df.loc[keep].drop(columns=["date", "review"])

def create_matrix(df):
    """
    Build a sparse recipe-by-user rating matrix together with the lookup
    tables that translate between ids and matrix positions.

    Ids are numbered in ascending sorted order, so index 0 is the smallest id.

    :param df: DataFrame with "user_id", "recipe_id" and "rating" columns
    :return: A tuple (X, user_mapper, recipe_mapper, user_inv_mapper, recipe_inv_mapper)
        where X is a csr_matrix of shape (number of recipes, number of users)
        holding the ratings, the mappers go from id to row/column index and
        the inverse mappers go from index back to id
    """
    # One pass per column: np.unique yields the sorted distinct ids and, via
    # return_inverse, the position of every row's id inside that sorted array.
    user_ids, user_index = np.unique(df["user_id"].to_numpy(), return_inverse=True)
    recipe_ids, recipe_index = np.unique(df["recipe_id"].to_numpy(), return_inverse=True)

    N = len(user_ids)
    M = len(recipe_ids)

    # Map ids to indices
    user_mapper = dict(zip(user_ids, range(N)))
    recipe_mapper = dict(zip(recipe_ids, range(M)))

    # Map indices to ids
    user_inv_mapper = dict(zip(range(N), user_ids))
    recipe_inv_mapper = dict(zip(range(M), recipe_ids))

    # Rows are recipes and columns are users, so X[i] is the rating vector of
    # recipe i across all users.
    # NOTE(review): if the same (user, recipe) pair occurs more than once the
    # sparse constructor is expected to add the ratings together - confirm
    # whether the input can contain duplicates.
    X = csr_matrix((df["rating"].to_numpy(), (recipe_index, user_index)), shape=(M, N))

    return X, user_mapper, recipe_mapper, user_inv_mapper, recipe_inv_mapper

def find_similar_recipes(recipe_id, ratings, k, metric='cosine', show_distance=False):
    """
    Return the recipes whose rating vectors are closest to the given recipe.

    The rating matrix is rebuilt from `ratings` on every call and searched
    with a brute-force nearest-neighbour model.

    :param recipe_id: Id of the recipe to find neighbours for; it must occur in `ratings`
    :param ratings: DataFrame with "user_id", "recipe_id" and "rating" columns
    :param k: Maximum number of similar recipes to return; fewer are returned
        when `ratings` contains fewer than k other recipes
    :param metric: Distance metric passed to NearestNeighbors
    :param show_distance: When True, return (recipe_id, distance) pairs instead of bare ids
    :return: A list of recipe ids ordered from most to least similar, or a list
        of (recipe_id, distance) tuples when show_distance is True
    :raises KeyError: If recipe_id does not occur in `ratings`
    """
    X, _, recipe_mapper, _, recipe_inv_mapper = create_matrix(ratings)

    recipe_ind = recipe_mapper[recipe_id]
    recipe_vec = X[recipe_ind]  # already a 1 x n_users sparse row

    # The query recipe is part of the fitted data, so ask for one extra
    # neighbour to make room for it, but never more than there are recipes.
    n_neighbors = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)

    # Always fetch both arrays: with return_distance=True kneighbors returns a
    # (distances, indices) tuple, which the ids must be read from explicitly.
    distances, indices = kNN.kneighbors(recipe_vec, return_distance=True)

    # Drop the query recipe by index rather than by position: when another
    # recipe ties with it on distance, the query is not guaranteed to be first.
    neighbours = [
        (recipe_inv_mapper[int(idx)], float(dist))
        for idx, dist in zip(indices[0], distances[0])
        if idx != recipe_ind
    ][:k]

    if show_distance:
        return neighbours
    return [neighbour_id for neighbour_id, _ in neighbours]


# Demo run: keep users and recipes that each have more than 50 ratings in the
# raw interactions file, then look up the 10 nearest recipes for one id.
ratings = create_ratings_df(50, 50)
# X, user_mapper, recipe_mapper, user_inv_mapper, recipe_inv_mapper = create_matrix(ratings)
# Example recipe ids to try:
# [486496, 495275, 474987, 495271, 16512, 16859, 105594, 121799, 14111, 33387]
recipe_id = 495275
similar_ids = find_similar_recipes(recipe_id, ratings, k=10)
print(f"Similar recipes to {recipe_id} are: {similar_ids}")

Similar recipes to 495275 are: [494438, 495577, 494435, 486496, 474987, 495271, 121799, 505862, 493413, 486261]


In [18]:
def main(recipe_id=495275, k=10, n_vote_user=50, n_vote_recipe=50):
    """
    Load the filtered ratings and print the recipes most similar to one recipe.

    :param recipe_id: Id of the recipe to find neighbours for
    :param k: Number of similar recipes to request
    :param n_vote_user: User activity threshold passed to create_ratings_df
    :param n_vote_recipe: Recipe popularity threshold passed to create_ratings_df
    :return: None; the result is printed
    """
    ratings = create_ratings_df(n_vote_user, n_vote_recipe)
    # No separate create_matrix call here: find_similar_recipes builds the
    # rating matrix itself from `ratings`.
    similar_ids = find_similar_recipes(recipe_id, ratings, k)
    print(f"Similar recipes to {recipe_id} are: {similar_ids}")
main()

Similar recipes to 495275 are: [494438, 495577, 494435, 486496, 474987, 495271, 121799, 505862, 493413, 486261]
