In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the interaction records; a later cell inner-joins them with the recipe
# table on `recipe_id`.
data = pd.read_csv('./Data/interactions_train.csv')

In [3]:
# Read the raw recipes, expose the `id` column under the name `recipe_id`
# (the key the interactions are joined on), and keep only the first 100000 rows.
recipe_data = (
    pd.read_csv('./Data/RAW_recipes.csv')
    .rename(columns={"id": "recipe_id"})
    .iloc[:100000]
)

In [4]:
# Inner join: only interactions whose recipe is present in `recipe_data`
# survive. `sort=True` orders the result by the join key.
merged_recipe_interaction_data = pd.merge(
    data,
    recipe_data,
    how='inner',
    on='recipe_id',
    sort=True,
)

In [5]:
# Narrow the merged frame down to the columns used from here on.
user_recipe_matrix = merged_recipe_interaction_data.loc[
    :, ['user_id', 'recipe_id', 'u', 'i', 'date', 'name', 'rating']
]

In [6]:
# Reshape to one row per user and one column per recipe, with the rating as the
# cell value; (user, recipe) pairs without a rating come out as NaN.
user_recipe_matrix = pd.pivot_table(
    user_recipe_matrix,
    index='user_id',
    columns='recipe_id',
    values='rating',
)

In [8]:
# Re-number the rows 0..n-1 in place (the old index is discarded) so that row
# labels in `recipe_data` line up with row/column positions in `sim_matrix`.
recipe_data.reset_index(level = 0, inplace = True, drop=True)
# Lookup from recipe name to row index (not referenced by the functions visible
# in this part of the notebook).
ind = pd.Series(recipe_data.index, index = recipe_data['name'])
# Bigram-only TF-IDF over recipe names, with English stop words removed.
vectorizer = TfidfVectorizer(ngram_range=(2, 2), stop_words='english')
# `.astype('U')` forces every name to a unicode string before vectorising
# (presumably to tolerate missing names -- TODO confirm).
tf_data = vectorizer.fit_transform(recipe_data['name'].values.astype('U'))
# All-pairs cosine similarity between the recipe-name vectors: one row and one
# column per recipe.
# NOTE(review): the result is n x n; with the 100000-row cap applied when the
# recipes are loaded it can be very large in memory -- confirm the kernel can
# hold it before re-running.
sim_matrix = cosine_similarity(tf_data)

In [9]:
def recommend_from_id(recipe_id, top_n):
    """Return the recipes whose names are most similar to a given recipe.

    Parameters
    ----------
    recipe_id
        Value looked up in ``recipe_data['recipe_id']``. If it occurs more
        than once, the first matching row is used.
    top_n : int
        Maximum number of neighbours to return.

    Returns
    -------
    list of tuple
        ``(recipe_id, row_position, similarity)`` triples ordered by
        descending similarity; ties keep their row order. The query recipe
        itself is never part of the result. The list is empty when ``top_n``
        is not positive or when ``recipe_id`` is not present in
        ``recipe_data``.
    """
    if top_n <= 0:
        return []

    # Row positions of the query recipe; `sim_matrix` is addressed by position.
    matches = np.flatnonzero((recipe_data['recipe_id'] == recipe_id).values)
    if matches.size == 0:
        # Unknown recipe: nothing to recommend (previously an opaque IndexError).
        return []
    query_pos = int(matches[0])

    scores = np.asarray(sim_matrix[query_pos]).ravel()
    # A stable sort on the negated scores gives descending similarity while
    # keeping tied rows in their original order.
    order = np.argsort(-scores, kind='stable')
    # Drop the query recipe by position instead of assuming it sorts first:
    # an earlier row with an identical name ties at the top, and a name that
    # yields no features has similarity 0 even with itself.
    order = order[order != query_pos][:top_n]

    recipe_ids = recipe_data['recipe_id']
    return [(recipe_ids.iloc[pos], int(pos), scores[pos]) for pos in order]

In [112]:
def recommend_for_user(user_id, top_n=10):
    """Recommend recipes for a user from the recipes that user has rated.

    For every recipe the user rated, up to ``top_n`` similar recipes are
    fetched with ``recommend_from_id``. Each recommended recipe is scored with
    the similarity-weighted mean of the ratings of the rated recipes that led
    to it: ``sum(rating * similarity) / sum(similarity)``.

    Parameters
    ----------
    user_id : int
        Row position of the user in ``user_recipe_matrix``.
        NOTE(review): the lookup is positional (``.iloc``), not by the
        ``user_id`` index label -- confirm this is what callers intend.
    top_n : int, default 10
        Number of neighbours requested per rated recipe and maximum length of
        the returned list.

    Returns
    -------
    list of tuple
        At most ``top_n`` tuples
        ``(score, name, rating, recipe_id, row_position, similarity,
        rating * similarity)`` sorted by descending ``score`` (ties ordered by
        descending similarity). ``rating`` and ``similarity`` describe the
        rated recipe that matched this recommendation most strongly. Each
        recommended recipe appears at most once.
    """
    if top_n <= 0:
        return []

    user_vector = user_recipe_matrix.iloc[user_id]
    rated = user_vector.dropna()

    # One candidate per (rated recipe, similar recipe) pair:
    # (rating, recommended recipe id, recommended row position, similarity).
    candidates = []
    for rated_recipe_id, rating in rated.items():
        for rec_recipe_id, rec_row, similarity in recommend_from_id(rated_recipe_id, top_n):
            candidates.append((rating, rec_recipe_id, rec_row, similarity))

    # Strongest matches first, so the first pair seen for a recipe is its best.
    candidates.sort(key=lambda cand: cand[3], reverse=True)

    weighted_sum = {}   # recipe id -> sum(rating * similarity)
    weight_total = {}   # recipe id -> sum(similarity)
    best_match = {}     # recipe id -> (rating, row position, similarity)
    for rating, rec_recipe_id, rec_row, similarity in candidates:
        if similarity == 0:
            # Zero-similarity filler carries no information; skip only this
            # pair so it cannot hide a genuine match for the same recipe.
            continue
        weighted_sum[rec_recipe_id] = weighted_sum.get(rec_recipe_id, 0.0) + rating * similarity
        weight_total[rec_recipe_id] = weight_total.get(rec_recipe_id, 0.0) + similarity
        best_match.setdefault(rec_recipe_id, (rating, rec_row, similarity))

    def _score(rec_recipe_id):
        return weighted_sum[rec_recipe_id] / weight_total[rec_recipe_id]

    # `best_match` preserves insertion (descending similarity) order and the
    # sort is stable, so equal scores stay ordered by similarity.
    ranked = sorted(best_match.items(), key=lambda kv: _score(kv[0]), reverse=True)[:top_n]

    names = recipe_data['name']
    return [
        (_score(rec_recipe_id), names.iloc[rec_row], rating, rec_recipe_id,
         rec_row, similarity, rating * similarity)
        for rec_recipe_id, (rating, rec_row, similarity) in ranked
    ]

In [115]:
# Example: recommendations for the user stored at row position 2369 of
# `user_recipe_matrix` (the function looks the row up with `.iloc`).
recommend_for_user(2369)

[(5.0,
  'brussels sprouts with black pepper butter   hazelnuts',
  5.0,
  95489,
  30798,
  0.5446774733549232,
  2.7233873667746162),
 (5.0,
  'butter cookies',
  5.0,
  106361,
  31779,
  0.47215445172129916,
  2.3607722586064956),
 (5.0,
  'g  g  s butter cookies',
  5.0,
  338221,
  89493,
  0.47215445172129916,
  2.3607722586064956),
 (5.0,
  'grilled corn with red pepper butter',
  5.0,
  245789,
  98776,
  0.4165077631020702,
  2.0825388155103512),
 (5.0,
  '1 2 3 peanut butter cookies',
  5.0,
  377326,
  779,
  0.36873102171315714,
  1.8436551085657857),
 (5.0,
  'b b s peanut butter cookies',
  5.0,
  315591,
  11783,
  0.36873102171315714,
  1.8436551085657857),
 (5.0,
  'black pepper pasta',
  5.0,
  36165,
  24647,
  0.33357825476262654,
  1.6678912738131326),
 (5.0,
  'black pepper shrimp',
  5.0,
  331637,
  24649,
  0.32566319149385,
  1.6283159574692498),
 (5.0,
  'black pepper biscuits',
  5.0,
  330225,
  24633,
  0.3202026149427287,
  1.6010130747136437),
 (5.0,
  