In [1]:
import pandas as pd
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.utils import shuffle
from datetime import datetime
from sortedcontainers import SortedList
import ast
import json
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
import warnings
from tqdm import tqdm
from sklearn.metrics import ndcg_score, dcg_score,mean_absolute_error as mae,average_precision_score as map_k
warnings.filterwarnings('ignore')

In [2]:
df_raw = pd.read_csv("archive/RAW_interactions.csv")

In [3]:
df_recipes = pd.read_csv("archive/RAW_recipes.csv")

df_interaction_train = pd.read_csv("archive/interactions_train.csv")
df_interaction_test= pd.read_csv("archive/interactions_validation.csv")

In [4]:
df_raw[["user_id","recipe_id","rating"]].count()

user_id      1132367
recipe_id    1132367
rating       1132367
dtype: int64

In [5]:
df_raw.drop_duplicates(subset=["user_id","recipe_id","rating"],keep="last")[["user_id","recipe_id","rating"]].count()

user_id      1132367
recipe_id    1132367
rating       1132367
dtype: int64

In [6]:
df_raw=df_raw[["user_id","recipe_id","rating"]]

In [7]:
user_ids_count = Counter(df_raw.user_id)

In [8]:
user_ids_keep = [i for (i,j) in user_ids_count.most_common()[::-1] if j>=5]

In [9]:
df_small = df_raw[(df_raw.user_id.isin(user_ids_keep))].reset_index(drop=True)

In [10]:
df_small.dropna(inplace=True)

In [11]:
df_small.user_id.isna().sum(),df_small.recipe_id.isna().sum()

(0, 0)

In [12]:
df_small.user_id.nunique(),df_small.recipe_id.nunique()

(23086, 211039)

## Creating Recipe to Macro Embedding mapper

In [15]:
# 0-> calories , 4 -> protein , 6-> carbs
indices={0:'calories',4:'protein',6:'carbohydrates'}

In [16]:
recipe_to_nutrition_mapping={}

In [17]:
def update_recipe_nutrition_mapping(row):
    i=int(row.id)
    j=[ast.literal_eval(row.nutrition)[i] for i in indices.keys()]
    
    if i not in recipe_to_nutrition_mapping:
        recipe_to_nutrition_mapping[i]=j
    else:
        pass

In [18]:
df_recipes.apply(lambda x:update_recipe_nutrition_mapping(x),axis=1);

In [20]:
with open('recipe_nutrition_mapping.txt', 'w') as convert_file:
     convert_file.write(json.dumps(recipe_to_nutrition_mapping))

In [22]:
#recipe_to_nutrition_mapping=json.load(open("recipe_nutrition_mapping.txt"))

In [24]:
df_small=df_small[["user_id","recipe_id","rating"]]

In [25]:
df_small.shape

(872021, 3)

In [26]:
df_small=df_small[~df_small["recipe_id"].isna()]

In [27]:
df_small.shape

(872021, 3)

In [30]:
!pwd

/Users/abhinav23run/Desktop/ML Project/RoboChef/notebooks/recommendation


## Converting the user_ids and recipe_ids into contiguous value series

In [52]:
user_id_set=set(df_small.user_id.values)
user2idx={}
i=0
for user in user_id_set:
    user2idx[user]=i
    i+=1
    
recipe_id_set=set(df_small.recipe_id.values)
recipe2idx={}
i=0
for recipe in recipe_id_set:
    recipe2idx[recipe]=i
    i+=1

In [53]:
df_small.loc[:,'user_idx'] = df_small.apply(lambda row: user2idx[row.user_id], axis=1)
df_small.loc[:,'recipe_idx'] = df_small.apply(lambda row: recipe2idx[row.recipe_id], axis=1)

In [54]:
new_user_to_old_user_mapping={}
new_recipe_to_old_recipe_mapping={}

In [55]:
def update_new_id_to_old_id_mapping(row):
    new_user=int(row.user_idx)
    new_recipe=int(row.recipe_idx)
    old_user=int(row.user_id)
    old_recipe=int(row.recipe_id)
    
    if new_user not in new_user_to_old_user_mapping:
        new_user_to_old_user_mapping[new_user]=old_user
    else:
        pass
    
    if new_recipe not in new_recipe_to_old_recipe_mapping:
        new_recipe_to_old_recipe_mapping[new_recipe]=old_recipe
    else:
        pass

In [57]:
df_small.apply(lambda x:update_new_id_to_old_id_mapping(x),axis=1);

In [113]:
old_recipe_to_new_recipe_mapping={v:k for k,v in new_recipe_to_old_recipe_mapping.items()}

In [28]:
#***********Train Test Split**********

In [33]:
ad_train=pd.read_csv("ad_interaction_train.csv")

In [34]:
ad_test=pd.read_csv("ad_interaction_test.csv")

In [35]:
ad_train.shape

(887483, 5)

In [36]:
ad_test.shape

(244884, 5)

In [69]:
ad_total=pd.concat([ad_train,ad_test])

In [70]:
ad_total.shape

(1132367, 5)

In [37]:
#Method2 (Stratified Sampling)

In [66]:
data_train_v1=df_small.merge(ad_train,on=["user_id","recipe_id","rating"],how="inner")[["user_idx","recipe_idx","rating"]]

In [67]:
data_test_v1=df_small.merge(ad_test,on=["user_id","recipe_id","rating"],how="inner")[["user_idx","recipe_idx","rating"]]

In [71]:
df_small=df_small.merge(ad_total,on=["user_id","recipe_id","rating"],how="inner")[["user_idx","recipe_idx","rating"]]

In [75]:
df_small.shape,data_train_v1.shape,data_test_v1.shape

((872021, 3), (697918, 3), (174103, 3))

In [76]:
df_small.head()

Unnamed: 0,user_idx,recipe_idx,rating
0,6608,21550,4
1,20438,21550,5
2,1428,23511,4
3,22432,46089,5
4,10227,46089,5


## Creating User - Macros Average Embedding

In [83]:
def get_average_embedding(df):
    embedding_lol=list(df["recipe_idx"].map(new_recipe_to_old_recipe_mapping).map(recipe_to_nutrition_mapping).values)
    return [float(sum(col))/len(col) for col in zip(*embedding_lol)]

In [87]:
user_to_embedding_mapping={}

In [86]:
len(list(data_test_v1.user_idx.unique()))

20049

In [88]:
for i,j in enumerate(list(df_small.user_idx.unique())):
    if i%1000==0:
        print(f"{i} users done")
    df_subset=df_small.loc[df_small["user_idx"]==j]
    user_to_embedding_mapping[j]=get_average_embedding(df_subset) 

0 users done
1000 users done
2000 users done
3000 users done
4000 users done
5000 users done
6000 users done
7000 users done
8000 users done
9000 users done
10000 users done
11000 users done
12000 users done
13000 users done
14000 users done
15000 users done
16000 users done
17000 users done
18000 users done
19000 users done
20000 users done
21000 users done
22000 users done
23000 users done


In [90]:
user_to_embedding_mapping

{6608: [254.70857142857142, 29.714285714285715, 7.457142857142857],
 20438: [343.42708333333326, 30.666666666666668, 13.583333333333334],
 1428: [431.51290322580644, 44.54838709677419, 11.161290322580646],
 22432: [491.4285641025628, 41.895384615384614, 15.012820512820513],
 10227: [478.7178733031677, 29.782805429864254, 17.282805429864254],
 9183: [546.5670833333337, 36.6375, 17.677083333333332],
 22094: [515.6799999999995, 31.251351351351353, 19.883783783783784],
 13878: [484.0416167664675, 45.24850299401198, 12.802395209580839],
 10652: [331.0166666666667, 37.833333333333336, 10.0],
 13728: [424.5250000000001, 39.2, 13.3],
 22071: [530.99609929078, 46.10992907801418, 16.826241134751772],
 21929: [221.86249999999998, 21.5, 9.0],
 809: [465.7547770700634, 38.961783439490446, 13.579617834394904],
 12554: [426.34141414141436, 38.592592592592595, 11.7665544332211],
 15496: [520.8823529411765, 52.0, 18.176470588235293],
 13007: [466.1863636363636, 38.40909090909091, 12.5],
 16404: [469.85

In [91]:
# a dictionary to tell us which users have rated which recipes
user2recipe = {}
# a dicationary to tell us which recipes have been rated by which users
recipe2user = {}
# a dictionary to look up ratings
userrecipe2rating = {}
print("Calling: update_user2movie_and_movie2user")
count = 0

Calling: update_user2movie_and_movie2user


In [94]:
def update_user2recipe_and_recipe2user(row):
    
#     global count
#     count += 1
#     if count % 100000 == 0:
#         print("processed: %.3f" % (float(count)/cutoff))

    i = int(row.user_idx)
    j = int(row.recipe_idx)
    if i not in user2recipe:
        user2recipe[i] = [j]
    else:
        user2recipe[i].append(j)

    if j not in recipe2user:
        recipe2user[j] = [i]
    else:
        recipe2user[j].append(i)

    userrecipe2rating[(i,j)] = row.rating

In [95]:
data_train_v1.apply(update_user2recipe_and_recipe2user, axis=1);

In [96]:
userrecipe2rating_test = {}
user2recipe_test={}
print("Calling: update_userrecipe2rating_test")
count = 0

Calling: update_userrecipe2rating_test


In [97]:
def update_userrecipe2rating_test(row):
    
    global count
    count += 1
    if count % 100000 == 0:
        print("processed: %.3f" % (float(count)/len(data_test_v1)))

    i = int(row.user_idx)
    j = int(row.recipe_idx)
    if i not in user2recipe_test:
        user2recipe_test[i] = [j]
    else:
        user2recipe_test[i].append(j)
    userrecipe2rating_test[(i,j)] = row.rating

In [98]:
data_test_v1.apply(update_userrecipe2rating_test, axis=1);

processed: 0.574


In [99]:
n1 = np.max(list(user2recipe.keys())) + 1
n2 = np.max([u for (u, m), r in userrecipe2rating_test.items()])
# the test set may contain recipes the train set doesn't have data on
m1 = np.max(list(recipe2user.keys()))
m2 = np.max([m for (u, m), r in userrecipe2rating_test.items()])
M = max(m1, m2) + 1
N = max(n1, n2) + 1
print("N:", N, "M:", M)

N: 23087 M: 211039


## User - User Collaborative Filtering

In [100]:
K = 25 # number of neighbors we'd like to consider
limit = 5 # number of common recipes users must have in common in order to consider
neighbors = {} # store neighbors in this list
averages = {} # each user's average rating for later use
deviations = {} # each user's deviation for later use
SIGMA_CONST = 1e-6
recommended_recipe_list = []

In [101]:
"""
Code to Generate most closest neighbors along with their correlation weights 
These weights will be used to make rating predictions 
"""
for j1,i in enumerate(list(set(data_train_v1.user_idx.values))):
    
    recipes_i = user2recipe[i]
    recipes_i_set = set(recipes_i)

    # calculate avg and deviation
    ratings_i = { recipe:userrecipe2rating[(i, recipe)] for recipe in recipes_i }
    avg_i = np.mean(list(ratings_i.values()))
    dev_i = { recipe:(rating - avg_i) for recipe, rating in ratings_i.items() }
    dev_i_values = np.array(list(dev_i.values()))
    sigma_i = np.sqrt(dev_i_values.dot(dev_i_values))

    # save these for later use
    averages[i]=avg_i
    deviations[i]=dev_i
    
    sl = SortedList()
    
    for i1,j in enumerate(list(set(data_train_v1.user_idx.values))):
        if j!=i:
            recipes_j = user2recipe[j]
            recipes_j_set = set(recipes_j)
            common_recipes = (recipes_i_set & recipes_j_set)
            if(len(common_recipes)>limit):
                
                # calculate avg and deviation
                ratings_j = { recipe:userrecipe2rating[(j, recipe)] for recipe in recipes_j }
                avg_j = np.mean(list(ratings_j.values()))
                dev_j = { recipe:(rating - avg_j) for recipe, rating in ratings_j.items() }
                dev_j_values = np.array(list(dev_j.values()))
                sigma_j = np.sqrt(dev_j_values.dot(dev_j_values))
                
                # calculate correlation coefficient
                numerator = sum(dev_i[m]*dev_j[m] for m in common_recipes)
                denominator = ((sigma_i+SIGMA_CONST) * (sigma_j+SIGMA_CONST))
                w_ij = numerator / (denominator)
                # insert into sorted list and truncate
                # negate absolute weight, because list is sorted ascending and we get all neighbors with the highest correlation
                # maximum value (1) is "closest"
                sl.add((-(w_ij), j))
                # Putting an upper cap on the number of neighbors
                if len(sl)>K:
                    del sl[-1]
    if i%100==0:                
        print((i,j1,sl))
    neighbors[i]=sl

(0, 0, SortedList([]))
(100, 100, SortedList([(-0.04505732044598958, 8942), (-0.011694921129094368, 4522), (-0.0034480150302647786, 17371), (-0.0032906848548559255, 15727), (-0.0020331542037883682, 18701), (-0.0018882746884966927, 9481), (-0.001792298836464829, 7416), (-0.0012425974079504662, 7938), (-0.0012090793144346843, 7362), (-6.615916591492522e-05, 22432), (0.0007824029251573247, 1375), (0.0008470282122689687, 19353), (0.0020957346867665817, 13543), (0.0024777940886602174, 1446), (0.005622633977942064, 5527), (0.007178279699492343, 3779)]))
(200, 200, SortedList([]))
(300, 300, SortedList([]))
(400, 400, SortedList([]))
(500, 500, SortedList([]))
(600, 600, SortedList([(-0.008770303189417055, 22304), (-0.0062979297220098745, 22432), (-0.005926475769414905, 10560), (-0.005197607167886721, 19353), (-0.005169805757426817, 17233), (-0.004881654198595771, 15727), (-0.004839412884795932, 10506), (-0.0047482353561614276, 18771), (-0.004668644546816483, 198), (-0.004500693003272304, 144

(10500, 10499, SortedList([]))
(10600, 10599, SortedList([]))
(10700, 10699, SortedList([]))
(10800, 10799, SortedList([]))
(10900, 10899, SortedList([]))
(11000, 10999, SortedList([(0.00759250983114669, 1446)]))
(11100, 11099, SortedList([]))
(11200, 11199, SortedList([]))
(11300, 11299, SortedList([]))
(11400, 11399, SortedList([]))
(11500, 11499, SortedList([]))
(11600, 11599, SortedList([(-0.07946575259265352, 1309), (-0.06439429857633291, 9535), (-0.0523853296769466, 13052), (-0.044895712913293445, 21439), (-0.03339836438406285, 19533), (-0.025731604879314297, 1155), (-0.025375474758516123, 14218), (-0.023921783638844608, 20912), (-0.023026311917222434, 17974), (-0.018733450180002057, 17243), (-0.012515346808341028, 10394), (-0.012172509683179204, 15518), (-0.010765866272722862, 6922), (-0.005701033273571229, 22873), (-0.003436078125228475, 1751), (-0.0033240776682926325, 5702), (-0.0028540726451977676, 8017), (-0.002233988847018599, 3701), (-0.002038111054314191, 14766), (-0.0019

(22500, 22499, SortedList([]))
(22600, 22599, SortedList([]))
(22700, 22699, SortedList([]))
(22800, 22799, SortedList([]))
(22900, 22899, SortedList([(-0.05660011640628598, 16397), (-0.030156955296037045, 1751), (-0.027825485386864968, 15727), (-0.009154811322444051, 1446), (-0.00812119426985093, 19420), (-0.007958765075982954, 2222), (-0.006154406603439868, 21238), (-0.004844535222791389, 17272), (-0.0044899205843979785, 3338), (-0.002945758829171824, 1559), (-0.002689359706241472, 18771), (-0.0023420402127803846, 6389), (-0.002341848806596457, 23011), (-0.0022926713694388728, 10070), (-0.0021958008278016423, 19045), (-0.0020304804242136675, 16604), (-0.001946387525444585, 17233), (-0.00187855634839748, 9481), (-0.0018514573016041285, 198), (-0.0017417970116407131, 1375), (-0.001741358802747042, 3673), (-0.0014744769441612604, 114), (-0.0013922841524187156, 17718), (-0.0012296147419968576, 15832), (-0.0012035727446254813, 2481)]))
(23000, 22999, SortedList([]))


## Predict Fucntion 

In [103]:
def predict(i, m):
    """
    Function to generate predicted ratings for a given user i and recipe m
    """
    # calculate the weighted sum of deviations
    numerator = 0
    denominator = 0
    neighbors_get=neighbors.get(i,0)
    
    if neighbors_get!=0:
        for neg_w, j in neighbors.get(i):
            # remember, the weight is stored as its negative
            # so the negative of the negative weight is the positive weight
            try:
                numerator += -neg_w * deviations[j][m]
                denominator += abs(neg_w)
            except KeyError:
              # neighbor may not have rated the same rating
              # don't want to do dictionary lookup twice
              # so just throw exception
              pass

        if denominator == 0:
            prediction = averages.get(i)
        else:
            prediction = numerator / denominator + averages.get(i)
    else:
        prediction = averages.get(i,-1)
    if prediction==-1:
        #print("New User Identified")
        pass
    else:
        prediction = min(5, prediction)
        prediction = max(0.5, prediction) # min rating is 0.5
    return prediction

In [104]:
train_predictions = {}
train_targets = {}
for (i, m), target in userrecipe2rating.items():
    
    
    # calculate the prediction for this recipe
    prediction = predict(i, m)

    # save the prediction and target
#     train_predictions.append(prediction)
#     train_targets.append(target)
    train_predictions[(i,m)]=prediction
    train_targets[(i,m)]=target                                     

In [105]:
test_predictions = {}
test_targets = {}
# same thing for test set
for (i, m), target in userrecipe2rating_test.items():
    
    # calculate the prediction for this recipe
    prediction_test = predict(i, m)

    # save the prediction and target
    test_predictions[(i,m)]=prediction_test
    test_targets[(i,m)]=target

## Prediction Evaluation metrics

### RMSE and MSE

In [106]:
# calculate accuracy
def mse(p, t):
    
    p = np.array(p)
    t = np.array(t)
    return np.mean((p - t)**2)

def rmse(p, t):
    
    p = np.array(p)
    t = np.array(t)
    return np.sqrt(np.mean((p - t)**2))

train_mse=mse(list(train_predictions.values()), list(train_targets.values()))
test_mse=mse(list(test_predictions.values()), list(test_targets.values()))

train_rmse=rmse(list(train_predictions.values()), list(train_targets.values()))
test_rmse=rmse(list(test_predictions.values()), list(test_targets.values()))
print('train mse:', mse(list(train_predictions.values()), list(train_targets.values())))
print('test mse:', mse(list(test_predictions.values()), list(test_targets.values())))

print('train rmse:', rmse(list(train_predictions.values()), list(train_targets.values())))
print('test rmse:', rmse(list(test_predictions.values()), list(test_targets.values())))

train mse: 0.801959975340608
test mse: 0.943338575075392
train rmse: 0.8955221802616661
test rmse: 0.9712561840603086


### MAE

In [107]:
train_mae=mae(list(train_predictions.values()), list(train_targets.values()))
test_mae=mae(list(test_predictions.values()), list(test_targets.values()))

print('train mae:', mae(list(train_predictions.values()), list(train_targets.values())))
print('test mae:', mae(list(test_predictions.values()), list(test_targets.values())))

train mae: 0.522643529733127
test mae: 0.5670269049700766


## Ranking Based Evaluation Metrics

**We do not have rankings as of now in our truth data set , so we leverage cosine similarity between recipes's embedding and user's average embedding to break up recipes having the same rating into probable rankings . This would enable use to calculate various Ranking based evaluation metrics such as NDCG and MAP@k**

**Our embedding vector has 3 features -> Calories , Protein and Carbs.**

In [115]:
data_test_v1_pred=data_test_v1
data_test_v1_pred.loc[:,"rating_pred"]=pd.Series([round(test_predictions[i],3) for i in userrecipe2rating_test.keys()]) 

In [116]:
def get_cosine_similarity(user_id):
    user_vector=[user_to_embedding_mapping[user_id]]
    recipes_vector={j:recipe_to_nutrition_mapping[j] for j in [new_recipe_to_old_recipe_mapping[i] for i in user2recipe_test[user_id]]}
    l=list(cosine_similarity(np.array(user_vector),np.array(list(recipes_vector.values())))[0])
    recipes_vector.update(zip(recipes_vector, l))
    recipe_to_cosine_similarity={old_recipe_to_new_recipe_mapping[k]:v for k,v in recipes_vector.items()}
    df_subset=data_test_v1_pred[data_test_v1_pred["user_idx"]==user_id]
    df_subset["cosine_similarity"]=df_subset["recipe_idx"].map(recipe_to_cosine_similarity)
    return df_subset

In [117]:
df_test_cosine=pd.DataFrame(columns=['user_idx',
 'recipe_idx',
 'rating',
 'rating_pred',
 'cosine_similarity'])

In [118]:
for i,user in enumerate(list(set(data_test_v1_pred.user_idx))):
    if i%1000==0:
        print(f"{i} users done")
    df_test_cosine=pd.concat([df_test_cosine,get_cosine_similarity(user)],ignore_index=True)

0 users done
1000 users done
2000 users done
3000 users done
4000 users done
5000 users done
6000 users done
7000 users done
8000 users done
9000 users done
10000 users done
11000 users done
12000 users done
13000 users done
14000 users done
15000 users done
16000 users done
17000 users done
18000 users done
19000 users done
20000 users done


## Creating Ranks based on Ratings and Cosine Similarity

In [119]:
df_test_cosine["Original Rank"]=df_test_cosine.sort_values(["rating","cosine_similarity"],ascending=[False,False]).groupby(['user_idx']).cumcount() + 1
df_test_cosine["Prediction Rank"]=df_test_cosine.sort_values(["rating_pred","cosine_similarity"],ascending=[False,False]).groupby(['user_idx']).cumcount() + 1

In [120]:
y_actual=[]
y_pred=[]

In [121]:
for i,user in enumerate(list(set(df_test_cosine.user_idx))):
    if i%1000==0:
        print(f"{i} users done")
    df_subset=df_test_cosine[df_test_cosine["user_idx"]==user]
    y_actual.append(list(df_subset["Original Rank"].values))
    y_pred.append(list(df_subset["Prediction Rank"].values))

0 users done
1000 users done
2000 users done
3000 users done
4000 users done
5000 users done
6000 users done
7000 users done
8000 users done
9000 users done
10000 users done
11000 users done
12000 users done
13000 users done
14000 users done
15000 users done
16000 users done
17000 users done
18000 users done
19000 users done
20000 users done


## Computing Mean NDCG score

In [122]:
ndcg_list=[]
for i in range(len(y_actual)):
    try:
        ndcg_list.append(ndcg_score(np.asarray([y_actual[i]]),np.asarray([y_pred[i]])))
    except:
        ndcg_list.append(np.nan)

In [123]:
print(f"Mean NDCG score is {np.nanmean(ndcg_list)}")

Mean NDCG score is 0.9398573406018421


## Computing MAP@k

In [124]:
mapk_list=[]
for i in range(len(y_actual)):
    try:
        mapk_list.append(map_k(np.asarray([y_actual[i]]),np.asarray([y_pred[i]])))
    except:
        mapk_list.append(np.nan)

In [125]:
print(f"Average Precision score is {np.nanmean(mapk_list)}")

Average Precision score is 1.0


In [126]:
df_evaluation=pd.DataFrame(columns=['min_common_recipes','test_mse','test_rmse','test_mae','test_ndcg','test_map'])

In [127]:
df_evaluation.loc[len(df_evaluation.index)] = [5, test_mse, test_rmse,test_mae, np.nanmean(ndcg_list), np.nanmean(mapk_list)]

In [128]:
df_evaluation

Unnamed: 0,min_common_recipes,test_mse,test_rmse,test_mae,test_ndcg,test_map
0,5.0,0.943339,0.971256,0.567027,0.939857,1.0


## Function to Generate Recommendations for a given user

In [None]:
def get_recipes_recommendations(i,k):
    """
    Input : i ---> user_id for which we need recommendations
            k ---> The number of recipe recommendations wanted
    Output : 
            most_related_neighbors ---> a list of most related user_id's based on pearson correlation coefficient
            recommended_recipes ---> list of recommended recipes based on similar user's likings
            common_recipes_dict ---> Dictionary of neighbors and common_recipes 
    """
        common_recipes_dict = {}
        recipes_i = user2recipe[i]
        recipes_i_set = set(recipes_i)

        # calculate avg and deviation
        ratings_i = {recipe: userrecipe2rating[(i, recipe)] for recipe in recipes_i}
        avg_i = np.mean(list(ratings_i.values()))
        dev_i = {recipe: (rating - avg_i) for recipe, rating in ratings_i.items()}
        dev_i_values = np.array(list(dev_i.values()))
        sigma_i = np.sqrt(dev_i_values.dot(dev_i_values))

        # save these for later use
        averages[i] = avg_i
        deviations[i] = dev_i

        sl = SortedList()
        for j in list(set(data_train_v1.user_idx.values)):
            if j != i:
                recipes_j = user2recipe[j]
                recipes_j_set = set(recipes_j)
                common_recipes = (recipes_i_set & recipes_j_set)

                if (len(common_recipes) > limit):
                    common_recipes_dict[j] = list(common_recipes)
                    ratings_j = {recipe: userrecipe2rating[(
                        j, recipe)] for recipe in recipes_j}
                    avg_j = np.mean(list(ratings_j.values()))
                    dev_j = {recipe: (rating - avg_j)
                             for recipe, rating in ratings_j.items()}
                    dev_j_values = np.array(list(dev_j.values()))
                    sigma_j = np.sqrt(dev_j_values.dot(dev_j_values))

                    # calculate correlation coefficient
                    numerator = sum(dev_i[m]*dev_j[m] for m in common_recipes)
                    denominator = ((sigma_i+SIGMA_CONST)
                                   * (sigma_j+SIGMA_CONST))
                    w_ij = numerator / (denominator)
                    # insert into sorted list and truncate
                    # negate absolute weight, because list is sorted ascending and we get all neighbors with the highest correlation
                    # maximum value (1) is "closest"
                    sl.add((-(w_ij), j))
                    # Putting an upper cap on the number of neighbors
                    if len(sl) > K:
                        del sl[-1]

        neighbors[i] = sl
        try:
            most_related_neighbors = [j for i, j in neighbors[i][:10]]
        except:
            most_related_neighbors = [j for i, j in neighbors[i]]
        for i in most_related_neighbors:
            recipes_i = user2recipe[i]
            recipes_i_set = set(recipes_i)
            ratings_i = {recipe: userrecipe2rating[(i, recipe)] for recipe in recipes_i}
            recommended_recipe_list.append(ratings_i)

        total = Counter()
        for j in recommended_recipe_list:
            total += Counter(j)
        recommended_recipe_ids = [i for i, j in total.most_common(k)]
        recommended_recipes = []

        for recipe_id in recommended_recipe_ids:
            recommended_recipes.append(
                [(i, j) for i, j in recipe2idx.items() if j == recipe_id][0][0])
        recommended_recipes = list(set(recommended_recipes)-set(user2recipe[i]))

        return most_related_neighbors, recommended_recipes, common_recipes_dict

In [None]:
## Testing for Sample user 
most_related_neighbors, recommended_recipes, common_recipes_dict=get_recipes_recommendations(6608,10)