In [None]:
import pandas as pd
import numpy as np
from collections import defaultdict

In [None]:
from surprise import SVD
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import KFold
from surprise.model_selection import cross_validate
from surprise.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold

In [None]:
# Semicolon-separated item profiles. item_profiles2 supplies the nutrition columns read by
# healthiness(); item_profiles3 supplies the 'Recipe ID' / 'Ingredient ID' pairs read by
# content_based_predict().
item_profiles2 = pd.read_csv(
    '../input/item-profiles2.csv',
    sep=';',
)
item_profiles3 = pd.read_csv(
    '../input/item-profiles3.csv',
    sep=';',
)

# Tab-separated ratings file; the column names are supplied here via `names`.
ratings = pd.read_csv(
    '../input/user-item-rating.csv',
    sep='\t',
    names=['user_id', 'item_id', 'rating'],
)

In [None]:
# Scores the healthiness of recipes; the resulting points are also used as weights by the post filter.
def healthiness(itemsDataframe):
    """Add healthiness columns to ``itemsDataframe`` in place.

    Reads the columns 'Fat (g)', 'Sugar (g)', 'Saturated Fat (g)' and 'Calories (kCal)'
    and writes, in this order, the columns 'Unhealtiness', 'fatPoints', 'sugarPoints',
    'satFatPoints' and 'Healthiness'. Nothing is returned.

    Each nutrient is first expressed as a percentage of the recipe's energy
    (fat and saturated fat are counted as 9 kCal per gram, sugar as 4 kCal per gram).
    The percentage is then mapped to points between -5 and 5, and 'Healthiness'
    is the sum of the three point columns.
    """
    calories = itemsDataframe['Calories (kCal)']

    def energy_percentage(gram_column, kcal_per_gram):
        # Share of the recipe's calories that comes from this nutrient, in percent.
        return ((itemsDataframe[gram_column] * kcal_per_gram) / calories) * 100

    def award_points(share, points_column, reward_limits, penalty_limits):
        # The boundaries and points are tunable to steer the post filter weights in
        # accordance with the health recommendations.
        # Every assignment overwrites the previous ones for the rows it matches, so the
        # bands must be applied from the widest to the narrowest: first the neutral band
        # just above the recommended limit, then increasingly severe penalties, then
        # increasingly generous rewards.
        itemsDataframe.loc[share > reward_limits[0], points_column] = 0
        for penalty, limit in enumerate(penalty_limits, start=1):
            itemsDataframe.loc[share >= limit, points_column] = -penalty
        for reward, limit in enumerate(reward_limits, start=1):
            itemsDataframe.loc[share <= limit, points_column] = reward

    fat = energy_percentage('Fat (g)', 9)
    sugar = energy_percentage('Sugar (g)', 4)
    saturatedFat = energy_percentage('Saturated Fat (g)', 9)

    # Continuous variant of the score. Fat is divided by 3 because its recommended limit
    # is three times that of the other two nutrients. Nothing else in this notebook reads
    # this column.
    itemsDataframe['Unhealtiness'] = (fat / 3) + sugar + saturatedFat

    award_points(fat, 'fatPoints', (30, 20, 10, 5, 1), (40, 50, 60, 70, 80))
    award_points(sugar, 'sugarPoints', (10, 7, 5, 3, 1), (13, 16, 19, 22, 25))
    award_points(saturatedFat, 'satFatPoints', (10, 7, 5, 3, 1), (13, 16, 19, 22, 25))

    itemsDataframe['Healthiness'] = (
        itemsDataframe['fatPoints']
        + itemsDataframe['satFatPoints']
        + itemsDataframe['sugarPoints']
    )

In [None]:
# Score every recipe in item_profiles2; this adds the 'Healthiness' column in place.
healthiness(item_profiles2)

# postfilter() joins the predictions against the *index* of item_profiles2 and
# evaluations_at_k() reads scores with item_profiles2.at[iid, 'Healthiness'].
# read_csv leaves a positional RangeIndex, so without re-keying both would pick the
# healthiness of "row number iid" instead of the recipe whose id is iid.
# Key the frame by recipe id (the 'Recipe ID' column itself is kept).
if not item_profiles2['Recipe ID'].is_unique:
    raise ValueError(
        "item_profiles2 contains duplicate 'Recipe ID' values; "
        "healthiness scores cannot be keyed by recipe."
    )
# .to_numpy() keeps the index unnamed, so 'Recipe ID' stays unambiguous as a column label.
item_profiles2.index = item_profiles2['Recipe ID'].to_numpy()

# Recipe id -> healthiness score.
healthiness_profiles = item_profiles2[['Recipe ID', 'Healthiness']]
healthiness_dict = dict(
    zip(healthiness_profiles['Recipe ID'], healthiness_profiles['Healthiness'])
)

In [None]:
# Post filter. Surprise hands predictions over as a list of immutable tuples, so they are
# loaded into a dataframe, adjusted there, and turned back into a list of tuples.
def postfilter(predictions, healthinessFactor):
    """Shift every estimated rating by ``healthinessFactor`` times the item's healthiness.

    Parameters
    ----------
    predictions : iterable of 5-field records ``(uid, iid, r_ui, est, details)``.
    healthinessFactor : number
        Weight of the healthiness points; with 0 the estimates keep their value.

    Returns
    -------
    list of ``Prediction`` namedtuples with the fields uid, iid, r_ui, est, details.

    Notes
    -----
    The healthiness score is fetched with an inner join of the 'iid' column against the
    index of the global ``item_profiles2``; predictions whose 'iid' is not a label of that
    index are dropped from the result.
    NOTE(review): the scores are only attached to the right recipes if that index holds
    recipe ids - confirm how item_profiles2 is indexed.
    """
    columns = ['uid', 'iid', 'r_ui', 'est', 'details']
    frame = pd.DataFrame.from_records(predictions, columns=columns)

    with_health = frame.join(item_profiles2['Healthiness'], how='inner', on='iid', sort=False)

    # This is where the estimates are transformed, based on the healthinessFactor
    # parameter and the health points of the recipes.
    with_health['est'] = with_health['est'] + (with_health['Healthiness'] * healthinessFactor)

    adjusted = with_health.drop(columns='Healthiness')
    return list(adjusted.itertuples(name='Prediction', index=False))

In [None]:
from collections import defaultdict
from sklearn.metrics import ndcg_score

# We used Surprise for much of this code https://surprise.readthedocs.io/en/stable/FAQ.html
def evaluations_at_k(predictions, k=10, threshold=4):
    """Compute precision@k, recall@k, NDCG@k, mean top-k healthiness and MAE.

    Parameters
    ----------
    predictions : list of 5-field records ``(uid, iid, true_r, est, details)``
        uid = user identity, iid = item identity, true_r = true rating,
        est = estimated rating. The list is iterated several times.
    k : int, default 10
        Number of highest ranking predictions to consider.
    threshold : number, default 4
        Minimum rating for an item to count as relevant (true rating) or as
        recommended (estimated rating).

    Returns
    -------
    tuple ``(precision, recall, ndcg, healthinessAverage, mae)``
        ``healthinessAverage`` is NaN when no user has at least ``k`` predictions.
        0 is the tipping point between inside and outside of the recommendations;
        a negative value means unhealthy.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    from sklearn.metrics import mean_absolute_error

    if k < 1:
        raise ValueError("k must be a positive integer")

    # Collect (item, estimate) pairs per user.
    ranked = defaultdict(list)
    for uid, iid, _, est, _ in predictions:
        ranked[uid].append((iid, est))

    # Keep the k best estimates of every user. Users with fewer than k predictions
    # (a consequence of the data partitioning) are left out of the healthiness average.
    top_n = {}
    for uid, user_ratings in ranked.items():
        if len(user_ratings) < k:
            continue
        user_ratings.sort(key=lambda pair: pair[1], reverse=True)
        top_n[uid] = user_ratings[:k]

    # Average healthiness of the top-k lists.
    # The score is read from item_profiles2 by index label.
    # NOTE(review): this is only the recipe's own score if the index of item_profiles2
    # holds recipe ids - confirm how item_profiles2 is indexed.
    healthinessAverage = 0
    for recommended in top_n.values():
        listHealthiness = 0
        for iid, _ in recommended:
            listHealthiness = listHealthiness + item_profiles2.at[iid, 'Healthiness']
        healthinessAverage = healthinessAverage + (listHealthiness / k)
    # Without any complete top-k list there is nothing to average.
    healthinessAverage = healthinessAverage / len(top_n) if top_n else float('nan')

    # This time the estimated rating and the true rating are grouped per user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    for uid, user_ratings in user_est_true.items():

        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda pair: pair[0], reverse=True)
        head = user_ratings[:k]

        # Number of relevant items
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

        # Number of recommended items in top k
        n_rec_k = sum((est >= threshold) for (est, _) in head)

        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold))
                              for (est, true_r) in head)

        # Precision@K: Proportion of recommended items that are relevant.
        # When n_rec_k is 0, Precision is undefined. We here set it to 0.
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 0

        # Recall@K: Proportion of relevant items that are recommended.
        # When n_rel is 0, Recall is undefined. We here set it to 0.
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 0

    # Calculate the averages of precision and recall
    precision = sum(precisions.values()) / len(precisions)
    recall = sum(recalls.values()) / len(recalls)

    # MAE and NDCG. All predictions are passed as one single row, so scikit-learn ranks
    # them as one list rather than per user.
    y_true = [[x[2] for x in predictions]]
    y_pred = [[x[3] for x in predictions]]
    mae = mean_absolute_error(y_true, y_pred)

    # k has to be passed by keyword: it is keyword-only in current scikit-learn.
    ndcg = ndcg_score(y_true, y_pred, k=k)

    return precision, recall, ndcg, healthinessAverage, mae
    

In [None]:
def input_svd_estimates(train_ratings):
    """Fill every unrated (user, recipe) pair of ``train_ratings`` with an SVD estimate.

    Parameters
    ----------
    train_ratings : DataFrame with the columns 'user_id', 'recipe_id' and 'rating'.

    Returns
    -------
    DataFrame holding one row per pair of a user and a recipe that occur in
    ``train_ratings``: the true rating where one exists, otherwise the estimate.

    Side effect: the global ``svd`` model is (re)fitted on ``train_ratings``.
    """
    # Cross join the distinct users and recipes through a constant helper key, so that
    # each unique pairing gets a spot.
    users = train_ratings['user_id'].drop_duplicates().to_frame()
    recipes = train_ratings['recipe_id'].drop_duplicates().to_frame()
    users['key'] = 1
    recipes['key'] = 1
    # `columns=` instead of a positional axis: pandas 2.0 removed the positional form.
    users_recipes = users.merge(recipes, how='outer', on=['key']).drop(columns='key')

    # Fit the original train ratings.
    reader = Reader()
    data = Dataset.load_from_df(train_ratings, reader)
    svd.fit(data.build_full_trainset())

    # Extend the true ratings with a row for every pair that has no rating yet.
    train_ratings = pd.merge(train_ratings, users_recipes, how='outer', on=['user_id', 'recipe_id'])

    # Estimate the ratings where there is no true rating. Skipped when nothing is
    # missing, because a row-wise apply over an empty selection does not yield a Series.
    missing = train_ratings['rating'].isna()
    if missing.any():
        unrated = train_ratings.loc[missing, ['user_id', 'recipe_id']]
        train_ratings.loc[missing, 'rating'] = [
            svd.predict(user_id, recipe_id, r_ui=None)[3]
            for user_id, recipe_id in zip(unrated['user_id'], unrated['recipe_id'])
        ]

    return train_ratings

In [None]:
# Our content based algorithm, after Freyne and Berkovsky.
def content_based_predict(train, test):
    """Predict test ratings from the user's average rating of each recipe ingredient.

    Parameters
    ----------
    train, test : positional row indices into the global ``ratings`` frame.

    Returns
    -------
    list of ``Prediction`` namedtuples ``(uid, iid, r_ui, est, details)``.
    Test rows for which no estimate can be made are left out: either the recipe has no
    ingredients among the training rows, or the user has rated none of its ingredients.
    """
    all_ratings = ratings.rename(columns={'item_id': 'recipe_id'})
    train_ratings = all_ratings.iloc[train]

    # If this line is not commented out, a call to content_based_predict will be a hybrid approach.
    #
    #train_ratings = input_svd_estimates(train_ratings)
    #

    # Arrange the data: one row per (rating, ingredient of the rated recipe).
    recipe_id_ingredient_id = item_profiles3[['Recipe ID', 'Ingredient ID']]
    ingredientRatings = train_ratings.merge(
        recipe_id_ingredient_id, left_on='recipe_id', right_on='Recipe ID'
    ).drop(columns='Recipe ID')  # keyword form: pandas 2.0 removed the positional axis
    ingredientRatings = ingredientRatings[['user_id', 'recipe_id', 'Ingredient ID', 'rating']]

    # Fit the data.
    # NOTE(review): a recipe's ingredient list is extended once per training rating of
    # that recipe, so it repeats; kept as is so that the estimates are unchanged.
    recipes_dict = defaultdict(list)
    rating_lists = defaultdict(lambda: defaultdict(list))
    for user_id, recipe_id, ingredient_id, rating in ingredientRatings.itertuples(name=None, index=False):
        recipes_dict[recipe_id].append(ingredient_id)
        rating_lists[user_id][ingredient_id].append(rating)

    # user -> ingredient -> average of the ratings the user gave to recipes containing it.
    ingredients_dict = {
        user_id: {
            ingredient_id: sum(given) / len(given)
            for ingredient_id, given in per_ingredient.items()
        }
        for user_id, per_ingredient in rating_lists.items()
    }

    # Load the test set. Copied, because columns are added to it below.
    test_ratings = all_ratings.iloc[test].copy()

    # Predict: average the user's ingredient scores over the ingredients of the recipe.
    estimates = []
    for user_id, recipe_id, _ in test_ratings.itertuples(name=None, index=False):
        known = ingredients_dict.get(user_id, {})
        scores = [
            known[ingredient_id]
            for ingredient_id in recipes_dict.get(recipe_id, ())
            if ingredient_id in known
        ]
        estimates.append(sum(scores) / len(scores) if scores else np.nan)

    # Prepare the predictions for evaluation.
    test_ratings['rating_est'] = estimates
    test_ratings['details'] = "{'is it awesome': YES}"
    test_ratings.columns = ['uid', 'iid', 'r_ui', 'est', 'details']
    test_ratings = test_ratings.dropna()
    predictions = list(test_ratings.itertuples(name='Prediction', index=False))

    return predictions

In [None]:
# Create random baseline.
import random
def get_baseline(predictions, random_state=None):
    """Return a copy of ``predictions`` whose estimates are replaced by random ratings.

    Parameters
    ----------
    predictions : iterable of 5-field records ``(uid, iid, r_ui, est, details)``.
    random_state : int, optional
        Seed for a reproducible baseline. With the default ``None`` the numbers are
        drawn from numpy's global random state.

    Returns
    -------
    list of ``Prediction`` namedtuples; 'est' is a random integer from 1 to 5,
    all other fields are taken over unchanged.
    """
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    predDF = pd.DataFrame.from_records(predictions, columns=['uid', 'iid', 'r_ui', 'est', 'details'])
    # The upper bound of randint is exclusive, so this draws from 1..5.
    predDF['est'] = rng.randint(1, 6, predDF.shape[0])
    return list(predDF.itertuples(name='Prediction', index=False))

In [None]:
# Tune the SVD parameters with a cross validated grid search over the full ratings data.
reader = Reader()
data = Dataset.load_from_df(ratings, reader)

# NOTE(review): 'n_epochs' lists the value 5 twice, so every learning rate /
# regularisation pair is searched twice with the same number of epochs - confirm
# whether a second, different value was intended.
param_grid = {
    'n_epochs': [5,5],
    'lr_all': [0.01, 0.1],
    'reg_all': [0.01,0.1],
}
grid_search = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=2)
grid_search.fit(data)

# Keep the algorithm instance with the best RMSE parameters; it is fitted again per fold below.
svd = grid_search.best_estimator['rmse']
print(grid_search.best_params['rmse'])

In [None]:
# We use ordinary k fold for the SVD.
kf = KFold(n_splits=5)

# Running totals of every metric over the folds; they are averaged in the final print.
precisionAvg = 0
recallAvg = 0
ndcgAvg = 0
healthinessAvg = 0
maeS = 0
# Counted instead of hard-coding 5, so the averages stay right if n_splits is changed.
foldsEvaluated = 0
for trainset, testset in kf.split(data):
    svd.fit(trainset)
    # The second parameter is the post filter healthiness factor.
    predictions2 = postfilter(svd.test(testset), 0.0)
    # Random baseline for the same fold; pass it to evaluations_at_k instead of
    # predictions2 to evaluate the baseline.
    randomized_predictions = get_baseline(predictions2)
    # Evaluate the predictions.
    precision, recall, ndcg, healthinessAverage, mae = evaluations_at_k(predictions2)
    precisionAvg += precision
    recallAvg += recall
    ndcgAvg += ndcg
    healthinessAvg += healthinessAverage
    maeS += mae
    foldsEvaluated += 1

print("Precision:", precisionAvg/foldsEvaluated, "Recall:", recallAvg/foldsEvaluated, "NDCG_score:", ndcgAvg/foldsEvaluated, "Healthiness:", healthinessAvg/foldsEvaluated, 'MAE:', maeS/foldsEvaluated)

In [None]:
# We must use stratified splits for the content based algorithm.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

# Running totals of every metric over the folds; they are averaged in the final print.
precisionAvgr = 0
recallAvgr = 0
ndcgAvgr = 0
healthinessAvgr = 0
maeAvgr = 0
# Counted instead of hard-coding 5, so the averages stay right if n_splits is changed.
foldsEvaluatedr = 0
# Run the algorithm once per fold and evaluate.
for train, test in skf.split(ratings, ratings['rating']):
    predictions1 = content_based_predict(train, test)
    # Post filter with a healthiness factor of 0.2.
    # Original authors' note: the post filter works, but the healthiness score was
    # found to have broken down at this point.
    predictions1 = postfilter(predictions1, 0.2)
    #randomized_predictions = get_baseline(predictions1)
    precision, recall, ndcg, healthinessAverage, mae = evaluations_at_k(predictions1)
    precisionAvgr += precision
    recallAvgr += recall
    ndcgAvgr += ndcg
    healthinessAvgr += healthinessAverage
    maeAvgr += mae
    foldsEvaluatedr += 1
    print("Precision:", precision, "Recall:", recall, "NDCG_score:", ndcg, "Healthiness:", healthinessAverage, "MAE:", mae)

print("Precision:", precisionAvgr/foldsEvaluatedr, "Recall:", recallAvgr/foldsEvaluatedr, "NDCG_score:", ndcgAvgr/foldsEvaluatedr, "Healthiness:", healthinessAvgr/foldsEvaluatedr, "MAE:", maeAvgr/foldsEvaluatedr)

In [None]:
# This is the reason why we think the recall at 10 score is also broken:
# per user, the cut-off of 10 divided by the number of that user's ratings above 3,
# averaged over all users that have at least one such rating.
foo = 10 / (
    ratings.loc[ratings['rating'] > 3]
    .groupby('user_id')['rating']
    .count()
)
foo.mean()