# Imports

In [1]:
import pandas as pd
import pickle
from collections import defaultdict

from surprise import NormalPredictor
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise import AlgoBase
from surprise.model_selection import train_test_split as train_test_split_sup
 
import metrics
import sampling

import numpy as np

from sklearn.model_selection import train_test_split as train_test_split
from surprise.model_selection import LeaveOneOut
import random

from metrics import recommender_map

from sampling import create_train_test_dataframe
from sampling import train_test_surprise_format
from top_n_evaluation import create_anti_testset_for_user_all
from top_n_evaluation import create_anti_testset_for_user
from top_n_evaluation import create_recommendation_top_n_evaluation

# Constants

In [35]:
# Parquet inputs; both paths are relative, so they resolve against the
# kernel's current working directory.
RECIPES_BASE = '../Data/base/recipes_base.parquet'
RATINGS_BASE = '../Data/base/ratings_base.parquet'

# Load data

In [36]:
# Read the base ratings table from its parquet file.
ratings_base = pd.read_parquet(RATINGS_BASE)

In [37]:
# Reduce the base ratings with the project's sampling helper.
# NOTE(review): [20, 10] are presumably minimum rating-count thresholds —
# confirm against sampling.get_ratings_with_min_number_list.
ratings_sample = sampling.get_ratings_with_min_number_list(ratings_base, [20, 10])

In [38]:
# Read the base recipes table from its parquet file.
# NOTE(review): recipes_base is not referenced again in the visible cells.
recipes_base = pd.read_parquet(RECIPES_BASE)

# Functions

In [6]:
def create_sample_n_ratings_per_user(df, n=10):
    """Draw a random sample of at most ``n`` rows for every ``AuthorId``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an ``AuthorId`` column to group on.
    n : int, default 10
        Maximum number of rows kept per author. Authors with fewer than
        ``n`` rows keep all of their rows.

    Returns
    -------
    pandas.DataFrame
        The sampled rows, labelled with their original index values.
    """
    def _sample_group(group):
        # Sampling without replacement cannot return more rows than the
        # group holds, so the request is capped at the group size.
        return group.sample(min(len(group), n), replace=False)

    return df.groupby('AuthorId', as_index=False, group_keys=False).apply(_sample_group)

In [7]:
def create_sample_n_popular_users(df, n=2500):
    """Keep only the rows written by the ``n`` authors with the most rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an ``AuthorId`` column.
    n : int, default 2500
        Number of authors to keep, ranked by how many rows they have.

    Returns
    -------
    pandas.DataFrame
        The subset of ``df`` whose ``AuthorId`` is among the selected authors.
    """
    # One row per author with the row count in a column named "size".
    rows_per_author = df.groupby(['AuthorId'], as_index=False, group_keys=False).size()
    busiest_authors = rows_per_author.sort_values(by=['size'], ascending=False).head(n)
    keep_mask = df.AuthorId.isin(busiest_authors.AuthorId.unique())
    return df[keep_mask]

In [10]:
# Wrap the (author, recipe, rating) columns in a Surprise Dataset that uses a
# 0-5 rating scale.
rating_reader = Reader(rating_scale=(0, 5))
user_item_ratings_dataset = Dataset.load_from_df(
    ratings_sample[["AuthorId", "RecipeId", "Rating"]],
    rating_reader,
)

In [11]:
# Split the sampled ratings into train/test frames, then convert both to the
# format the Surprise algorithms consume.
# NOTE(review): 0.2 is presumably the test fraction and 13 the random seed —
# confirm against sampling.create_train_test_dataframe.
train_df, test_df = create_train_test_dataframe(ratings_sample, 0.2, 13)
trainset, testset = train_test_surprise_format(train_df, test_df)

# Create recommendations

In [17]:
# Restore the pickled SVD model from disk.
# NOTE: pickle.load can execute arbitrary code, so only open model files that
# come from a trusted source.
with open("../Data/SVD/author_min_10/SVD_best_model.obj", 'rb') as model_file:
    algo = pickle.load(model_file)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x22c28df4550>

In [1]:
# Produce the top-N recommendations and the matching relevant items with the
# loaded model. Later cells treat recommendations_svd as a mapping from user
# key to a sliceable list of recommendations.
# NOTE(review): the exact shape of relevant_items_svd is not visible here —
# confirm in top_n_evaluation.create_recommendation_top_n_evaluation.
recommendations_svd, relevant_items_svd = create_recommendation_top_n_evaluation(
    train_df,
    ratings_sample,
    algorithm=algo,
    word2vec_vectors=None,
    sample_size=500,
    user_sample_size=0,
    k=100,
    knn=False,
    verbose=True,
)

In [22]:
# with open("G:/Recipes/tests_samples/20_10/svd/samples/svd_recommendations_sample500.obj", "wb") as pickle_file:
#     pickle.dump(recommendations_svd, pickle_file)
    
# with open("G:/Recipes/tests_samples/20_10/svd/samples/svd_relevant_items_sample500.obj", "wb") as pickle_file:
#     pickle.dump(relevant_items_svd, pickle_file)
    
# with open("G:/Recipes/tests_samples/20_10/testset_20_10.obj", "wb") as pickle_file:
#     pickle.dump(testset, pickle_file)
    
# with open("G:/Recipes/tests_samples/20_10/trainset_20_10.obj", "wb") as pickle_file:
#     pickle.dump(trainset, pickle_file)

# MAP

In [77]:
# MAP@k of the SVD recommendations for every cutoff in k_list.
# NOTE(review): range(5, 105, 2) yields odd cutoffs up to 103, i.e. past the
# k=100 used when the recommendations were generated — confirm this is intended.
k_list = [1, *range(5, 105, 2)]
map_svd_list = [
    metrics.recommender_map(
        recommendations=recommendations_svd,
        relevant_items=relevant_items_svd,
        k=cutoff,
    )
    for cutoff in k_list
]

In [79]:
# Persist the MAP curve for later plotting.
# NOTE(review): this writes under ".../1000/" while the hit-rate, coverage,
# novelty and diversity cells write under ".../500/" — confirm the folder.
with open("G:/Recipes/Wykresy_samples/1000/mar_svd.obj", "wb") as out_file:
    pickle.dump(map_svd_list, out_file)
    
# # with open("G:/Recipes/tests_samples/20_10/svd/k_1_100_list.obj", "wb") as pickle_file:
# #     pickle.dump(k_list, pickle_file)

# Hit rate

In [18]:
# Hit rate@k of the SVD recommendations for every cutoff in k_list.
k_list = [1, *range(5, 105, 2)]
hr_svd_list = [
    metrics.hit_rate(
        recommendations=recommendations_svd,
        relevant_items=relevant_items_svd,
        k=cutoff,
    )
    for cutoff in k_list
]

In [19]:
# Persist the hit-rate curve for later plotting.
with open("G:/Recipes/Wykresy_samples/500/hr_svd.obj", "wb") as out_file:
    pickle.dump(hr_svd_list, out_file)

# Item coverage

In [21]:
# Item coverage at every cutoff: each user's list is truncated to the first
# `cutoff` entries and scored against the number of distinct recipes in the
# sampled ratings.
k_list = [1, *range(5, 105, 2)]
item_cov_k = []

for cutoff in k_list:
    top_k_per_user = {user: recs[:cutoff] for user, recs in recommendations_svd.items()}
    item_cov_k.append(
        metrics.item_coverage(top_k_per_user, ratings_sample.RecipeId.nunique(), min_rating=0)
    )

In [22]:
# Persist the item-coverage curve for later plotting.
with open("G:/Recipes/Wykresy_samples/500/item_cov_svd.obj", "wb") as out_file:
    pickle.dump(item_cov_k, out_file)

# Novelty

In [24]:
# Count how many ratings each recipe received in the sampled data (a Series
# indexed by RecipeId), then derive popularity ranks from those counts.
# NOTE(review): the structure returned by metrics.get_popularity_ranks is not
# visible here — confirm before relying on it.
number_of_ratings_per_recipe = ratings_sample.groupby(["RecipeId"]).size()
popularity_ranking = metrics.get_popularity_ranks(number_of_ratings_per_recipe)

In [6]:
# Novelty at every cutoff: each user's list is truncated to the first k
# entries before scoring.
# NOTE(review): the second argument used to be `ratings_per_recipe`, a name
# that is never defined in this notebook. The per-recipe rating counts are
# assumed to be what metrics.novelty expects; `popularity_ranking` is the
# other candidate — confirm against the metrics.novelty signature.
novelty_k = []
k_list = [1] + list(range(5, 105, 2))

for k in k_list:
    novelty_temp = {key: recs[:k] for key, recs in recommendations_svd.items()}
    nov = metrics.novelty(novelty_temp, number_of_ratings_per_recipe)
    novelty_k.append(nov)

In [7]:
# Persist the novelty curve for later plotting.
with open("G:/Recipes/Wykresy_samples/500/novelty_svd_new.obj", "wb") as out_file:
    pickle.dump(novelty_k, out_file)

# Diversity

In [28]:
# Load the two objects the diversity computation reads; without them the
# diversity cell fails with NameError on a fresh kernel.
# NOTE(review): recipe_id_to_pos presumably maps a recipe id to its position
# in the matrix — confirm. pickle.load can execute arbitrary code, so only
# load these files from a trusted source.
with open('G:/Recipes/Matrix/ING_CAT_50_SEPERATE/matrix.obj', 'rb') as pickle_file:
    matrix_names = pickle.load(pickle_file)

with open('G:/Recipes/Matrix/ING_CAT_50_SEPERATE/recipe_id_to_pos.obj', 'rb') as pickle_file:
    recipe_id_to_pos = pickle.load(pickle_file)

# pos_to_recipe_id is not used by the visible cells, so its load stays disabled.
# with open('G:/Recipes/Matrix/ING_CAT_50_SEPERATE/pos_to_recipe_id.obj', 'rb') as pickle_file:
#     pos_to_recipe_id = pickle.load(pickle_file)
    

In [2]:
import itertools
from sentence_transformers import util

# Diversity at every cutoff: each user's list is truncated to the first
# `cutoff` entries before scoring. matrix_names and recipe_id_to_pos are
# expected to have been loaded from the ING_CAT_50_SEPERATE pickles before
# this cell runs.
k_list = [1, *range(5, 105, 2)]
div_ing_k = []

for cutoff in k_list:
    # Progress trace: only cutoffs divisible by three are echoed.
    if cutoff % 3 == 0:
        print(cutoff)
    top_k_per_user = {user: recs[:cutoff] for user, recs in recommendations_svd.items()}
    div_ing_k.append(metrics.diversity(top_k_per_user, matrix_names, recipe_id_to_pos))

In [30]:
# Persist the diversity curve for later plotting.
with open("G:/Recipes/Wykresy_samples/500/diversity_svd.obj", "wb") as out_file:
    pickle.dump(div_ing_k, out_file)

# Most popular

In [80]:
# Select the rows of the users chosen by the sampling helper with n=2500.
# NOTE(review): this notebook also defines a local create_sample_n_popular_users,
# but the call here goes to the sampling module's version — confirm both agree.
n_popular_users = sampling.create_sample_n_popular_users(ratings_sample, n=2500)

In [81]:
# Every rating whose author is NOT in the popular-user selection.
is_popular_author = ratings_sample.AuthorId.isin(n_popular_users.AuthorId.unique())
least_popular_users = ratings_sample[~is_popular_author]

In [82]:
# Keep only the SVD recommendations of users in the popular-user selection.
# The author ids are collected into a set once, so each membership test is a
# hash lookup instead of recomputing and scanning the unique-id array per key.
popular_author_ids = set(n_popular_users.AuthorId.unique().tolist())
recommendations_n_popular = {
    key: value
    for key, value in recommendations_svd.items()
    if key in popular_author_ids
}

In [83]:
# Keep only the SVD recommendations of users outside the popular-user selection.
# The author ids are collected into a set once, so each membership test is a
# hash lookup instead of recomputing and scanning the unique-id array per key.
least_popular_author_ids = set(least_popular_users.AuthorId.unique().tolist())
recommendations_least_popular = {
    key: value
    for key, value in recommendations_svd.items()
    if key in least_popular_author_ids
}

In [84]:
# Relevant test-set items for every user in the popular-user selection.
relevant_items_most_popular = defaultdict(list)
relevant_items_most_popular.update(
    (author_id, metrics.get_relevant_items_for_user(author_id, test_df, min_rating=0))
    for author_id in n_popular_users.AuthorId.unique()
)

In [85]:
# Relevant test-set items for every user outside the popular-user selection.
relevant_items_least_popular = defaultdict(list)
relevant_items_least_popular.update(
    (author_id, metrics.get_relevant_items_for_user(author_id, test_df, min_rating=0))
    for author_id in least_popular_users.AuthorId.unique()
)

## MAP

In [86]:
# MAP@k restricted to users in the popular-user selection.
k_list = [1, *range(5, 105, 2)]
map_most_popular_list = [
    recommender_map(
        recommendations=recommendations_n_popular,
        relevant_items=relevant_items_most_popular,
        k=cutoff,
    )
    for cutoff in k_list
]

In [87]:
# MAP@k restricted to users outside the popular-user selection.
k_list = [1, *range(5, 105, 2)]
map_least_popular_list = [
    recommender_map(
        recommendations=recommendations_least_popular,
        relevant_items=relevant_items_least_popular,
        k=cutoff,
    )
    for cutoff in k_list
]

In [88]:
# Persist the MAP curves of both user groups for later plotting.
with open("G:/Recipes/Wykresy_samples/1000/mar_most_popular_svd.obj", "wb") as out_file:
    pickle.dump(map_most_popular_list, out_file)

with open("G:/Recipes/Wykresy_samples/1000/mar_least_popular_svd.obj", "wb") as out_file:
    pickle.dump(map_least_popular_list, out_file)

## HR

In [45]:
# Hit rate@k restricted to users in the popular-user selection.
# hit_rate is called through the metrics module: the notebook imports
# `metrics` but never imports a bare `hit_rate` name.
hr_most_popular_list = []
k_list = [1] + list(range(5, 105, 2))
for k in k_list:
    hr_k = metrics.hit_rate(recommendations=recommendations_n_popular,
                            relevant_items=relevant_items_most_popular,
                            k=k)
    hr_most_popular_list.append(hr_k)

In [46]:
# Hit rate@k restricted to users outside the popular-user selection.
# hit_rate is called through the metrics module: the notebook imports
# `metrics` but never imports a bare `hit_rate` name.
hr_least_popular_list = []
k_list = [1] + list(range(5, 105, 2))
for k in k_list:
    hr_k = metrics.hit_rate(recommendations=recommendations_least_popular,
                            relevant_items=relevant_items_least_popular,
                            k=k)
    hr_least_popular_list.append(hr_k)

In [47]:
# Persist the hit-rate curves of both user groups for later plotting.
with open("G:/Recipes/Wykresy_samples/500/hr_most_popular_svd.obj", "wb") as pickle_file:
    pickle.dump(hr_most_popular_list, pickle_file)

# The least-popular file must hold the hit-rate list, not the MAP list.
with open("G:/Recipes/Wykresy_samples/500/hr_least_popular_svd.obj", "wb") as pickle_file:
    pickle.dump(hr_least_popular_list, pickle_file)