# Imports

In [12]:
# !pip install surprise

In [1]:
import pandas as pd
import pickle
from collections import defaultdict

from surprise import NormalPredictor
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise import AlgoBase
from surprise.model_selection import train_test_split as train_test_split_sup

import metrics
import sampling

import numpy as np

from sklearn.model_selection import train_test_split as train_test_split
from surprise.model_selection import LeaveOneOut
import random

from create_similarity_vectors import create_top_k_similar_vectors
from sentence_transformers import util
import torch
from surprise.prediction_algorithms.predictions import PredictionImpossible
import heapq

from metrics import recommender_map

from sampling import create_train_test_dataframe
from sampling import train_test_surprise_format
from top_n_evaluation import create_anti_testset_for_user_all
from top_n_evaluation import create_anti_testset_for_user
from top_n_evaluation import create_recommendation_top_n_evaluation

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

# Constants

In [3]:
# Location of the base ratings table (parquet), relative to the notebook's
# working directory.
# The commented-out variant points into a mounted Google Drive folder
# (presumably for running the notebook on Colab).
# RATINGS_BASE = './drive/MyDrive/Colab Notebooks/Recipes_new/Data/base/ratings_base.parquet'
RATINGS_BASE = '../Data/base/ratings_base.parquet'

# Load data

In [4]:
# Read the full ratings table from the parquet file into a DataFrame.
ratings_base = pd.read_parquet(RATINGS_BASE)

In [5]:
# Reduce the base ratings to the working sample used by every cell below.
# NOTE(review): [20, 10] presumably are minimum-rating-count thresholds
# applied by the helper — confirm their meaning and order in `sampling`.
ratings_sample = sampling.get_ratings_with_min_number_list(ratings_base, [20, 10])

## Create dataset

In [18]:
# Wrap the sampled ratings in a Surprise Dataset: columns are passed in
# (user, item, rating) order and ratings are declared to lie on a 0-5 scale.
rating_columns = ["AuthorId", "RecipeId", "Rating"]
rating_reader = Reader(rating_scale=(0, 5))
user_item_ratings_dataset = Dataset.load_from_df(
    ratings_sample[rating_columns],
    rating_reader,
)

In [19]:
# Split the sampled ratings into train/test DataFrames, then convert both
# into the structures Surprise works with.
# NOTE(review): 0.2 and 13 presumably are the test fraction and the random
# seed — confirm against `sampling.create_train_test_dataframe`.
train_df, test_df = create_train_test_dataframe(ratings_sample, 0.2, 13)
trainset, testset = train_test_surprise_format(train_df, test_df)

In [9]:
# Restore the previously trained model from disk (the recorded cell output
# shows a surprise NormalPredictor).
# NOTE: pickle can execute arbitrary code while loading — only open model
# files that were produced by this project.
with open("../Data/NP/model.obj", "rb") as model_file:
    algo = pickle.load(model_file)

<surprise.prediction_algorithms.random_pred.NormalPredictor at 0x1d7a2794700>

In [10]:
# Predict every rating in the held-out test set with the loaded model and
# report the rating-prediction errors. Both accuracy helpers print their
# value; the MAE call is last, so its return value is the cell's output.
predictions = algo.test(testset)
accuracy.rmse(predictions)
accuracy.mae(predictions)

RMSE: 1.1114
MAE:  0.7161


0.716093591032219

In [1]:
# Build the top-N recommendation lists and the matching relevant items used
# by every metric cell below.
# The results are bound to `recommendations_normal_sample` /
# `relevant_items_normal_sample` because those are the names all later cells
# read; the previous `recommendations_sample` / `relevant_items_sample`
# bindings were never used and left the later cells failing with NameError.
# Both results are indexed per user id by the metric cells below.
recommendations_normal_sample, relevant_items_normal_sample = create_recommendation_top_n_evaluation(
    train_df,
    ratings_sample,
    algorithm=algo,
    word2vec_vectors=None,
    sample_size=20000,
    user_sample_size=0,
    k=100,
    knn=False,
    verbose=True,
)

# MAP

In [43]:
def average_precision_at_k(recommendations_per_user: list, relevant_items_per_user: list, k=10):
    '''
    Calculates average precision@k for one user.

    The precision at every (1-based) rank where a relevant item appears is
    summed over the first k recommendations and the sum is divided by k.

    Parameters:
    recommendations_per_user (list) - ranked list of predictions for one user
    relevant_items_per_user (list) - list of items from user's history
    k (int) - cutoff, only the first k recommendations are scored

    Result:
    apk (float) - average precision at k for user; 0.0 when the user has no
    relevant items or when k is not positive
    '''
    # Nothing can be scored without relevant items or with an empty cutoff.
    # The k check also prevents a division by zero (k == 0) and a meaningless
    # negative score (k < 0) further down.
    if len(relevant_items_per_user) == 0 or k <= 0:
        return 0.0

    if len(recommendations_per_user) > k:
        recommendations_per_user = recommendations_per_user[:k]

    hits = 0.0
    precision_sum = 0.0
    for rank, item in enumerate(recommendations_per_user, start=1):
        if item in relevant_items_per_user:
            hits += 1.0
            precision_sum += hits / rank

    # The sum is normalised by k itself, not by the number of relevant items
    # or min(number of relevant items, k), so a user with fewer than k
    # relevant items can never reach a score of 1.0.
    apk = precision_sum / k

    return apk


def recommender_map(recommendations: dict, relevant_items: dict, k: int):
    '''
    Mean average precision at k (MAP@k) over all users with recommendations.

    Parameters:
    recommendations (dict) - user id -> list of recommendations for that user
    relevant_items (dict) - user id -> relevant items for that user; it is
        looked up for every user id present in `recommendations`
    k (int) - length of the top k list

    Result:
    MAP@k - mean of the per-user average precision at k
    '''
    per_user_scores = [
        average_precision_at_k(user_recommendations, relevant_items[uid], k=k)
        for uid, user_recommendations in recommendations.items()
    ]
    return np.mean(per_user_scores)

In [44]:
# MAP@k of the recommendations for every cutoff in k_list
# (k = 1, then 5, 7, ..., 103).
k_list = [1] + list(range(5, 105, 2))
map_list = [
    metrics.recommender_map(
        recommendations=recommendations_normal_sample,
        relevant_items=relevant_items_normal_sample,
        k=cutoff,
    )
    for cutoff in k_list
]

In [45]:
# Display MAP at the first cutoff of k_list, i.e. k = 1.
map_list[0]

0.013253224951404841

In [46]:
# Persist the MAP@k curve so it can be plotted later.
# NOTE(review): the target folder is ".../1000/" while the evaluation above
# was run with sample_size=20000 and other metrics are stored under
# ".../20000/" — confirm this is the intended location.
with open("G:/Recipes/Wykresy_samples/1000/map_k_np.obj", "wb") as pickle_file:
    pickle.dump(map_list, pickle_file)
    
# Disabled: one-off export of the k_list cutoffs themselves.
# with open("G:/Recipes/tests_samples/20_10/knn/sample/k_1_100_list.obj", "wb") as pickle_file:
#     pickle.dump(k_list, pickle_file)

# Hit rate

In [17]:
# Hit rate of the recommendations for every cutoff in k_list
# (k = 1, then 5, 7, ..., 103).
k_list = [1] + list(range(5, 105, 2))
hr_list = [
    metrics.hit_rate(
        recommendations=recommendations_normal_sample,
        relevant_items=relevant_items_normal_sample,
        k=cutoff,
    )
    for cutoff in k_list
]

In [19]:
# Persist the hit-rate curve (one value per cutoff in k_list).
with open("G:/Recipes/Wykresy_samples/20000/hr_np.obj", "wb") as pickle_file:
    pickle.dump(hr_list, pickle_file)

## Item coverage

In [21]:
# Item coverage of the top-k recommendations for every cutoff in k_list.
item_cov_k = []
k_list = [1] + list(range(5, 105, 2))

# The number of distinct recipes in the sample does not depend on k, so it is
# computed once instead of once per cutoff.
number_of_recipes = ratings_sample.RecipeId.nunique()

for k in k_list:
    # Truncate every user's recommendation list to its first k entries.
    top_k_recommendations = {
        user_id: user_recommendations[:k]
        for user_id, user_recommendations in recommendations_normal_sample.items()
    }
    item_cov_k.append(
        metrics.item_coverage(top_k_recommendations, number_of_recipes, min_rating=0)
    )

In [22]:
# Persist the item-coverage curve (one value per cutoff in k_list).
with open("G:/Recipes/Wykresy_samples/20000/item_cov_np.obj", "wb") as pickle_file:
    pickle.dump(item_cov_k, pickle_file)

## Novelty

In [24]:
# Number of ratings each recipe received in the sample (Series indexed by
# RecipeId), and the popularity ranking derived from those counts.
number_of_ratings_per_recipe = ratings_sample.groupby(["RecipeId"]).size()
popularity_ranking = metrics.get_popularity_ranks(number_of_ratings_per_recipe)

In [24]:
# Novelty of the top-k recommendations for every cutoff in k_list.
novelty_k = []
k_list = [1] + list(range(5, 105, 2))

for k in k_list:
    # Truncate every user's recommendation list to its first k entries.
    top_k_recommendations = {
        user_id: user_recommendations[:k]
        for user_id, user_recommendations in recommendations_normal_sample.items()
    }
    # The original call passed `ratings_per_recipe`, a name that is never
    # defined in this notebook (NameError). The per-recipe rating counts
    # computed in the previous cell are used instead.
    # NOTE(review): `popularity_ranking` is computed alongside the counts but
    # is not used anywhere — confirm which of the two `metrics.novelty` expects.
    novelty_k.append(
        metrics.novelty(top_k_recommendations, number_of_ratings_per_recipe)
    )

In [25]:
# Persist the novelty curve (one value per cutoff in k_list).
# NOTE(review): the target folder is ".../500/" while the evaluation above
# was run with sample_size=20000 — confirm this is the intended location.
with open("G:/Recipes/Wykresy_samples/500/novelty_np_new.obj", "wb") as pickle_file:
    pickle.dump(novelty_k, pickle_file)

## Diversity

In [28]:
# Load the pickled inputs of the diversity metric.
# `matrix_names` and `recipe_id_to_pos` are passed to metrics.diversity below;
# presumably the matrix holds per-recipe vectors and the two dicts map between
# recipe ids and matrix positions — confirm against the code that wrote them.
with open('G:/Recipes/Matrix/ING_CAT_50_SEPERATE/matrix.obj', 'rb') as pickle_file:
    matrix_names = pickle.load(pickle_file)
    
with open('G:/Recipes/Matrix/ING_CAT_50_SEPERATE/recipe_id_to_pos.obj', 'rb') as pickle_file:
    recipe_id_to_pos = pickle.load(pickle_file)

# NOTE(review): pos_to_recipe_id is loaded but not used by the cells visible
# in this notebook.
with open('G:/Recipes/Matrix/ING_CAT_50_SEPERATE/pos_to_recipe_id.obj', 'rb') as pickle_file:
    pos_to_recipe_id = pickle.load(pickle_file)
    

In [2]:
# NOTE: neither of these two imports is referenced by this cell.
import itertools
from sentence_transformers import util

# Diversity of the top-k recommendations for every cutoff in k_list.
k_list = [1] + list(range(5, 105, 2))
div_ing_k = []

for cutoff in k_list:
    # Lightweight progress indicator: echo the cutoffs divisible by 3.
    if not cutoff % 3:
        print(cutoff)
    # Truncate every user's recommendation list to its first `cutoff` entries.
    truncated = {
        user_id: user_recommendations[:cutoff]
        for user_id, user_recommendations in recommendations_normal_sample.items()
    }
    div_ing_k.append(metrics.diversity(truncated, matrix_names, recipe_id_to_pos))

In [30]:
# Persist the diversity curve (one value per cutoff in k_list).
with open("G:/Recipes/Wykresy_samples/20000/diversity_np.obj", "wb") as pickle_file:
    pickle.dump(div_ing_k, pickle_file)

# Most popular

In [47]:
# Ratings belonging to the "most popular" users of the sample.
# NOTE(review): n=2500 presumably is the number of users kept and the result
# is a ratings DataFrame with an AuthorId column (it is used that way below)
# — confirm the selection criterion in `sampling`.
most_popular_users = sampling.create_sample_n_popular_users(ratings_sample, n=2500)

In [48]:
# Complement of the previous selection: every rating in the sample whose
# AuthorId does not occur in most_popular_users.
least_popular_users = ratings_sample[~ratings_sample.AuthorId.isin(most_popular_users.AuthorId.unique())]

In [49]:
# Split the recommendation lists into the two user groups.
# The id sets are built once up front; previously `.AuthorId.unique()` was
# recomputed and scanned linearly for every single user in the loops.
most_popular_user_ids = set(most_popular_users.AuthorId.unique())
least_popular_user_ids = set(least_popular_users.AuthorId.unique())

recommendations_knn_most_popular = {
    user_id: user_recommendations
    for user_id, user_recommendations in recommendations_normal_sample.items()
    if user_id in most_popular_user_ids
}

recommendations_knn_least_popular = {
    user_id: user_recommendations
    for user_id, user_recommendations in recommendations_normal_sample.items()
    if user_id in least_popular_user_ids
}

## MAP

In [50]:
# MAP@k per user group for every cutoff in k_list. The full relevant-items
# dict is passed in both cases; recommender_map only looks up the users that
# are present in the given recommendations.
k_list = [1] + list(range(5, 105, 2))

map_knn_most_popular_list = [
    recommender_map(
        recommendations=recommendations_knn_most_popular,
        relevant_items=relevant_items_normal_sample,
        k=cutoff,
    )
    for cutoff in k_list
]

map_knn_least_popular_list = [
    recommender_map(
        recommendations=recommendations_knn_least_popular,
        relevant_items=relevant_items_normal_sample,
        k=cutoff,
    )
    for cutoff in k_list
]

In [51]:
# Persist the MAP@k curves of both user groups.
# NOTE(review): the target folder is ".../1000/" while the evaluation above
# was run with sample_size=20000 — confirm this is the intended location.
with open("G:/Recipes/Wykresy_samples/1000/map_k_most_popular_list.obj", "wb") as pickle_file:
    pickle.dump(map_knn_most_popular_list, pickle_file)
    
with open("G:/Recipes/Wykresy_samples/1000/map_k_least_popular.obj", "wb") as pickle_file:
    pickle.dump(map_knn_least_popular_list, pickle_file)
    

## HR

#### Normal predictor

In [40]:
# Hit rate for the most popular users, for every cutoff in k_list.
# Two fixes compared to the previous version of this cell:
#  * `hit_rate` is called through the `metrics` module — the bare name is
#    never imported in this notebook;
#  * only the most-popular subset of the recommendations is evaluated, not
#    the recommendations of all users.
# NOTE(review): the full relevant-items dict is passed, mirroring the MAP
# cells — confirm metrics.hit_rate only looks up users that have
# recommendations.
hr_knn_most_popular_list = []
k_list = [1] + list(range(5, 105, 2))
for k in k_list:
    hr_k = metrics.hit_rate(
        recommendations=recommendations_knn_most_popular,
        relevant_items=relevant_items_normal_sample,
        k=k,
    )
    hr_knn_most_popular_list.append(hr_k)

In [41]:
# Hit rate for the least popular users, for every cutoff in k_list.
# Two fixes compared to the previous version of this cell:
#  * `hit_rate` is called through the `metrics` module — the bare name is
#    never imported in this notebook;
#  * only the least-popular subset of the recommendations is evaluated;
#    evaluating the recommendations of all users made this list identical
#    to the most-popular one.
# NOTE(review): the full relevant-items dict is passed, mirroring the MAP
# cells — confirm metrics.hit_rate only looks up users that have
# recommendations.
hr_knn_least_popular_list = []
k_list = [1] + list(range(5, 105, 2))
for k in k_list:
    hr_k = metrics.hit_rate(
        recommendations=recommendations_knn_least_popular,
        relevant_items=relevant_items_normal_sample,
        k=k,
    )
    hr_knn_least_popular_list.append(hr_k)

In [42]:
# Persist the hit-rate curves of both user groups.
with open("G:/Recipes/Wykresy_samples/20000/hr_most_popular_list.obj", "wb") as pickle_file:
    pickle.dump(hr_knn_most_popular_list, pickle_file)
    
with open("G:/Recipes/Wykresy_samples/20000/hr_least_popular.obj", "wb") as pickle_file:
    pickle.dump(hr_knn_least_popular_list, pickle_file)
    