# Imports

In [1]:
# !pip install surprise

In [11]:
import pandas as pd
import pickle
from collections import defaultdict

from surprise import NormalPredictor
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise import AlgoBase
from surprise.model_selection import train_test_split as train_test_split_sup

import metrics
import sampling

import numpy as np

from sklearn.model_selection import train_test_split as train_test_split
from surprise.model_selection import LeaveOneOut
import random

from create_similarity_vectors import create_top_k_similar_vectors
from sentence_transformers import util
import torch
from surprise.prediction_algorithms.predictions import PredictionImpossible
import heapq

from metrics import recommender_map

from sampling import create_train_test_dataframe
from sampling import train_test_surprise_format
from top_n_evaluation import create_anti_testset_for_user_all
from top_n_evaluation import create_anti_testset_for_user
from top_n_evaluation import create_recommendation_top_n_evaluation

# Constants

In [13]:
# Location of the base ratings table (parquet file), relative to the working directory.
RATINGS_BASE = '../Data/base/ratings_base.parquet'

# Load data

In [14]:
# Read the complete ratings table from the parquet file at RATINGS_BASE.
ratings_base = pd.read_parquet(RATINGS_BASE)

In [15]:
# Derive the working sample that the dataset, split and coverage/popularity cells use.
# NOTE(review): [20, 10] are presumably minimum rating-count thresholds -- confirm their
# exact meaning and order against sampling.get_ratings_with_min_number_list.
ratings_sample = sampling.get_ratings_with_min_number_list(ratings_base, [20, 10])

In [13]:
# Load three pickled artefacts from absolute paths on the local G: drive.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file; only point
# these paths at files you produced yourself or otherwise trust.

# `matrix` comes from the same ING_CAT_50 directory as `recipe_id_to_pos` below.
with open("G:/Recipes/Matrix/ING_CAT_50/matrix_top25k_ing_cat_50.obj", "rb") as input_file:
    matrix = pickle.load(input_file)

# NOTE(review): presumably maps a recipe id to its position in `matrix` -- confirm.
with open("G:/Recipes/Matrix/ING_CAT_50/recipe_id_to_pos.obj", "rb") as input_file:
    recipe_id_to_pos = pickle.load(input_file)

# `vectors` is later passed as `word2vec_vectors` when the recommendations are created.
with open("G:/Recipes/Vectors/ing_cat_vectors_50_dict.obj", "rb") as input_file:
    vectors = pickle.load(input_file)



## Create dataset

In [16]:
# Wrap the sample in a Surprise Dataset. The columns are selected in the order
# user id, item id, rating, and the Reader declares a rating scale of 0 to 5.
user_item_ratings_dataset = Dataset.load_from_df(ratings_sample[["AuthorId", "RecipeId", "Rating"]], Reader(rating_scale=(0, 5)))

In [17]:
# Split the sample into train/test DataFrames, then convert both for use with Surprise.
# NOTE(review): 0.2 and 13 are presumably the test fraction and the random seed --
# confirm against sampling.create_train_test_dataframe.
train_df, test_df = create_train_test_dataframe(ratings_sample, 0.2, 13)
trainset, testset = train_test_surprise_format(train_df, test_df)

## Create recommendations

In [2]:
# Create the recommendations that every metric cell below evaluates.
# The call runs in KNN mode (knn=True) without a Surprise algorithm (algorithm=None)
# and feeds the pickled `vectors` in as `word2vec_vectors`.
# NOTE(review): sample_size, user_sample_size and k are interpreted by
# top_n_evaluation.create_recommendation_top_n_evaluation -- confirm their meaning there.
(
    recommendations_knn_sample,
    relevant_items_knn_sample,
) = create_recommendation_top_n_evaluation(
    train_df,
    ratings_sample,
    algorithm=None,
    word2vec_vectors=vectors,
    sample_size=20000,
    user_sample_size=0,
    k=100,
    knn=True,
    verbose=True,
)

# MAP

In [44]:
# Evaluate MAP at each cutoff: k = 1, then every odd k from 5 up to 103.
k_list = [1, *range(5, 105, 2)]
map_list_knn = []
for k in k_list:
    map_k_knn = metrics.recommender_map(
        recommendations=recommendations_knn_sample,
        relevant_items=relevant_items_knn_sample,
        k=k,
    )
    # Results are stored in the same order as the cutoffs in k_list.
    map_list_knn.append(map_k_knn)

In [46]:
# Persist the MAP values (one entry per cutoff in k_list, in the same order).
# NOTE(review): this file goes to the Wykresy_samples root, while the other metric
# dumps in this notebook go to numbered sub-folders -- confirm the target is intended.
with open("G:/Recipes/Wykresy_samples/map_k_knn.obj", "wb") as pickle_file:
    pickle.dump(map_list_knn, pickle_file)
    
# Disabled: export of the cutoff list itself.
# with open("G:/Recipes/Wykresy_samples/k_1_100_list.obj", "wb") as pickle_file:
#     pickle.dump(k_list, pickle_file)

# Hit rate

In [53]:
# Evaluate the hit rate at each cutoff: k = 1, then every odd k from 5 up to 103.
k_list = [1, *range(5, 105, 2)]
hr_list_knn = []
for k in k_list:
    hr_k = metrics.hit_rate(
        recommendations=recommendations_knn_sample,
        relevant_items=relevant_items_knn_sample,
        k=k,
    )
    # Results are stored in the same order as the cutoffs in k_list.
    hr_list_knn.append(hr_k)

In [54]:
# Persist the hit-rate values (one entry per cutoff in k_list, in the same order).
# NOTE(review): the folder is named 10000 although the recommendations were created
# with sample_size=20000 -- confirm this is the intended output directory.
with open("G:/Recipes/Wykresy_samples/10000/hr_knn.obj", "wb") as pickle_file:
    pickle.dump(hr_list_knn, pickle_file)

# Item coverage

In [56]:
# Item coverage of the top-k recommendation lists, for each cutoff k
# (k = 1, then every odd k from 5 up to 103).
item_cov_k = []
k_list = [1] + list(range(5, 105, 2))

# The number of distinct recipes in the sample does not depend on k,
# so it is computed once instead of once per cutoff.
total_recipe_count = ratings_sample.RecipeId.nunique()

for k in k_list:
    # Truncate every user's recommendation list to its first k entries.
    item_cov_temp = {
        user_id: recommended[:k]
        for user_id, recommended in recommendations_knn_sample.items()
    }

    cov = metrics.item_coverage(item_cov_temp, total_recipe_count, min_rating=0)
    item_cov_k.append(cov)

In [57]:
# Persist the item-coverage values (one entry per cutoff in k_list, in the same order).
# NOTE(review): the folder is named 10000 although the recommendations were created
# with sample_size=20000 -- confirm this is the intended output directory.
with open("G:/Recipes/Wykresy_samples/10000/item_cov_knn.obj", "wb") as pickle_file:
    pickle.dump(item_cov_k, pickle_file)

# Novelty

In [18]:
# Count how often each recipe occurs across all users' recommendation lists.
# Despite its name, this counter is built from the recommendations, not from the
# ratings table; it is the popularity input handed to metrics.novelty below.
ratings_per_recipe = defaultdict(int)
for uid in recommendations_knn_sample:
    for iid in recommendations_knn_sample[uid]:
        ratings_per_recipe[iid] += 1
    

In [59]:
# Number of rating rows per recipe in the sample (Series indexed by RecipeId).
number_of_ratings_per_recipe = ratings_sample.groupby(["RecipeId"]).size()
# NOTE(review): `popularity_ranking` is not referenced by any other visible cell
# (the novelty computation receives `ratings_per_recipe` instead) -- confirm whether
# it should be used or can be removed.
popularity_ranking = metrics.get_popularity_ranks(number_of_ratings_per_recipe)

In [22]:
# Novelty of the top-k recommendation lists, for each cutoff k
# (k = 1, then every odd k from 5 up to 103).
novelty_k = []
k_list = [1] + list(range(5, 105, 2))

for k in k_list:
    # Truncate every user's recommendation list to its first k entries.
    novelty_temp = {
        user_id: recommended[:k]
        for user_id, recommended in recommendations_knn_sample.items()
    }

    # NOTE(review): the second argument counts occurrences in the full (untruncated)
    # recommendation lists, not ratings -- confirm this is the popularity signal
    # metrics.novelty expects.
    nov = metrics.novelty(novelty_temp, ratings_per_recipe)
    novelty_k.append(nov)

In [23]:
# Persist the novelty values (one entry per cutoff in k_list, in the same order).
# NOTE(review): the folder is named 500 although the recommendations were created
# with sample_size=20000 -- confirm this is the intended output directory.
with open("G:/Recipes/Wykresy_samples/500/novelty_knn_new.obj", "wb") as pickle_file:
    pickle.dump(novelty_k, pickle_file)

# Diversity

In [28]:
# with open('G:/Recipes/Matrix/ING_CAT_50_SEPERATE/matrix.obj', 'rb') as pickle_file:
#     matrix_names = pickle.load(pickle_file)
    
# with open('G:/Recipes/Matrix/ING_CAT_50_SEPERATE/recipe_id_to_pos.obj', 'rb') as pickle_file:
#     recipe_id_to_pos = pickle.load(pickle_file)

# with open('G:/Recipes/Matrix/ING_CAT_50_SEPERATE/pos_to_recipe_id.obj', 'rb') as pickle_file:
#     pos_to_recipe_id = pickle.load(pickle_file)
    

In [5]:
# Both imports are kept from the original cell; neither name is used in this cell.
import itertools
from sentence_transformers import util

# Diversity of the top-k recommendation lists, for each cutoff k
# (k = 1, then every odd k from 5 up to 103).
div_ing_k = []
k_list = [1] + list(range(5, 105, 2))

for k in k_list:
    # Progress output for the cutoffs that are multiples of 3.
    if k % 3 == 0:
        print(k)

    # Truncate every user's recommendation list to its first k entries.
    div_temp = {
        user_id: recommended[:k]
        for user_id, recommended in recommendations_knn_sample.items()
    }

    # FIX: this call used `matrix_names`, which is only assigned in a commented-out
    # cell, so a fresh top-to-bottom run raised NameError. `matrix` is the object
    # loaded from the same ING_CAT_50 directory as `recipe_id_to_pos`.
    # NOTE(review): if the ING_CAT_50_SEPERATE matrix was intended, re-enable its
    # loading cell (including its own recipe_id_to_pos) and pass that matrix here.
    div = metrics.diversity(div_temp, matrix, recipe_id_to_pos)
    div_ing_k.append(div)

In [64]:
# with open("G:/Recipes/Wykresy_samples/10000/diversity_knn.obj", "wb") as pickle_file:
#     pickle.dump(div_ing_k, pickle_file)

# Most popular vs. least popular users

In [47]:
# Rating rows of the users selected by the sampling helper with n=2500.
# NOTE(review): presumably the 2500 users with the most ratings -- confirm the
# selection criterion in sampling.create_sample_n_popular_users.
most_popular_users = sampling.create_sample_n_popular_users(ratings_sample, n=2500)

In [48]:
# Complement of the group above: every row of the sample whose AuthorId does not
# appear in `most_popular_users`.
least_popular_users = ratings_sample[~ratings_sample.AuthorId.isin(most_popular_users.AuthorId.unique())]

In [49]:
# Split the recommendations into two dicts by user group.
# The id sets are built once up front. Calling `.unique()` inside the loop rebuilt
# the array and scanned it linearly for every single user.
most_popular_user_ids = set(most_popular_users.AuthorId.unique())
least_popular_user_ids = set(least_popular_users.AuthorId.unique())

# Recommendations of the users in the "most popular" group.
recommendations_knn_most_popular = {
    user_id: recommended
    for user_id, recommended in recommendations_knn_sample.items()
    if user_id in most_popular_user_ids
}

# Recommendations of the remaining ("least popular") users.
recommendations_knn_least_popular = {
    user_id: recommended
    for user_id, recommended in recommendations_knn_sample.items()
    if user_id in least_popular_user_ids
}

## MAP

In [50]:
# MAP at each cutoff (k = 1, then every odd k from 5 up to 103), computed separately
# for the "most popular" and the "least popular" user groups.
# NOTE(review): `relevant_items_knn_sample` covers all sampled users while each
# `recommendations` dict holds only one group -- confirm recommender_map only
# evaluates users present in `recommendations`.
k_list = [1, *range(5, 105, 2)]

map_knn_most_popular_list = []
for k in k_list:
    map_k = recommender_map(
        recommendations=recommendations_knn_most_popular,
        relevant_items=relevant_items_knn_sample,
        k=k,
    )
    map_knn_most_popular_list.append(map_k)

map_knn_least_popular_list = []
for k in k_list:
    map_k = recommender_map(
        recommendations=recommendations_knn_least_popular,
        relevant_items=relevant_items_knn_sample,
        k=k,
    )
    map_knn_least_popular_list.append(map_k)

In [51]:
# Persist the per-group MAP values (one entry per cutoff in k_list, in the same order).
# NOTE(review): the folder is named 1000 although the recommendations were created
# with sample_size=20000 -- confirm this is the intended output directory.
with open("G:/Recipes/Wykresy_samples/1000/map_k_most_popular_knn.obj", "wb") as pickle_file:
    pickle.dump(map_knn_most_popular_list, pickle_file)
    
with open("G:/Recipes/Wykresy_samples/1000/map_k_least_popular_knn.obj", "wb") as pickle_file:
    pickle.dump(map_knn_least_popular_list, pickle_file)

# Disabled: alternative export location for the same two lists.
# with open("G:/Recipes/tests_samples/20_10/knn/wykresy/sample500/mapk_most_popular_list.obj", "wb") as pickle_file:
#     pickle.dump(map_knn_most_popular_list, pickle_file)
    
# with open("G:/Recipes/tests_samples/20_10/knn/wykresy/sample500/mapk_lest_popular_list.obj", "wb") as pickle_file:
#     pickle.dump(map_knn_least_popular_list, pickle_file)
    