In [None]:
import datetime
import os
import pickle
import pprint
import random
import tempfile
from collections import defaultdict
from typing import Dict, Text

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten

import metrics
import sampling
import TensorflowRichFeatures as tfrs_rich
from sampling import create_train_test_dataframe

# Load data

In [None]:
# Input locations, relative to the notebook's working directory.
RECIPES_BASE = "../Data/base/recipes_base.parquet"
RATINGS_BASE = "../Data/base/ratings_base.parquet"

# Pickled frame that is merged with the recipes on "RecipeId" further down.
CONCAT_ING_CAT = "../Data/samples/concatenated_ing_cat_df.obj"

In [None]:
# Read the two base tables from parquet.
ratings_small = pd.read_parquet(RATINGS_BASE)
recipes_small = pd.read_parquet(RECIPES_BASE)

# NOTE: unpickling executes arbitrary code - only load files you trust.
with open(CONCAT_ING_CAT, "rb") as pickled_frame:
    concatenated_ing_cat_df = pickle.load(pickled_frame)

In [None]:
# Reduce the ratings with the thresholds [20, 10]; their exact meaning is
# defined by sampling.get_ratings_with_min_number_list.
ratings_sample = sampling.get_ratings_with_min_number_list(ratings_small, [20, 10])

# Normalise recipe ids to int64 BEFORE splitting, so train_df / test_df carry the
# same id dtype as ratings_sample and recipes_small (the split used to be taken
# from the un-normalised frame). `assign` builds new frames, which keeps this
# cell idempotent and avoids writing into a possibly-sliced frame.
ratings_sample = ratings_sample.assign(RecipeId=ratings_sample.RecipeId.astype("int64"))
recipes_small = recipes_small.assign(RecipeId=recipes_small.RecipeId.astype("int64"))

# Positional arguments 0.2 and 13 are passed through unchanged
# (presumably test fraction and random seed - confirm in sampling.py).
train_df, test_df = create_train_test_dataframe(ratings_sample, 0.2, 13)

recipe_ids_in_sample = list(set(ratings_sample.RecipeId))

In [None]:
# Display the sampled ratings frame as the cell output for a visual check.
ratings_sample

## Merge datasets

In [None]:
# Recipes that have ingredient/category tokens and occur in the ratings sample.
# "Ingredients_Category" is the space-joined form of the "Concatenated" tokens.
recipes_subset = (
    recipes_small[["RecipeId"]]
    .merge(concatenated_ing_cat_df, on="RecipeId", how="inner")
    .assign(Ingredients_Category=lambda frame: frame["Concatenated"].map(" ".join))
    .loc[lambda frame: frame["RecipeId"].isin(recipe_ids_in_sample)]
)

# Ratings joined with the recipe text feature; helper columns are dropped.
merged_dataset = (
    ratings_sample
    .merge(recipes_subset, on="RecipeId", how="inner")
    .drop(columns=["Concatenated", "DateSubmitted"])
)

In [None]:
# Print column dtypes and non-null counts of the merged ratings/recipes frame.
merged_dataset.info()

In [None]:
# Print column dtypes and non-null counts of the filtered recipes frame.
recipes_subset.info()

# Preparing tfrs dataset

In [None]:
# Recipe ids as UTF-8 byte strings; the "ItemId" column stays on recipes_subset
# because the recommendation loop reads it again later.
recipes_subset['ItemId'] = recipes_subset.RecipeId.map(
    lambda recipe_id: str(recipe_id).encode('utf-8')
)

# Column name -> numpy array, the layout from_tensor_slices expects.
recipes_dict = {
    column: np.array(values)
    for column, values in recipes_subset[['ItemId', 'Ingredients_Category']].items()
}

# Candidate dataset with the byte-string id exposed under the key "RecipeId".
recipes = tf.data.Dataset.from_tensor_slices(recipes_dict).map(
    lambda row: {
        'RecipeId': row['ItemId'],
        'Ingredients_Category': row['Ingredients_Category'],
    }
)

In [None]:
# Pretty-print the first element of the recipes dataset as a sanity check.
for x in recipes.take(1).as_numpy_iterator():
    pprint.pprint(x)

In [None]:
# User and recipe ids as UTF-8 byte strings, stored as extra columns.
ratings_sample['UserId'] = ratings_sample.AuthorId.map(
    lambda author_id: str(author_id).encode('utf-8')
)
ratings_sample['ItemId'] = ratings_sample.RecipeId.map(
    lambda recipe_id: str(recipe_id).encode('utf-8')
)

# Column name -> numpy array, the layout from_tensor_slices expects.
ratings_dict = {
    column: np.array(values)
    for column, values in ratings_sample[['UserId', 'ItemId']].items()
}

# Interaction dataset keyed by "AuthorId" / "RecipeId".
ratings = tf.data.Dataset.from_tensor_slices(ratings_dict).map(
    lambda row: {'AuthorId': row['UserId'], 'RecipeId': row['ItemId']}
)

In [None]:
# Pretty-print the first element of the ratings dataset as a sanity check.
for x in ratings.take(1).as_numpy_iterator():
    pprint.pprint(x)

In [None]:
# Collect the id vocabularies handed to the model constructor.
# The batch size only controls how many ids arrive per tensor; the concatenated
# and de-duplicated result is the same for any batch size. Batching recipes one
# at a time produced one tensor per recipe, so a larger batch is used instead.
recipe_ids = recipes.batch(1_000).map(lambda batch: batch["RecipeId"])
user_ids = ratings.batch(1_000_000).map(lambda batch: batch["AuthorId"])

unique_recipe_ids = np.unique(np.concatenate(list(recipe_ids)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

# Load model

In [None]:
# Rebuild the model with the configuration passed below, then compile it so the
# stored weights can be restored in the next cell.
model = tfrs_rich.CombinedModel(
    layer_sizes=[64],
    unique_recipe_ids=unique_recipe_ids,
    unique_user_ids=unique_user_ids,
    recipes_dataset=recipes,
    verbose=False,
)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))


In [None]:
# NOTE(review): absolute, machine-specific checkpoint location - consider moving
# it next to the other path constants at the top of the notebook.
MODEL_WEIGHTS_PATH = "C:/Users/Użytkownik/Recipes/Data/TFRS/features/20_10/model_7/model_7a_400_epochs"
model.load_weights(MODEL_WEIGHTS_PATH)

In [None]:
# model.summary()

In [None]:
# len(recommendations_per_user[68727])

# Create recommendations

In [None]:
# Build the top-k recommendation list for every user of the training split.
#
# For each user: draw a candidate set with `create_anti_testset_for_user`, index
# only those candidates with a brute-force retrieval layer on top of the model's
# query/candidate towers, and keep the ids of the best-scoring candidates.
#
# NOTE(review): `create_anti_testset_for_user` is not imported or defined in any
# cell shown here - confirm its module and import it in the first cell.
k = 100
recommendations_per_user = defaultdict(list)
relevant_items_per_user = defaultdict(list)

# One pass over train_df instead of one boolean filter per user; each entry is
# the array of distinct RecipeIds the user has in the training split.
train_items_by_user = train_df.groupby("AuthorId")["RecipeId"].unique()

for i, user_id in enumerate(train_df.AuthorId.unique()):
    if i % 250 == 0:
        print(i)  # coarse progress indicator

    user_items_in_trainset = train_items_by_user.loc[user_id]
    candidates, relevant_items_per_user[user_id] = create_anti_testset_for_user(
        user_id,
        user_items_in_trainset,
        test_df,
        sample_size=500,
        user_sample_size=0,
        knn=True,
    )

    # Feature rows of the candidate recipes (a local name, so the notebook-level
    # `recipes_dict` is no longer overwritten on every iteration).
    recipes_candidates = recipes_subset[recipes_subset.RecipeId.isin(candidates)]
    candidate_features = {
        name: np.array(value)
        for name, value in recipes_candidates[['ItemId', 'Ingredients_Category']].items()
    }
    recipes_test = tf.data.Dataset.from_tensor_slices(candidate_features).map(
        lambda x: {'RecipeId': x['ItemId'],
                   'Ingredients_Category': x['Ingredients_Category']}
    )

    # Index (identifier, embedding) pairs for this user's candidates only.
    index = tfrs.layers.factorized_top_k.BruteForce(model.query_model)
    index.index_from_dataset(
        tf.data.Dataset.zip((
            recipes_test.map(lambda features: features['RecipeId']).batch(10000),
            recipes_test.batch(10000).map(model.candidate_model),
        ))
    )

    # Asking for more results than there are indexed candidates fails in the
    # top-k op, so never request more than the candidate count.
    top_k = min(k, len(recipes_candidates))
    scores, ids = index({"AuthorId": tf.constant([bytes(str(user_id), 'utf-8')])}, k=top_k)

    # Identifiers come back as byte strings; store them as plain ints.
    recommendations_per_user[user_id] = [int(x) for x in ids[0].numpy()]

# MAP

In [None]:
# metrics.recommender_map at every cutoff: k = 1, then 5, 7, ..., 103.
k_list = [1, *range(5, 105, 2)]
map_list_tfrs = [
    metrics.recommender_map(
        recommendations=recommendations_per_user,
        relevant_items=relevant_items_per_user,
        k=cutoff,
    )
    for cutoff in k_list
]

In [None]:
# Persist the MAP curve.
# NOTE(review): other cells in this notebook write to ".../20000/" and
# ".../500/" - confirm that "1000" is the intended sample directory.
with open("G:/Recipes/Wykresy_samples/1000/map_k_tfrs.obj", "wb") as out_file:
    pickle.dump(map_list_tfrs, out_file)

# HR

In [None]:
# metrics.hit_rate at every cutoff: k = 1, then 5, 7, ..., 103.
k_list = [1, *range(5, 105, 2)]
hr_list_tfrs = [
    metrics.hit_rate(
        recommendations=recommendations_per_user,
        relevant_items=relevant_items_per_user,
        k=cutoff,
    )
    for cutoff in k_list
]

In [None]:
# Persist the hit-rate curve.
with open("G:/Recipes/Wykresy_samples/20000/hr_tfrs.obj", "wb") as out_file:
    pickle.dump(hr_list_tfrs, out_file)

# Item coverage

In [None]:
# Item coverage at every cutoff: each user's list is truncated to its first
# `cutoff` entries before being passed to metrics.item_coverage together with
# the number of distinct recipes in the ratings sample.
k_list = [1, *range(5, 105, 2)]
item_cov_k = []

for cutoff in k_list:
    truncated = {
        user: recs[:cutoff] for user, recs in recommendations_per_user.items()
    }
    item_cov_k.append(
        metrics.item_coverage(truncated, ratings_sample.RecipeId.nunique(), min_rating=0)
    )

In [None]:
# Persist the item-coverage curve.
with open("G:/Recipes/Wykresy_samples/20000/item_cov_tfrs.obj", 'wb') as out_file:
    pickle.dump(item_cov_k, out_file)

# Calculate novelty

In [None]:
# Despite its name, this counts how often each recipe appears across all users'
# recommendation lists (not how many ratings it received).
ratings_per_recipe = defaultdict(int)
for recommended in recommendations_per_user.values():
    for recipe_id in recommended:
        ratings_per_recipe[recipe_id] += 1
    

In [None]:
# Novelty over the full (untruncated) recommendation lists.
# `novelty` lives in the `metrics` module (it is called as metrics.novelty in the
# per-cutoff cell further down); the bare name is not defined in this notebook.
metrics.novelty(recommendations_per_user, ratings_per_recipe)

In [None]:
# Number of ratings per recipe in the sample, turned into popularity ranks.
# NOTE(review): `popularity_ranking` is not read by any cell shown here; the
# novelty cells pass `ratings_per_recipe` instead - confirm which is intended.
number_of_ratings_per_recipe = ratings_sample.groupby("RecipeId").size()
popularity_ranking = metrics.get_popularity_ranks(number_of_ratings_per_recipe)

In [None]:
# Novelty at every cutoff: each user's list is truncated to its first `cutoff`
# entries before being passed to metrics.novelty.
k_list = [1, *range(5, 105, 2)]
novelty_k = []

for cutoff in k_list:
    truncated = {
        user: recs[:cutoff] for user, recs in recommendations_per_user.items()
    }
    novelty_k.append(metrics.novelty(truncated, ratings_per_recipe))

In [None]:
# Persist the novelty curve.
with open("G:/Recipes/Wykresy_samples/500/novelty_tfrs_new.obj", 'wb') as out_file:
    pickle.dump(novelty_k, out_file)

# Calculate diversity

In [None]:
# Inputs of the diversity computation below: `metrics.diversity` is called with
# `matrix_names` and `recipe_id_to_pos`, so both must exist after a fresh
# "Restart & Run All". These loads were commented out, which left the diversity
# cell dependent on leftover kernel state.
# NOTE: unpickling executes arbitrary code - only load files you trust.
with open('G:/Recipes/Matrix/ING_CAT_50_SEPERATE/matrix.obj', 'rb') as pickle_file:
    matrix_names = pickle.load(pickle_file)

with open('G:/Recipes/Matrix/ING_CAT_50_SEPERATE/recipe_id_to_pos.obj', 'rb') as pickle_file:
    recipe_id_to_pos = pickle.load(pickle_file)

# Not read by any cell shown in this notebook, so it stays disabled:
# with open('G:/Recipes/Matrix/ING_CAT_50_SEPERATE/pos_to_recipe_id.obj', 'rb') as pickle_file:
#     pos_to_recipe_id = pickle.load(pickle_file)

In [None]:
import itertools
from sentence_transformers import util

# Diversity at every cutoff: each user's list is truncated to its first `cutoff`
# entries before being passed to metrics.diversity.
k_list = [1, *range(5, 105, 2)]
div_ing_k = []

for cutoff in k_list:
    if cutoff % 3 == 0:
        print(cutoff)  # occasional progress output
    truncated = {
        user: recs[:cutoff] for user, recs in recommendations_per_user.items()
    }
    div_ing_k.append(metrics.diversity(truncated, matrix_names, recipe_id_to_pos, None))

In [None]:
# Persist the diversity curve.
with open("G:/Recipes/Wykresy_samples/20000/diversity_tfrs.obj", 'wb') as out_file:
    pickle.dump(div_ing_k, out_file)

In [None]:
# with open('G:/Recipes/tests_samples/20_10/tfrs/div_ing_k.obj', 'wb') as pickle_file:
#     pickle.dump(div_ing_k, pickle_file)

# Most popular

In [None]:
# Subset of the ratings sample returned by sampling.create_sample_n_popular_users
# for n=2500 (presumably the ratings of the 2500 most active users - confirm in
# sampling.py).
most_popular_users = sampling.create_sample_n_popular_users(ratings_sample, n=2500)

In [None]:
# All ratings whose author is NOT among the most popular users.
is_most_popular = ratings_sample.AuthorId.isin(most_popular_users.AuthorId.unique())
least_popular_users = ratings_sample[~is_most_popular]

## MAP

In [None]:

# Split the per-user recommendations into the two user groups.
# The author ids are materialised once as sets; previously `.unique()` was
# recomputed and scanned linearly for every user in the dictionary.
most_popular_ids = set(most_popular_users.AuthorId.unique())
least_popular_ids = set(least_popular_users.AuthorId.unique())

recommendations_most_popular = {
    key: value
    for key, value in recommendations_per_user.items()
    if key in most_popular_ids
}

recommendations_least_popular = {
    key: value
    for key, value in recommendations_per_user.items()
    if key in least_popular_ids
}

In [None]:
# metrics.recommender_map per cutoff, restricted to the most popular users.
k_list = [1, *range(5, 105, 2)]
map_list_most_popular_tfrs = [
    metrics.recommender_map(
        recommendations=recommendations_most_popular,
        relevant_items=relevant_items_per_user,
        k=cutoff,
    )
    for cutoff in k_list
]

In [None]:
# metrics.recommender_map per cutoff, restricted to the least popular users.
k_list = [1, *range(5, 105, 2)]
map_list_least_popular_tfrs = [
    metrics.recommender_map(
        recommendations=recommendations_least_popular,
        relevant_items=relevant_items_per_user,
        k=cutoff,
    )
    for cutoff in k_list
]

In [None]:
# First entry of the curve, i.e. the value for the first cutoff in k_list (k = 1).
map_list_most_popular_tfrs[0]

In [None]:
# First entry of the curve, i.e. the value for the first cutoff in k_list (k = 1).
map_list_least_popular_tfrs[0]

In [None]:
# Persist both MAP curves (most / least popular users).
for target_path, curve in (
    ("G:/Recipes/Wykresy_samples/500/mar_most_popular_tfrs.obj", map_list_most_popular_tfrs),
    ("G:/Recipes/Wykresy_samples/500/mar_least_popular_tfrs.obj", map_list_least_popular_tfrs),
):
    with open(target_path, "wb") as out_file:
        pickle.dump(curve, out_file)

## HR

In [None]:
# metrics.hit_rate per cutoff, restricted to the most popular users.
k_list = [1, *range(5, 105, 2)]
hr_list_most_popular_tfrs = [
    metrics.hit_rate(
        recommendations=recommendations_most_popular,
        relevant_items=relevant_items_per_user,
        k=cutoff,
    )
    for cutoff in k_list
]

In [None]:
# metrics.hit_rate per cutoff, restricted to the least popular users.
k_list = [1, *range(5, 105, 2)]
hr_list_least_popular_tfrs = [
    metrics.hit_rate(
        recommendations=recommendations_least_popular,
        relevant_items=relevant_items_per_user,
        k=cutoff,
    )
    for cutoff in k_list
]

In [None]:
# Persist both hit-rate curves (most / least popular users).
for target_path, curve in (
    ("G:/Recipes/Wykresy_samples/20000/hr_most_popular_tfrs.obj", hr_list_most_popular_tfrs),
    ("G:/Recipes/Wykresy_samples/20000/hr_least_popular_tfrs.obj", hr_list_least_popular_tfrs),
):
    with open(target_path, "wb") as out_file:
        pickle.dump(curve, out_file)