In [None]:
!pip install scikit-surprise
!pip install import-ipynb

In [None]:
import numpy as np
import pandas as pd
import random
from sklearn.metrics import ndcg_score, precision_score
from surprise import Dataset
import torch
import torch.nn as nn
import import_ipynb
from full_notebook import Recommender, Env, UserMovieEmbedding

In [None]:
# Load the built-in MovieLens-1M ratings through Surprise.
data = Dataset.load_builtin('ml-1m')

# Build the frame first and cast once afterwards. The previous version also
# passed dtype='int32' to the constructor, which was redundant with the
# astype below and applies the cast to the raw tuples directly
# (presumably string ids -- TODO confirm against Surprise's raw_ratings).
df = pd.DataFrame(data.raw_ratings, columns=['UserId', 'MovieId', 'Rating', 'Timestamp'])
df = df.astype('int32')
users = df['UserId'].unique()
items = df['MovieId'].unique()

# NOTE(review): allow_pickle=True executes pickled objects on load; only use
# with files produced by this project.
users_dict = np.load('user_dict.npy', allow_pickle=True).item()
# The file name is kept exactly as written (including its spelling) because
# it must match the artefact on disk.
users_history_lens = np.load('users_histroy_len.npy')

# Largest id + 1. Series.max() is vectorised (the builtin max() iterates the
# Series element by element) and int() yields plain Python integers.
users_num = int(df["UserId"].max()) + 1
items_num = int(df["MovieId"].max()) + 1

# Hold out the last 20% of the user-id range for evaluation.
eval_users_num = int(users_num * 0.2)
eval_items_num = items_num

eval_users_dict = {k: users_dict[k] for k in range(users_num - eval_users_num, users_num)}
eval_users_history_lens = users_history_lens[-eval_users_num:]

In [None]:
def _precision_and_ndcg(hits):
  """Return (precision, NDCG) for a ranked list of 0/1 hit flags.

  Precision is the fraction of positions that are hits. NDCG divides the
  log2-discounted gain of ``hits`` by the gain of an ideal list of the same
  length in which every position is a hit. An empty list scores (0.0, 0.0).
  """
  hits = np.asarray(hits, dtype=float)
  if hits.size == 0:
    return 0.0, 0.0
  discounts = 1.0 / np.log2(np.arange(hits.size) + 2)
  precision = float(hits.mean())
  ndcg = float((hits * discounts).sum() / discounts.sum())
  return precision, ndcg


def evaluate(recommender, env, top_k=False, embedding_network=None, embedding_dim=100):
  """Run one evaluation episode of ``recommender`` in ``env``.

  Args:
    recommender: object providing ``state_repr(item_embeddings, user_embedding)``,
      ``actor.local_network(state)`` and
      ``recommend_item(action, item_embeddings, items_ids, top_k=...)``.
    env: object providing ``reset() -> (user_id, items_ids, done)`` and
      ``step(item, top_k=...) -> (next_items_ids, reward, done, info)``.
    top_k: falsy to recommend one item per step, otherwise the size of the
      recommended list; ``reward`` is then iterated as one value per item.
    embedding_network: optional network exposing ``u`` (user) and ``m``
      (item) embedding lookups. When omitted, a new
      ``UserMovieEmbedding(users_num, items_num, embedding_dim)`` is built,
      as before. NOTE(review): that fallback network carries no loaded
      weights here -- pass the trained network for meaningful metrics.
    embedding_dim: embedding size used only for the fallback network.

  Returns:
    With ``top_k``: ``(mean precision, mean NDCG)`` averaged over steps.
    Otherwise: ``(mean step reward, precision, NDCG)`` computed over the
    whole episode's hit/miss sequence.
    An episode that is already done at reset returns zeros.
  """
  if embedding_network is None:
    embedding_network = UserMovieEmbedding(users_num, items_num, embedding_dim)

  # Rewards are collected with append(): the previous ``list += np.sum(...)``
  # silently turned the list into an empty array, so the mean was NaN.
  episode_rewards = []
  hit_miss = []
  steps = 0
  sum_precision = 0.0
  sum_ndcg = 0.0
  user_id, items_ids, done = env.reset()

  # Evaluation only needs forward passes, so skip autograd bookkeeping.
  with torch.no_grad():
    while not done:
      # Observe current state & find action
      user_embedding = embedding_network.u(torch.LongTensor([user_id]))
      item_embeddings = embedding_network.m(torch.LongTensor(list(items_ids)))

      state = recommender.state_repr(item_embeddings.unsqueeze(1), user_embedding)

      # Action and recommended item(s)
      action = recommender.actor.local_network(state)
      recommended_item = recommender.recommend_item(action, item_embeddings, items_ids, top_k=top_k)

      # Calculate reward & observe new state (in env)
      next_items_ids, reward, done, _ = env.step(recommended_item, top_k=top_k)
      if top_k:
        correct_list = [1 if r > 0 else 0 for r in reward]
        # Metrics are computed directly: sklearn's ndcg_score rejects 1-D
        # input, and an all-ones score vector carries no rank information.
        step_precision, step_ndcg = _precision_and_ndcg(correct_list)
        sum_precision += step_precision
        sum_ndcg += step_ndcg
        hit_miss += correct_list
      else:
        hit_miss.append(1 if reward > 0 else 0)

      items_ids = next_items_ids
      episode_rewards.append(np.sum(reward))
      steps += 1

  if steps == 0:
    # Nothing was recommended; avoid dividing by zero / averaging nothing.
    return (0.0, 0.0) if top_k else (0.0, 0.0, 0.0)

  if top_k:
    return sum_precision / steps, sum_ndcg / steps

  precision, ndcg = _precision_and_ndcg(hit_miss)
  return np.mean(episode_rewards), precision, ndcg


In [None]:
STATE_SIZE = 10
MAX_EPISODE_NUM = 8000
STATE_SIZE = 10

In [None]:
sum_precision = 0
sum_ndcg = 0

for user_id in eval_users_dict.keys():
    env = Env(eval_users_dict, users_history_lens, STATE_SIZE, fix_user_id=user_id)
    recommender = Recommender (env, users, items, STATE_SIZE)
    # recommender.load_model('') HERE LOAD THE MODEL FOR ACTOR AND CRITIC
    precision, ndcg = evaluate(recommender, env)
    sum_precision += precision
    sum_ndcg += ndcg
    
print(f'precision@{TOP_K} : {sum_precision/len(eval_users_dict)}, ndcg@{TOP_K} : {sum_ndcg/len(eval_users_dict)}')