# Dependencies

In [1]:
import os
import sys
import pandas as pd
import numpy as np

from utils import load_data

from recommender import *
from replay_buffer import *
from replay_memory import *
from actor import *
from critic import *
from state_representation import *
from env import *

In [2]:
# Directory that holds the MovieLens files and the embedding CSVs read by the cells below.
DATA_DIR = "data"
# Passed to Env and DRRAgent, and used to slice the first items of a user's history.
STATE_SIZE = 10
# Number of episodes the training loop runs.
MAX_EPISODES_NUM = 10

# Load the MovieLens Dataset

In [3]:
# Load the ratings table through the project helper; later cells read its
# UserID, MovieID, Rating and Timestamp columns.
ratings = load_data(DATA_DIR, "ratings.dat")

In [4]:
# Movie metadata with pre-computed embeddings; the CSV's saved index column
# ("Unnamed: 0") is discarded as part of the same expression.
movies = pd.read_csv(
    os.path.join(DATA_DIR, "movies_with_descriptions_and_embeddings_256.csv")
).drop(columns=["Unnamed: 0"])

In [5]:
movies.head()

Unnamed: 0,MovieID,Title,Genres,Description,0,1,2,3,4,5,...,246,247,248,249,250,251,252,253,254,255
0,1,Toy Story (1995),Animation|Children's|Comedy,Toy Story (bra: Toy Story: Um Mundo de Aventur...,-0.102828,0.182277,0.002379,0.026427,0.022161,-0.051857,...,0.057287,0.036732,0.002791,0.002211,0.071692,-0.030306,0.029835,0.078229,-0.007015,0.038948
1,2,Jumanji (1995),Adventure|Children's|Fantasy,Jumanji é um filme americano de 1995 baseado e...,0.0147,0.140769,-0.021312,-0.014126,-0.053827,-0.004669,...,0.009891,0.002804,-0.061368,-0.034619,0.113774,-0.027829,-0.033963,-0.039537,-0.002083,0.051805
2,3,Grumpier Old Men (1995),Comedy|Romance,Grumpier Old Men (Brasil: Dois Velhos Mais Rab...,-0.026903,0.098825,-0.039313,0.095288,0.01746,-0.006847,...,-0.002634,0.024492,-0.086729,-0.000188,0.164671,-0.048271,0.052323,-0.013715,-0.024849,-0.002047
3,4,Waiting to Exhale (1995),Comedy|Drama,Waiting to Exhale (br: Falando de Amor; pt: 4 ...,-0.089902,0.178295,-0.043994,0.050084,-0.047126,0.031553,...,-0.014515,0.025317,-0.039006,-0.048315,0.079171,0.022229,0.087814,0.0415,0.064787,0.039644
4,5,Father of the Bride Part II (1995),Comedy,O Pai da Noiva (em inglês: Father of the Brid...,-0.06944,0.184769,-0.040607,0.044762,0.018103,-0.01985,...,-0.007717,0.006737,-0.121712,0.005513,0.09211,0.020565,0.086717,0.016204,0.011514,0.029713


In [6]:
# User descriptions with pre-computed embeddings; the CSV's saved index column
# ("Unnamed: 0") is discarded as part of the same expression.
users = pd.read_csv(
    os.path.join(DATA_DIR, "users_with_descriptions_and_embeddings_256.csv")
).drop(columns=["Unnamed: 0"])

In [7]:
users.head()

Unnamed: 0,UserID,Description_User,0,1,2,3,4,5,6,7,...,246,247,248,249,250,251,252,253,254,255
0,1,Este é o usuário de ID 1. Seu gênero é female ...,-0.057122,0.075298,0.004352,-0.009412,-0.015738,-0.038979,0.054973,0.049346,...,-0.040991,-0.075571,-0.008645,0.004374,0.070797,0.007613,0.083756,0.022218,0.047948,0.027094
1,2,Este é o usuário de ID 2. Seu gênero é male e ...,-0.010772,0.056558,0.002971,-0.006776,0.023203,0.022011,0.086239,0.050919,...,-0.080761,-0.063326,-0.042024,0.050113,0.046536,0.008862,0.09797,-0.024396,0.056816,0.032726
2,3,Este é o usuário de ID 3. Seu gênero é male e ...,-0.017504,0.047871,-0.001712,-0.009784,-0.021276,0.030254,0.025563,0.063957,...,-0.051223,-0.068534,-0.041843,0.035331,0.030979,0.019342,0.09961,-0.012089,0.030866,0.010227
3,4,Este é o usuário de ID 4. Seu gênero é male e ...,-0.07145,0.097593,0.004778,0.029669,0.005703,0.021013,0.053544,0.073858,...,-0.078885,-0.043247,-0.078745,0.051415,0.003763,0.004712,0.075394,-0.007788,0.009895,-0.00829
4,5,Este é o usuário de ID 5. Seu gênero é male e ...,-0.053805,0.081029,-0.004258,-0.024752,-0.036245,0.044592,0.044816,0.063308,...,-0.068509,-0.089697,-0.072361,0.01297,0.022103,0.031734,0.172332,0.011926,-0.00238,0.033484


# Data preprocessing

In [8]:
# Parse the raw "movies.dat" file: one record per line, fields separated by "::".
# The context manager closes the file handle, which a bare open() call leaves open.
with open(os.path.join(DATA_DIR, 'movies.dat'), encoding='latin-1') as movies_file:
    movies_list = [line.strip().split("::") for line in movies_file]

In [9]:
# Map each record's first field (the movie id, kept as a string) to its remaining fields.
movies_id_to_movies = {movie_id: movie_fields for movie_id, *movie_fields in movies_list}

In [10]:
movies_id_to_movies

{'1': ['Toy Story (1995)', "Animation|Children's|Comedy"],
 '2': ['Jumanji (1995)', "Adventure|Children's|Fantasy"],
 '3': ['Grumpier Old Men (1995)', 'Comedy|Romance'],
 '4': ['Waiting to Exhale (1995)', 'Comedy|Drama'],
 '5': ['Father of the Bride Part II (1995)', 'Comedy'],
 '6': ['Heat (1995)', 'Action|Crime|Thriller'],
 '7': ['Sabrina (1995)', 'Comedy|Romance'],
 '8': ['Tom and Huck (1995)', "Adventure|Children's"],
 '9': ['Sudden Death (1995)', 'Action'],
 '10': ['GoldenEye (1995)', 'Action|Adventure|Thriller'],
 '11': ['American President, The (1995)', 'Comedy|Drama|Romance'],
 '12': ['Dracula: Dead and Loving It (1995)', 'Comedy|Horror'],
 '13': ['Balto (1995)', "Animation|Children's"],
 '14': ['Nixon (1995)', 'Drama'],
 '15': ['Cutthroat Island (1995)', 'Action|Adventure|Romance'],
 '16': ['Casino (1995)', 'Drama|Thriller'],
 '17': ['Sense and Sensibility (1995)', 'Drama|Romance'],
 '18': ['Four Rooms (1995)', 'Thriller'],
 '19': ['Ace Ventura: When Nature Calls (1995)', 'Come

In [11]:
# Make sure every ratings column holds integers.
# A single vectorised cast replaces the element-wise DataFrame.applymap(int),
# which is deprecated in recent pandas releases; "int64" is spelled out so the
# resulting dtype does not depend on the platform.
ratings = ratings.astype("int64")

  ratings = ratings.applymap(int)


In [12]:
# Create the user dictionary: one empty list per user id found in the ratings.
# A later cell appends the (movie, rating) pairs to these lists.
users_dict = dict((user, []) for user in set(ratings["UserID"]))
users_dict[1]

[]

In [13]:
# Put the ratings in chronological order (ascending Timestamp is the default direction).
ratings = ratings.sort_values("Timestamp")
ratings.head()

Unnamed: 0,UserID,MovieID,Rating,Timestamp
1000138,6040,858,4,956703932
1000153,6040,2384,4,956703954
999873,6040,593,5,956703954
1000007,6040,1961,4,956703977
1000192,6040,2019,5,956703977


In [14]:
# Fill the per-user histories, following the current row order of `ratings`.
#   users_dict                 -> every (movie, rating) pair of the user
#   users_dict_for_history_len -> only the pairs with rating >= 4
# Both dictionaries are rebuilt from scratch so that re-running this cell
# cannot append the same ratings a second time.
user_ids = set(ratings["UserID"])
users_dict = {user: [] for user in user_ids}
users_dict_for_history_len = {user: [] for user in user_ids}
# Walking the three columns as NumPy arrays visits the rows in the same order
# as DataFrame.iterrows() without building a Series object for every row.
for user_id, movie_id, rating_value in zip(
    ratings["UserID"].to_numpy(),
    ratings["MovieID"].to_numpy(),
    ratings["Rating"].to_numpy(),
):
    users_dict[user_id].append((movie_id, rating_value))
    if rating_value >= 4:
        users_dict_for_history_len[user_id].append((movie_id, rating_value))

In [15]:
# Length of the rating >= 4 history of each user, in ascending user-id order.
# The ids are sorted explicitly because the train split slices this list by
# position, and a set's iteration order is not guaranteed to follow the ids.
users_history_lens = [
    len(users_dict_for_history_len[user_id])
    for user_id in sorted(users_dict_for_history_len)
]

In [16]:
# Sizes later handed to the agent: the largest user id and the number of movie rows.
users_num = int(ratings["UserID"].max())
items_num = movies.shape[0]

In [17]:
# Sanity check: show the user count and the item count on one line.
print(f"{users_num} {items_num}")

6040 3883


# Split the training set

In [18]:
# Training setting: user ids 1..train_users_num (80% of users_num) form the training set.
train_users_num = int(users_num * 0.8)
train_items_num = items_num
# dict.get is kept so an id with no entry in users_dict maps to None instead of raising.
train_users_dict = {
    uid: users_dict.get(uid)
    for uid in range(1, train_users_num + 1)
}
# Positional slice: the first train_users_num entries of the history-length list.
train_users_history_lens = users_history_lens[:train_users_num]

# Training

In [19]:
from recommender import *

In [20]:
# Environment built from the training users only.
env = Env(
    train_users_dict,
    train_users_history_lens,
    movies_id_to_movies,
    STATE_SIZE,
)
# Agent to be trained; wandb logging is disabled for this run.
recommender = DRRAgent(
    env,
    users_num,
    items_num,
    STATE_SIZE,
    use_wandb=False,
)
# Build the actor first, then the critic, before the training loop uses them.
recommender.actor.build_networks()
recommender.critic.build_networks()

In [22]:
episodic_precision_history = []
top_k = False

# NOTE(review): the output saved with this cell is
# "AttributeError: 'tuple' object has no attribute 'rank'". Where it was raised
# is not visible from the notebook, so re-run the cell to check whether it
# still occurs.
for episode in range(MAX_EPISODES_NUM):
    # Reset the per-episode statistics
    episode_reward = 0
    correct_count = 0
    steps = 0
    q_loss = 0
    mean_action = 0
    # Reset the environment
    user_id, items_ids, done = env.reset()
    # print(f'user_id : {user_id}, rated_items_length:{len(env.user_items)}')
    # print('items : ', env.get_items_names(items_ids))

    # user_id is never reassigned inside the episode, so its embedding is
    # looked up once here instead of on every step.
    user_eb = recommender.users_id_to_ebs[recommender.users_id_to_ebs["UserID"] == user_id]
    user_eb = user_eb.iloc[:, 1:].to_numpy()
    user_eb_batch = np.expand_dims(user_eb, axis=0)

    while not done:

        # Observe current state & find action
        ## Look up the embeddings of the current items (the first column is dropped)
        items_ebs = recommender.movies_id_to_ebs.loc[items_ids].iloc[:, 1:].to_numpy()
        ## Produce the state with the SRM
        state = recommender.srm_ave([user_eb_batch, np.expand_dims(items_ebs, axis=0)])

        ## Produce the action (ranking scores)
        action = recommender.actor.network(state)

        ## ε-greedy exploration
        if recommender.epsilon > np.random.uniform() and not recommender.is_test:
            recommender.epsilon -= recommender.epsilon_decay
            action += np.random.normal(0, recommender.std, size=action.shape)

        ## Recommend an item
        recommended_item = recommender.recommend_item(action, env.recommended_items, top_k=top_k)

        # Calculate reward & observe new state (in env)
        ## Step
        next_items_ids, reward, done, _ = env.step(recommended_item, top_k=top_k)
        if top_k:
            reward = np.sum(reward)

        # Get next_state
        next_items_eb = recommender.movies_id_to_ebs.loc[next_items_ids].iloc[:, 1:].to_numpy()
        next_state = recommender.srm_ave([user_eb_batch, np.expand_dims(next_items_eb, axis=0)])

        # Store the transition in the buffer
        recommender.buffer.append(state, action, reward, next_state, done)

        if recommender.buffer.crt_idx > 1 or recommender.buffer.is_full:
            # Sample a minibatch
            batch_states, batch_actions, batch_rewards, batch_next_states, batch_dones, weight_batch, index_batch = recommender.buffer.sample(recommender.batch_size)

            # Set TD targets
            target_next_action = recommender.actor.target_network(batch_next_states)
            qs = recommender.critic.network([target_next_action, batch_next_states])
            target_qs = recommender.critic.target_network([target_next_action, batch_next_states])
            min_qs = tf.raw_ops.Min(input=tf.concat([target_qs, qs], axis=1), axis=1, keep_dims=True) # Double Q method
            td_targets = recommender.calculate_td_target(batch_rewards, min_qs, batch_dones)

            # Update priority
            for (p, i) in zip(td_targets, index_batch):
                recommender.buffer.update_priority(abs(p[0]) + recommender.epsilon_for_priority, i)

            # Update critic network
            q_loss += recommender.critic.train([batch_actions, batch_states], td_targets, weight_batch)

            # Update actor network
            # Fixed: the attribute was misspelled "batch_sizecritic"; dq_da is
            # called on the critic, like the other critic calls in this loop.
            s_grads = recommender.critic.dq_da([batch_actions, batch_states])
            recommender.actor.train(batch_states, s_grads)
            recommender.actor.update_target_network()
            recommender.critic.update_target_network()

        items_ids = next_items_ids
        episode_reward += reward
        mean_action += np.sum(action[0])/(len(action[0]))
        steps += 1

        if reward > 0:
            correct_count += 1

        print(f'recommended items : {len(env.recommended_items)},  epsilon : {recommender.epsilon:0.3f}, reward : {reward:+}', end='\r')

        if done:
            # End of episode: report and record the share of steps with a positive reward
            print()
            precision = int(correct_count/steps * 100)
            print(f'{episode}/{MAX_EPISODES_NUM}, precision : {precision:2}%, total_reward:{episode_reward}, q_loss : {q_loss/steps}, mean_action : {mean_action/steps}')
            if recommender.use_wandb:
                recommender.wandb.log({'precision':precision, 'total_reward':episode_reward, 'epsilone': recommender.epsilon, 'q_loss' : q_loss/steps, 'mean_action' : mean_action/steps})
            episodic_precision_history.append(precision)
        
    # if (episode+1)%50 == 0:
    #     plt.plot(episodic_precision_history)
    #     plt.savefig(os.path.join(save_model_weight_dir, f'images/training_precision_%_top_5.png'))

    # if (episode+1)%1000 == 0 or episode == MAX_EPISODES_NUM-1:
    #     save_model(os.path.join(save_model_weight_dir, f'actor_{episode+1}_fixed.h5'),
    #                     os.path.join(save_model_weight_dir, f'critic_{episode+1}_fixed.h5'))

AttributeError: 'tuple' object has no attribute 'rank'

In [21]:
# Movie ids of the first STATE_SIZE entries in user 6039's history.
items = [movie_id for movie_id, _rating in users_dict[6039][:STATE_SIZE]]
items

[111, 282, 2067, 1230, 930, 947, 3088, 3022, 3133, 1294]

In [22]:
# Turn the selected movie ids into a set so they can be subtracted from the candidate ids.
recommended_items = set(items)
recommended_items

{111, 282, 930, 947, 1230, 1294, 2067, 3022, 3088, 3133}

In [23]:
# Candidate ids: every integer in [0, items_num) that is not in recommended_items.
items_ids = np.array(list(set(range(items_num)).difference(recommended_items)))
items_ids

array([   0,    1,    2, ..., 3880, 3881, 3882])

In [72]:
# Embeddings of the candidate items, taken from the agent's movie table.
# The bare name `movies_id_to_ebs` is not defined by any cell of this notebook
# (it only existed as leftover kernel state), so the agent attribute that the
# training loop reads is used instead.
items_ebs = recommender.movies_id_to_ebs.loc[items_ids]
# Drop the first column, keeping only the embedding columns.
items_ebs = items_ebs.iloc[:, 1:]
a = items_ebs.to_numpy()
a.shape

(3873, 256)

In [73]:
# Add a leading axis of length 1; the recorded output shows (3873, 256) -> (1, 3873, 256).
b = np.expand_dims(items_ebs, axis=0)
b.shape

(1, 3873, 256)

In [74]:
# Swap the last two axes and divide by the agent's embedding_dim;
# the recorded output shape is [1, 256, 3873].
c = tf.transpose(b, perm=(0,2,1))/recommender.embedding_dim
c.shape

TensorShape([1, 256, 3873])

In [75]:
# Conv1D layer built with positional arguments; in the Keras signature these are
# filters=1, kernel_size=1, strides=1.
wav = tf.keras.layers.Conv1D(1, 1, 1)

In [76]:
# Apply the convolution to c; the recorded output shape is [1, 256, 1].
d = wav(c)
d.shape

TensorShape([1, 256, 1])

In [77]:
# Swap the last two axes back; the recorded output shape is [1, 1, 256].
e = tf.transpose(d, perm=(0,2,1))
e.shape

TensorShape([1, 1, 256])

In [78]:
# Remove axis 1 (length 1); the recorded output shape is [1, 256].
# NOTE(review): a later cell rebinds the name `f` to a different array.
f = tf.squeeze(e, axis=1)
f.shape

TensorShape([1, 256])

In [79]:
# Add a leading axis to the user embedding; the recorded output shape is (1, 1, 256).
# NOTE(review): `user_eb` is only assigned in the training loop and in a cell
# further down, so this cell depends on execution order.
g = np.expand_dims(user_eb, axis=0)
g.shape

(1, 1, 256)

In [80]:
# Element-wise product of g and f through the Keras multiply helper;
# the recorded output shape is [1, 1, 256].
h = tf.keras.layers.multiply([g, f])
h.shape

TensorShape([1, 1, 256])

In [45]:
# Embedding row of user 6039 as a NumPy array, without the first column.
user_eb = (
    recommender.users_id_to_ebs
    .loc[recommender.users_id_to_ebs["UserID"] == 6039]
    .iloc[:, 1:]
    .to_numpy()
)

In [46]:
user_eb.shape

(1, 256)

In [51]:
# Add a leading axis to the user embedding; the recorded output shape is (1, 1, 256).
# NOTE(review): this rebinds `f`, which an earlier cell assigned from tf.squeeze.
f = np.expand_dims(user_eb, axis=0)
f.shape

(1, 1, 256)

In [52]:
# NOTE(review): the saved output of this cell is a ValueError about incompatible
# shapes (1, 256) and (3873,). Those shapes differ from the ones displayed for
# `f` and `e` in the cells above, so the error probably reflects stale kernel
# state — confirm by re-running from a fresh kernel.
tf.keras.layers.multiply([f, e])

ValueError: Inputs have incompatible shapes. Received shapes (1, 256) and (3873,)

In [21]:
# User embedding table: the same CSV as the `users` frame, loaded without the
# saved index column and the free-text description, leaving UserID plus the
# embedding columns.
# The path is built from DATA_DIR, like the other loads in this notebook,
# instead of hard-coding "./data".
users_id_to_ebs = pd.read_csv(
    os.path.join(DATA_DIR, "users_with_descriptions_and_embeddings_256.csv")
).drop(columns=["Unnamed: 0", "Description_User"])

In [35]:
# Embedding row of user 6039 as a NumPy array, without the first (UserID) column.
a = (
    users_id_to_ebs
    .loc[users_id_to_ebs["UserID"] == 6039]
    .iloc[:, 1:]
    .to_numpy()
)
a.shape

(1, 256)

In [40]:
# Embeddings of the candidate items as a NumPy array.
# The bare name `movies_id_to_ebs` is not defined by any cell of this notebook
# (it only existed as leftover kernel state), so the agent attribute that the
# training loop reads is used instead.
items_ebs = recommender.movies_id_to_ebs.loc[items_ids]
# Drop the first column, keeping only the embedding columns.
items_ebs = items_ebs.iloc[:, 1:]
items_ebs = items_ebs.to_numpy()

In [41]:
items_ebs

(3873, 256)