# Recurrent Recommender Networks
## Trabalho Final da disciplina de Recuperação da Informação - UFRJ 2021.2
##  Feito por Rafael da Silva Fernandes - DRE: 117196229
### Artigo original: 
[Wu, Chao-Yuan, et al. "Recurrent recommender networks." Proceedings of the tenth ACM international conference on web search and data mining. 2017.](https://dl.acm.org/doi/epdf/10.1145/3018661.3018689)

## Importando bibliotecas

In [1]:
import sys
import pandas as pd
import numpy as np
import tensorflow.compat.v1 as tf
import tensorflow.nn as nn

import time
import warnings
warnings.filterwarnings('ignore')

## Hiperparâmetros

In [2]:
batch_size = 50
hidden_size = 128
out_size = 64
n_step = 1
learning_rate = 0.01
verbose = 10

## Pré-processamento

In [3]:
ratings_title = ['UserID', 'MovieID', 'Rating', 'TimeStamp']

ratings = pd.read_table("ratings.dat", sep = '::', header = None, names = ratings_title, engine = 'python')
ratings = ratings.sort_values(by = ['TimeStamp'])
ratings.head()

Unnamed: 0,UserID,MovieID,Rating,TimeStamp
1000138,6040,858,4,956703932
1000153,6040,2384,4,956703954
999873,6040,593,5,956703954
1000007,6040,1961,4,956703977
1000192,6040,2019,5,956703977


In [4]:
num_users = max(ratings['UserID'])
num_movies = max(ratings['MovieID'])

## Preparando o modelo

### Placeholders

In [5]:
tf.disable_eager_execution()

userID = tf.placeholder(tf.int32, shape = [None, 1], name = 'userID')
movieID = tf.placeholder(tf.int32, shape = [None, 1], name = 'movieID')
rating = tf.placeholder(tf.float32, shape = [None, 1], name = 'rating')
dropout = tf.placeholder(tf.float32, name = 'dropout')

### User embedding layer and lookup

In [6]:
with tf.variable_scope("userID_embedding", reuse = tf.AUTO_REUSE):

    Embedding_User = tf.get_variable(
        name = "embedding_users", 
        shape=  [num_users, hidden_size],
        initializer = tf.glorot_uniform_initializer()
    )
    
    uid_layer = nn.embedding_lookup(Embedding_User, userID)
    uid_layer = nn.relu(uid_layer)

### Movie embedding layer and lookup

In [7]:
with tf.variable_scope("movie_embedding", reuse = tf.AUTO_REUSE):
    
    Embedding_Item = tf.get_variable(
        name = "embedding_items", 
        shape = [num_movies, hidden_size],
        initializer=tf.glorot_uniform_initializer()
    )
    
    mid_layer = nn.embedding_lookup(Embedding_Item, movieID)
    mid_layer = nn.relu(mid_layer)

### Feedforward do User para a Rede Neural Recorrente (Recurrent Neural Networks)

In [8]:
with tf.variable_scope("user_rnn_cell", reuse = tf.AUTO_REUSE):
    
    userCell = tf.keras.layers.LSTMCell(hidden_size)
    userInput = tf.transpose(uid_layer, [1, 0, 2])
    userOutputs, userStates = tf.nn.dynamic_rnn(userCell, userInput, dtype = tf.float32)
    userOutput = userOutputs[-1]

Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API


### Feedforward do Movie para a RNN

In [9]:
with tf.variable_scope("movie_rnn_cell", reuse = tf.AUTO_REUSE):
    
    movieCell = tf.keras.layers.LSTMCell(hidden_size)
    movieInput = tf.transpose(mid_layer, [1, 0, 2])
    movieOutputs, movieStates = tf.nn.dynamic_rnn(movieCell, movieInput, dtype = tf.float32)
    movieOutput = movieOutputs[-1]

### Previsão do Rating

In [10]:
with tf.variable_scope("pred_layer", reuse = tf.AUTO_REUSE):
    
    userVector = tf.layers.dense(userOutput, out_size, activation = None)
    movieVector = tf.layers.dense(movieOutput, out_size, activation = None)
                
    pred = tf.reduce_sum(tf.multiply(userVector, movieVector), axis = 1, keepdims = True)

### Loss

In [11]:
loss = tf.reduce_mean(tf.losses.mean_squared_error(rating, pred))

### Otimizador

In [12]:
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

## Treinando o modelo

In [13]:
sess = tf.Session() 
sess.run(tf.global_variables_initializer())

In [14]:
train = ratings.values
length = len(train)
batches = int(length/ batch_size) + 1

train_loss = []

start_time = time.perf_counter()

for i in range(batches):
    
    minIdx = i * batch_size
    maxIdx = min(length, (i + 1) * batch_size)
    
    train_batch = train[minIdx:maxIdx]
    
    inputs = np.array([(i[0] - 1, i[1] - 1, float(i[2])) for i in train_batch])
    
    feed_dict = {
        userID: np.expand_dims(inputs[:, 0], 1), 
        movieID: np.expand_dims(inputs[:, 1], 1), 
        rating: np.expand_dims(inputs[:, 2], 1), dropout: 1.
    }

    _, batch_loss = sess.run([optimizer, loss], feed_dict=feed_dict)
    train_loss.append(batch_loss)

    if verbose and i % verbose == 0:
        sys.stdout.write('\r{}/ {}： loss = {}'.format(
            i, batches, np.sqrt(np.mean(train_loss[-20:]))
        ))
        sys.stdout.flush()
        
total_time = time.perf_counter() - start_time
print("\nLevou um total de: " +  str(total_time) + " segundos.")

20000/ 20005： loss = 0.9647672772407532
Levou um total de: 4246.4992781 segundos.


### Avaliação dos últimos 1000

In [15]:
train_batch = train[-1000:]

inputs = np.array([(i[0] - 1, i[1] - 1, float(i[2])) for i in train_batch])

feed_dict = {
    userID: np.expand_dims(inputs[:, 0], 1), 
    movieID: np.expand_dims(inputs[:, 1], 1), 
    rating: np.expand_dims(inputs[:, 2], 1), dropout: 1.
}

p = sess.run(pred, feed_dict = feed_dict)
df = pd.DataFrame({'act': inputs[:, 2], 'pred': p.reshape(-1)})
df.head()

Unnamed: 0,act,pred
0,5.0,4.613207
1,4.0,4.559071
2,5.0,4.070119
3,5.0,4.357347
4,5.0,4.232045


### RMSE, MAE e MAPE

In [16]:
a = df['act'].values - df['pred'].values

rmse = np.mean(np.power(a, 2))
mae = np.mean(np.abs(a))
mape = np.mean(np.abs(a/ df['act'].values)) * 100

print("Avaliação dos últimos 1000")
print("RMSE:", rmse, "| MAE:", mae, "| MAPE:", mape)

Avaliação dos últimos 1000
RMSE: 0.8138423636782184 | MAE: 0.7161470744609832 | MAPE: 27.426932344834004
