# IMPORTANDO LIBRERIAS

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Input, Dense
from sklearn.model_selection import train_test_split

# CARGA DE DATOS


In [2]:
df_ratings = pd.read_csv('../data/ratings.csv')

In [3]:
df_ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26024289 entries, 0 to 26024288
Data columns (total 4 columns):
 #   Column     Dtype  
---  ------     -----  
 0   userId     int64  
 1   movieId    int64  
 2   rating     float64
 3   timestamp  int64  
dtypes: float64(1), int64(3)
memory usage: 794.2 MB


In [4]:
df_ratings.head(5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,110,1.0,1425941529
1,1,147,4.5,1425942435
2,1,858,5.0,1425941523
3,1,1221,5.0,1425941546
4,1,1246,5.0,1425941556


In [5]:
df_ratings.isnull().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

In [6]:
df_test = df_ratings[:1000000]

# MODELAMIENTO

In [7]:
user_item_matrix = df_test.pivot(index = 'userId', columns = 'movieId', values = 'rating')

In [8]:
user_item_matrix_binary = ( user_item_matrix > 0 ).astype(int)

In [9]:
train_data, test_data = train_test_split(user_item_matrix_binary, test_size=0.2, random_state = 42)

In [10]:
# RBM parameters
num_visible = num_items = user_item_matrix_binary.shape[1]
num_hidden = 50
batch_size = 64
epochs = 10

In [11]:
def ModelRBM(num_visible, num_hidden):
    X = Input(shape=(num_visible,))
    W = tf.Variable(tf.random.normal([num_visible, num_hidden], 0.01))
    b_visible = tf.Variable(tf.random.normal([num_visible], 0.01))
    b_hidden = tf.Variable(tf.random.normal([num_hidden], 0.01))

    def sample_hidden(x):
        return tf.nn.sigmoid(tf.matmul(x, W) + b_hidden)

    def sample_visible(x):
        return tf.nn.sigmoid(tf.matmul(x, tf.transpose(W)) + b_visible)

    def gibbs_sample(k, x):
        for i in range(k):
            h = sample_hidden(x)
            x = sample_visible(h)
        return x

    h = sample_hidden(X)
    X_sample = gibbs_sample(10, X)
    h_sample = sample_hidden(X_sample)

    model = Model(inputs=X, outputs=[X_sample, h, h_sample])
    model.compile(optimizer=Adam(0.01), loss='mean_squared_error')
    return model

In [12]:
# Build the RBM model
def rbm_model(num_visible : int, num_hidden : int):
    visible_layer = Input(shape=(num_visible,))
    hidden_layer = Dense(num_hidden, activation='sigmoid')(visible_layer)
    visible_layer_reconstructed = Dense(num_visible, activation='sigmoid')(hidden_layer)
    return Model(inputs = visible_layer, outputs = visible_layer_reconstructed)

In [13]:
rbm = rbm_model(num_visible, num_hidden)

In [14]:
# Train the RBM model
rbm.compile(optimizer = Adam(learning_rate = 0.0001), loss = 'mean_squared_error')

In [15]:
rbm.fit(train_data, train_data, batch_size = batch_size, epochs = epochs, shuffle = True, validation_data = (test_data, test_data))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x11bc04beb88>

# Experiments

In [16]:
# Extract the embeddings
user_embedding = rbm.layers[1].get_weights()[0].T
item_embedding = rbm.layers[2].get_weights()[0]

In [17]:
# Paramaters for prediction of the top N movies for a user
user_id = 1
n_movies = 10

In [18]:
# Function to recommend the top N movies for a user
def n_recommendations(user_id : int, n : int):
    user_movies = user_embedding[user_id - 1] 
    predicted_ratings = np.dot(user_embedding, user_movies)
    recommended_movies = np.argsort(predicted_ratings)[::-1]
    return recommended_movies[:n]

In [19]:
print(f'Top recomended movies for user {user_id} are {n_recommendations(user_id, 10)}')

Top recomended movies for user 1 are [ 0 35  9 33 31 28 14 29 47 39]


In [20]:
# save the model
rbm.save('../rbm.h5')