# IMPORTANDO LIBRERIAS

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Input, Dense
from sklearn.model_selection import train_test_split

# CARGA DE DATOS


In [3]:
df_ratings = pd.read_csv('../data/ratings_filtered.csv')

In [4]:
df_ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11016557 entries, 0 to 11016556
Data columns (total 5 columns):
 #   Column      Dtype  
---  ------      -----  
 0   Unnamed: 0  int64  
 1   userId      int64  
 2   movieId     int64  
 3   rating      float64
 4   timestamp   int64  
dtypes: float64(1), int64(4)
memory usage: 420.2 MB


In [5]:
df_ratings.head(5)

Unnamed: 0.1,Unnamed: 0,userId,movieId,rating,timestamp
0,0,1,110,1.0,1425941529
1,1,1,147,4.5,1425942435
2,2,1,858,5.0,1425941523
3,4,1,1246,5.0,1425941556
4,5,1,1968,4.0,1425942148


In [6]:
df_ratings.isnull().sum()

Unnamed: 0    0
userId        0
movieId       0
rating        0
timestamp     0
dtype: int64

In [7]:
df_test = df_ratings[:1000000]

# MODELAMIENTO

In [8]:
# Create user-item matrix
user_item_matrix = df_test.pivot(index = 'userId', columns = 'movieId', values = 'rating')

In [9]:
user_item_matrix_binary = ( user_item_matrix > 0 ).astype(int)

In [10]:
# Split the data into training and test sets
train_data, test_data = train_test_split(user_item_matrix_binary, test_size=0.2, random_state = 42)

In [11]:
# RBM parameters
num_visible = num_items = user_item_matrix_binary.shape[1]
num_hidden = 50
batch_size = 64
epochs = 10

In [12]:
def ModelRBM(num_visible, num_hidden):
    X = Input(shape=(num_visible,))
    W = tf.Variable(tf.random.normal([num_visible, num_hidden], 0.01))
    b_visible = tf.Variable(tf.random.normal([num_visible], 0.01))
    b_hidden = tf.Variable(tf.random.normal([num_hidden], 0.01))

    def sample_hidden(x):
        return tf.nn.sigmoid(tf.matmul(x, W) + b_hidden)

    def sample_visible(x):
        return tf.nn.sigmoid(tf.matmul(x, tf.transpose(W)) + b_visible)

    def gibbs_sample(k, x):
        for i in range(k):
            h = sample_hidden(x)
            x = sample_visible(h)
        return x

    h = sample_hidden(X)
    X_sample = gibbs_sample(10, X)
    h_sample = sample_hidden(X_sample)

    model = Model(inputs=X, outputs=[X_sample, h, h_sample])
    model.compile(optimizer=Adam(0.01), loss='mean_squared_error')
    return model

In [13]:
# Build the RBM model
def rbm_model(num_visible : int, num_hidden : int):
    visible_layer = Input(shape=(num_visible,))
    hidden_layer = Dense(num_hidden, activation='sigmoid')(visible_layer)
    visible_layer_reconstructed = Dense(num_visible, activation='sigmoid')(hidden_layer)
    return Model(inputs = visible_layer, outputs = visible_layer_reconstructed)

In [14]:
rbm = rbm_model(num_visible, num_hidden)

In [15]:
# Train the RBM model
rbm.compile(optimizer = Adam(learning_rate = 0.0001), loss = 'mean_squared_error')

In [16]:
rbm.fit(train_data, train_data, batch_size = batch_size, epochs = epochs, shuffle = True, validation_data = (test_data, test_data))

Epoch 1/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 0.2029 - val_loss: 0.0869
Epoch 2/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0697 - val_loss: 0.0372
Epoch 3/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0324 - val_loss: 0.0222
Epoch 4/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0204 - val_loss: 0.0160
Epoch 5/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.0153 - val_loss: 0.0129
Epoch 6/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0127 - val_loss: 0.0112
Epoch 7/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0111 - val_loss: 0.0101
Epoch 8/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0100 - val_loss: 0.0093
Epoch 9/10
[1m300/300[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x2aa8188e590>

# Experiments

In [17]:
# Extract the embeddings
user_embedding = rbm.layers[1].get_weights()[0].T
item_embedding = rbm.layers[2].get_weights()[0]

In [18]:
# Paramaters for prediction of the top N movies for a user
user_id = 1
n_movies = 10

In [19]:
# Function to recommend the top N movies for a user
def n_recommendations(user_id : int, n : int):
    user_movies = user_embedding[user_id - 1] 
    predicted_ratings = np.dot(user_embedding, user_movies)
    recommended_movies = np.argsort(predicted_ratings)[::-1]
    return recommended_movies[:n]

In [20]:
print(f'Top recomended movies for user {user_id} are {n_recommendations(user_id, 10)}')

Top recomended movies for user 1 are [ 0 39 31 40 27 49 11 21 37 16]


In [21]:
# save the model
rbm.save('../rbm.h5')

