In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Masking
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [4]:
df = pd.read_csv("test_set_1.csv")
df = df.sort_values(by=["userId", "timestamp"]).reset_index(drop=True)
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,541,5.0,943227521
1,1,166,5.0,943228442
2,1,1784,1.0,943228545
3,1,1944,2.0,943231120
4,1,1208,5.0,943231192


In [5]:
df["step"] = df.groupby("userId").cumcount()


In [6]:
features = ["movieId", "timestamp", "step"]  # manteniendo solo columnas numéricas
X_all = df[features].values.astype("float32")
y_all = df["rating"].values.astype("float32")

In [7]:
X_seq, y_seq = [], []

for uid, group in df.groupby("userId"):
    feat_seq = group[features].values.astype("float32")
    rating_seq = group["rating"].values.astype("float32")

    if len(feat_seq) > 1:
        X_seq.append(feat_seq[:-1])
        y_seq.append(rating_seq[1:])


In [8]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

X = pad_sequences(X_seq, padding="post", dtype="float32", maxlen=100)
y = pad_sequences(y_seq, padding="post", dtype="float32", maxlen=100)
y = np.expand_dims(y, axis=-1)

In [9]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
model = Sequential()
model.add(Masking(mask_value=0., input_shape=(X.shape[1], X.shape[2])))
model.add(SimpleRNN(32, return_sequences=True))
model.add(Dense(1))

  super().__init__(**kwargs)


In [11]:
model.compile(optimizer="adam", loss="mse")
model.summary()

In [12]:
model.fit(X_train, y_train, epochs=3, batch_size=128, validation_data=(X_val, y_val))

Epoch 1/3
[1m1237/1237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 12ms/step - loss: 7.5763 - val_loss: 1.1048
Epoch 2/3
[1m1237/1237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - loss: 1.1111 - val_loss: 1.1046
Epoch 3/3
[1m1237/1237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 12ms/step - loss: 1.1101 - val_loss: 1.1050


<keras.src.callbacks.history.History at 0x12b23f0e090>

In [13]:
predictions = model.predict(X_val)
mask = (y_val != 0)
rmse = np.sqrt(mean_squared_error(y_val[mask], predictions[mask]))
print(f"RMSE base (dataset original): {rmse:.4f}")

[1m1237/1237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step
RMSE base (dataset original): 1.0504
