In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError

In [3]:
# Raw ratings table; the path is resolved relative to the notebook's working directory.
RATINGS_CSV = 'ratings.csv'
ratings = pd.read_csv(RATINGS_CSV)


In [5]:
# Sanity check: (rows, columns) of the ratings table as loaded from disk.
ratings.shape

(25000095, 4)

In [6]:
# Re-index the raw userId / movieId values to dense 0..N-1 integer codes so
# they can be used directly as row indices into the embedding tables.
# pd.factorize assigns codes in order of first appearance (the same order
# enumerate(Series.unique()) produces) and does so in a single pass, instead
# of building a Python dict and mapping every row through it.
user_codes, user_uniques = pd.factorize(ratings['userId'].to_numpy())
movie_codes, movie_uniques = pd.factorize(ratings['movieId'].to_numpy())

# NOTE: adds two columns to `ratings` in place; re-running the cell simply
# overwrites them with the same values.
ratings['user_id_enc'] = user_codes
ratings['movie_id_enc'] = movie_codes

# raw id -> dense code lookups, kept so ids can be translated outside this frame
user_id_map = {uid: idx for idx, uid in enumerate(user_uniques)}
movie_id_map = {mid: idx for idx, mid in enumerate(movie_uniques)}

# Vocabulary sizes for the embedding layers (codes run 0..n-1).
n_users = len(user_uniques)
n_movies = len(movie_uniques)

In [19]:
# Display the ratings frame; the rendered output is truncated to the first and last rows.
ratings

Unnamed: 0,userId,movieId,rating,timestamp,user_id_enc,movie_id_enc
0,1,296,5.0,1147880044,0,0
1,1,306,3.5,1147868817,0,1
2,1,307,5.0,1147868828,0,2
3,1,665,5.0,1147878820,0,3
4,1,899,3.5,1147868510,0,4
...,...,...,...,...,...,...
25000090,162541,50872,4.5,1240953372,162540,544
25000091,162541,55768,2.5,1240951998,162540,4574
25000092,162541,56176,2.0,1240950697,162540,7115
25000093,162541,58559,4.0,1240953434,162540,578


In [7]:
# Hold out a quarter of the rows; the fixed seed makes the shuffle repeatable.
TEST_FRACTION = 0.25
SPLIT_SEED = 42
train, test = train_test_split(
    ratings,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [9]:

EMBEDDING_DIM = 32  # width of each learned id vector


def _embedding_tower(vocab_size, dim=EMBEDDING_DIM):
    """Build a sub-model that maps one integer id to a flat embedding vector.

    Args:
        vocab_size: Number of distinct ids; valid inputs are 0..vocab_size-1.
        dim: Length of the embedding vector produced for each id.

    Returns:
        A ``Sequential`` of ``Embedding`` followed by ``Flatten``.
    """
    # `input_length` is intentionally not passed: it is a deprecated Embedding
    # argument, and the sequence length is taken from the tensor the tower is
    # called on.
    return Sequential([
        Embedding(input_dim=vocab_size, output_dim=dim),
        Flatten(),
    ])


# Users and movies get separate, identically shaped towers.
user_model = _embedding_tower(n_users)
movie_model = _embedding_tower(n_movies)


In [11]:
# One encoded id per example on each input. The dtype is declared as integer
# explicitly: without it the inputs default to a float dtype, so ids would be
# routed through floating point (exact only for small integers) before the
# embedding lookup.
user_input = Input(shape=(1,), dtype='int32')
movie_input = Input(shape=(1,), dtype='int32')

In [12]:

# Pass each id input through its own embedding sub-model.
user_vec, movie_vec = user_model(user_input), movie_model(movie_input)

# Join the user and movie vectors into a single feature vector per example.
merge_layer = Concatenate()
concat = merge_layer([user_vec, movie_vec])

In [13]:

# Fully connected head: two ReLU hidden layers (64 then 32 units) followed by
# a single linear unit that emits the predicted rating.
dense_block = Sequential()
for hidden_units in (64, 32):
    dense_block.add(Dense(hidden_units, activation='relu'))
dense_block.add(Dense(1, activation='linear'))


In [14]:
# Feed the concatenated embeddings through the dense head, then wrap the whole
# graph (two id inputs -> one rating output) as a trainable model.
output = dense_block(concat)
model = Model(
    inputs=[user_input, movie_input],
    outputs=output,
)

In [15]:
# Regression setup: optimise mean squared error with Adam and report mean
# absolute error alongside the loss.
adam_opt = Adam(learning_rate=1e-3)
mse_loss = MeanSquaredError()
mae_metric = MeanAbsoluteError()

model.compile(optimizer=adam_opt, loss=mse_loss, metrics=[mae_metric])


In [16]:
# Print an overview of the assembled model's layers before training.
model.summary()

In [17]:
# The model takes [user ids, movie ids] in the same order as its inputs.
id_columns = ['user_id_enc', 'movie_id_enc']
train_inputs = [train[column] for column in id_columns]
val_inputs = [test[column] for column in id_columns]

# NOTE(review): the test split doubles as the validation set here, so the
# val_* numbers are not an untouched hold-out estimate.
history = model.fit(
    train_inputs,
    train['rating'],
    batch_size=8192,
    epochs=10,
    validation_data=(val_inputs, test['rating']),
)


Epoch 1/10
[1m2289/2289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 21ms/step - loss: 0.9377 - mean_absolute_error: 0.7121 - val_loss: 0.7059 - val_mean_absolute_error: 0.6410
Epoch 2/10
[1m2289/2289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 22ms/step - loss: 0.6770 - mean_absolute_error: 0.6253 - val_loss: 0.6705 - val_mean_absolute_error: 0.6208
Epoch 3/10
[1m2289/2289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 20ms/step - loss: 0.6376 - mean_absolute_error: 0.6059 - val_loss: 0.6521 - val_mean_absolute_error: 0.6138
Epoch 4/10
[1m2289/2289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 21ms/step - loss: 0.6042 - mean_absolute_error: 0.5891 - val_loss: 0.6426 - val_mean_absolute_error: 0.6070
Epoch 5/10
[1m2289/2289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 21ms/step - loss: 0.5765 - mean_absolute_error: 0.5748 - val_loss: 0.6393 - val_mean_absolute_error: 0.6044
Epoch 6/10
[1m2289/2289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [18]:
# Persist the trained model to disk in the notebook's working directory.
# NOTE(review): the .h5 suffix presumably selects the legacy HDF5 format —
# confirm what downstream loaders expect before changing the path.
MODEL_PATH = "your_trained_movielens_model.h5"
model.save(MODEL_PATH)
print("✅ Sequential model saved!")



✅ Sequential model saved!
