In [44]:
import torch
from torch import nn
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from torch.utils.data import Dataset, DataLoader
import optuna

In [45]:
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Used device: {device}")

Used device: mps


In [46]:
df = pd.read_csv("movies_dataset.csv")
x = df[["user_id", "movie_id"]]
y = df["rating"]

X_f, X_test, y_f, y_test = train_test_split(
    x, y, test_size=0.15, random_state=42
)

X_train, X_valid, y_train, y_valid = train_test_split(
    X_f, y_f, test_size=0.12, random_state=42
)

### Baseline - average of ratings

In [47]:
average_rating = df["rating"].mean()
y_pred_baseline = [average_rating] * len(y_test)
baseline_mse = mean_squared_error(y_test, y_pred_baseline)

print(f"Ratings average global: {average_rating:.2f}")
print(f"Baseline mse: {baseline_mse:.4f}")

Ratings average global: 3.50
Baseline mse: 1.1329


### Neural Network

using pytorch Dataset to create custom dataset for neural network and DataLoaders

In [48]:
class MovieDataset(Dataset):
    def __init__(self, df, targets):
        self.users = torch.tensor(df["user_id"].values, dtype=torch.long)
        self.movies = torch.tensor(df["movie_id"].values, dtype=torch.long)
        self.ratings = torch.tensor(targets.values, dtype=torch.float32)
    
    def __len__(self):
        return len(self.ratings)
    
    def __getitem__(self, index):
        return self.users[index], self.movies[index], self.ratings[index]

In [49]:
train_set = MovieDataset(X_train, y_train)
valid_set = MovieDataset(X_valid, y_valid)
test_set = MovieDataset(X_test, y_test)

In [50]:
BATCH_SIZE = 1024
torch.manual_seed(42)

train_loader = DataLoader(
    train_set,
    batch_size=BATCH_SIZE,
    num_workers=0,
    shuffle=True,
    pin_memory=True
)

valid_loader = DataLoader(valid_set, batch_size=BATCH_SIZE, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, pin_memory=True)

Model architecture

In [51]:
class Recommender(nn.Module):
    def __init__(self, n_users, n_movies, embedding_dim=32, dropout_rate=0.3):
        super().__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.movie_embedding = nn.Embedding(n_movies, embedding_dim)

        self.mlp = nn.Sequential(
            nn.Linear(embedding_dim * 2, 128),
            nn.BatchNorm1d(128),
            nn.SiLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, 64),
            nn.SiLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, 1)
        )

        self.user_embedding.weight.data.uniform_(0, 0.05)
        self.movie_embedding.weight.data.uniform_(0, 0.05)
    
    def forward(self, user, movie):
        user_vector = self.user_embedding(user)
        movie_vector = self.movie_embedding(movie)
        x = torch.cat([user_vector, movie_vector], dim=-1)

        return self.mlp(x).squeeze()

In [52]:
n_users = df['user_id'].nunique()
n_movies = df['movie_id'].nunique()

train model and find hyperparameters using optuna

In [53]:
def objective(trial):
    emb_dim = trial.suggest_categorical("embedding_dim", [16, 32, 64])
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    dropout = trial.suggest_float("dropout", 0.2, 0.5)

    model = Recommender(n_users, n_movies, emb_dim, dropout).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    n_epochs = 5

    for epoch in range(n_epochs):
        model.train()
        for users, movies, ratings in train_loader:
            users, movies, ratings = users.to(device), movies.to(device), ratings.to(device)
            optimizer.zero_grad()
            pred = model(users, movies)
            loss = criterion(pred, ratings)
            loss.backward()
            optimizer.step()
        
        model.eval()
        valid_mse = 0
        with torch.inference_mode():
            for users, movies, ratings in valid_loader:
                users, movies, ratings = users.to(device), movies.to(device), ratings.to(device)
                preds = model(users, movies)
                valid_mse += criterion(preds, ratings).item()
        
        avg_valid_mse = valid_mse / len(valid_loader)

        trial.report(avg_valid_mse, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()
    
    return avg_valid_mse

In [54]:
torch.manual_seed(42)
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(
    direction="minimize",
    sampler=sampler,
    pruner=optuna.pruners.MedianPruner()
)

study.optimize(objective, n_trials=20)

[I 2026-01-25 19:39:06,123] A new study created in memory with name: no-name-98311912-481b-46cd-86bb-c43fb8e2b344
[I 2026-01-25 19:39:35,985] Trial 0 finished with value: 0.8685365879535675 and parameters: {'embedding_dim': 32, 'lr': 0.0015751320499779737, 'dropout': 0.24680559213273096}. Best is trial 0 with value: 0.8685365879535675.
[I 2026-01-25 19:40:11,118] Trial 1 finished with value: 0.824805548787117 and parameters: {'embedding_dim': 64, 'lr': 0.0015930522616241021, 'dropout': 0.41242177333881364}. Best is trial 1 with value: 0.824805548787117.
[I 2026-01-25 19:40:38,415] Trial 2 finished with value: 0.8557070595026016 and parameters: {'embedding_dim': 32, 'lr': 0.00026587543983272726, 'dropout': 0.2545474901621302}. Best is trial 1 with value: 0.824805548787117.
[I 2026-01-25 19:41:13,461] Trial 3 finished with value: 0.8755368548631668 and parameters: {'embedding_dim': 64, 'lr': 0.0007309539835912913, 'dropout': 0.2873687420594126}. Best is trial 1 with value: 0.824805548787