In [3]:
# Neural Collaborative Filtering Training & Hyperparameter Tuning with Model Saving

import sys
import os
sys.path.append(os.path.abspath(".."))

import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from src.data.ncf_data import create_id_mappings, NetflixDataset
from src.models.ncf_model import NCF
from src.utils.metrics import batch_eval


def train_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader`` and return the mean loss.

    Args:
        model: Module invoked as ``model(users, items)``.
        dataloader: Iterable yielding ``(users, items, ratings)`` tensor batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss callable applied as ``criterion(outputs, ratings)``.
        device: Device every batch tensor is moved to before the forward pass.

    Returns:
        The batch losses averaged with each batch weighted by its size, over
        the samples that were actually iterated. Returns NaN when the
        dataloader yields no samples.
    """
    model.train()
    total_loss = 0.0
    samples_seen = 0
    for users, items, ratings in dataloader:
        users, items, ratings = users.to(device), items.to(device), ratings.to(device)
        batch_size = users.size(0)
        optimizer.zero_grad()
        outputs = model(users, items)
        loss = criterion(outputs, ratings)
        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the mean.
        total_loss += loss.item() * batch_size
        samples_seen += batch_size
    # Divide by what was iterated, not len(dataset): the two differ when the
    # loader drops the last batch or samples only part of the dataset.
    if samples_seen == 0:
        return float("nan")
    return total_loss / samples_seen

def eval_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` over ``dataloader`` without gradient tracking.

    Args:
        model: Module invoked as ``model(users, items)``.
        dataloader: Iterable yielding ``(users, items, ratings)`` tensor batches.
        criterion: Loss callable applied as ``criterion(outputs, ratings)``.
        device: Device every batch tensor is moved to before the forward pass.

    Returns:
        The batch losses averaged with each batch weighted by its size, over
        the samples that were actually iterated. Returns NaN when the
        dataloader yields no samples.
    """
    model.eval()
    total_loss = 0.0
    samples_seen = 0
    with torch.no_grad():
        for users, items, ratings in dataloader:
            users, items, ratings = users.to(device), items.to(device), ratings.to(device)
            batch_size = users.size(0)
            loss = criterion(model(users, items), ratings)
            total_loss += loss.item() * batch_size
            samples_seen += batch_size
    # Divide by what was iterated, not len(dataset): the two differ when the
    # loader drops the last batch or samples only part of the dataset.
    if samples_seen == 0:
        return float("nan")
    return total_loss / samples_seen

def generate_top_n_recs(model, train_df, user2idx, item2idx, users, n=10, device="cpu"):
    """Recommend up to ``n`` items per user that the user has not rated.

    Args:
        model: Module invoked as ``model(user_tensor, items_tensor)`` with two
            equal-length long tensors; expected to return one score per pair.
        train_df: DataFrame with ``userId`` and ``movieId`` columns; items a
            user rated here are excluded from that user's candidates.
        user2idx: Mapping from raw user id to model user index.
        item2idx: Mapping from raw item id to model item index.
        users: Iterable of raw user ids to produce recommendations for.
        n: Maximum number of recommendations per user.
        device: Device the index tensors are created on.

    Returns:
        Dict mapping each user id to a list of raw item ids ordered by
        descending score. The list is shorter than ``n`` when fewer candidates
        remain, and empty for users missing from ``user2idx`` or with no
        unrated items.
    """
    model.eval()
    users = list(users)  # iterated twice below; also accepts generators

    # Loop invariants: build the inverse mapping and the candidate pool once.
    idx2item = {idx: item for item, idx in item2idx.items()}
    all_item_idxs = set(idx2item)

    # Group the relevant ratings once instead of rescanning the frame per user.
    seen = train_df[train_df['userId'].isin(users)]
    rated_movies = seen.groupby('userId')['movieId'].agg(list).to_dict()

    top_n = {}
    with torch.no_grad():
        for uid in users:
            if uid not in user2idx:
                # The model has no index for this user, so nothing can be scored.
                top_n[uid] = []
                continue
            rated = {item2idx[m] for m in rated_movies.get(uid, ()) if m in item2idx}
            # Sorted so the candidate order (and tie-breaking) is deterministic.
            candidates = sorted(all_item_idxs - rated)
            if not candidates:
                top_n[uid] = []
                continue
            items_tensor = torch.tensor(candidates, dtype=torch.long, device=device)
            user_tensor = torch.full_like(items_tensor, user2idx[uid])
            # Flatten so topk ranks across candidates even if the model returns
            # a (N, 1) column or squeezes a single prediction to a scalar.
            scores = model(user_tensor, items_tensor).reshape(-1)
            # topk raises if asked for more elements than exist.
            k = min(n, len(candidates))
            top_positions = torch.topk(scores, k).indices.tolist()
            top_n[uid] = [idx2item[candidates[pos]] for pos in top_positions]
    return top_n

# Load and prepare data
full_train = pd.read_csv("../data/sample/train.csv")
test = pd.read_csv("../data/sample/test.csv")
# Mappings are built from the full training frame so that both the train and
# validation splits index into the same embedding tables.
user2idx, item2idx, idx2user, idx2item = create_id_mappings(full_train)
train_df, val_df = train_test_split(full_train, test_size=0.2, random_state=42, stratify=full_train["userId"])
train_dataset = NetflixDataset(train_df, user2idx, item2idx)
val_dataset = NetflixDataset(val_df, user2idx, item2idx)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The loaders do not depend on any tuned hyperparameter, so build them once.
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=1024, shuffle=False, num_workers=2)

# torch.save does not create missing directories; make sure the target exists.
best_model_path = "../model_artifacts/ncf_best.pth"
os.makedirs(os.path.dirname(best_model_path), exist_ok=True)

# Hyperparameter grid
embedding_dims = [16, 32, 64]
learning_rates = [1e-3, 5e-4]
hidden_layer_configs = [[64, 32], [128, 64, 32]]
epochs = 5  # Keep small for tuning speed

best_val_loss = float('inf')
best_params = None
best_model = None

for emb_dim in embedding_dims:
    for lr in learning_rates:
        for hidden_layers in hidden_layer_configs:
            print(f"Training NCF with embedding_dim={emb_dim}, lr={lr}, hidden_layers={hidden_layers}")
            model = NCF(len(user2idx), len(item2idx), embedding_dim=emb_dim, hidden_layers=hidden_layers).to(device)
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            criterion = nn.MSELoss()

            for epoch in range(epochs):
                train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
                val_loss = eval_epoch(model, val_loader, criterion, device)
                print(f"Epoch {epoch+1}/{epochs}: train_loss={train_loss:.4f}, val_loss={val_loss:.4f}")

            # Configurations are compared on their final-epoch validation loss.
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_params = (emb_dim, lr, hidden_layers)
                best_model = model
                torch.save(best_model.state_dict(), best_model_path)
                print(f"Saved new best model with val_loss: {best_val_loss:.4f}")

# Fail with a clear message instead of a TypeError on best_params[0] when no
# configuration ever beat the initial infinity (e.g. every loss was NaN).
if best_params is None:
    raise RuntimeError("No hyperparameter configuration produced a usable validation loss.")

print(f"\nBest Validation Loss: {best_val_loss:.4f} with params: embedding_dim={best_params[0]}, lr={best_params[1]}, hidden_layers={best_params[2]}")

# Evaluate on test set
users = test['userId'].unique()
test_dict = test.groupby("userId")["movieId"].apply(list).to_dict()

# Exclude everything in the full training frame (train + validation splits)
# from the candidate items when ranking for the test users.
top_n = generate_top_n_recs(best_model, full_train, user2idx, item2idx, users, n=10, device=device)

prec, recall, ndcg = batch_eval(users, test_dict, top_n, k=10)
print(f"NCF model test: Precision@10={prec:.4f} Recall@10={recall:.4f} NDCG@10={ndcg:.4f}")


Training NCF with embedding_dim=16, lr=0.001, hidden_layers=[64, 32]
Epoch 1/5: train_loss=2.0163, val_loss=1.1303
Epoch 2/5: train_loss=1.2993, val_loss=1.0233
Epoch 3/5: train_loss=1.1873, val_loss=0.9525
Epoch 4/5: train_loss=1.1148, val_loss=0.9139
Epoch 5/5: train_loss=1.0611, val_loss=0.8912
Saved new best model with val_loss: 0.8912
Training NCF with embedding_dim=16, lr=0.001, hidden_layers=[128, 64, 32]
Epoch 1/5: train_loss=1.9459, val_loss=1.1174
Epoch 2/5: train_loss=1.3144, val_loss=0.9986
Epoch 3/5: train_loss=1.2028, val_loss=0.9317
Epoch 4/5: train_loss=1.1236, val_loss=0.8953
Epoch 5/5: train_loss=1.0622, val_loss=0.8741
Saved new best model with val_loss: 0.8741
Training NCF with embedding_dim=16, lr=0.0005, hidden_layers=[64, 32]
Epoch 1/5: train_loss=2.9704, val_loss=1.2477
Epoch 2/5: train_loss=1.4879, val_loss=1.1208
Epoch 3/5: train_loss=1.3581, val_loss=1.0538
Epoch 4/5: train_loss=1.2841, val_loss=1.0014
Epoch 5/5: train_loss=1.2278, val_loss=0.9719
Training NC