# PyTorch Recommender Notebook

This notebook is a PyTorch-based conversion of the original TensorFlow ANN recommender.

In [1]:
!pip install torch torchvision torchaudio

Collecting torchvision
  Downloading torchvision-0.22.1-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.1 kB)
Collecting torchaudio
  Downloading torchaudio-2.7.1-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.6 kB)
Collecting torch
  Downloading torch-2.7.1-cp312-none-macosx_11_0_arm64.whl.metadata (29 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading torchvision-0.22.1-cp312-cp312-macosx_11_0_arm64.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0mm
[?25hDownloading torch-2.7.1-cp312-none-macosx_11_0_arm64.whl (68.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.6/68.6 MB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading torchaudio-2.7.1-cp312-cp312-macosx_11_0_arm64.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m10.8 MB/s[0m et

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from math import sqrt
from sklearn.metrics import mean_squared_error

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cpu


In [None]:
rating_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-ML321EN-SkillsNetwork/labs/datasets/ratings.csv"
rating_df = pd.read_csv(rating_url)
rating_df.head()

In [3]:
# Load & encode data
# Assumes you have a DataFrame rating_df with ['user','item','rating']
# Example: rating_df = pd.read_csv('ratings.csv')
rating_df = pd.read_csv('ratings.csv')

users = rating_df['user'].unique().tolist()
items = rating_df['item'].unique().tolist()
user2idx = {u: i for i, u in enumerate(users)}
item2idx = {i: j for j, i in enumerate(items)}

rating_df['u_idx'] = rating_df['user'].map(user2idx)
rating_df['i_idx'] = rating_df['item'].map(item2idx)

# Scale ratings to [0,1]
scaler = MinMaxScaler()
rating_df['r_scaled'] = scaler.fit_transform(rating_df[['rating']])

FileNotFoundError: [Errno 2] No such file or directory: 'ratings.csv'

In [None]:
# Train / Val / Test Split
train_val, test = train_test_split(rating_df, test_size=0.1, random_state=42)
train, val     = train_test_split(train_val, test_size=0.1111, random_state=42)

class CFData(Dataset):
    def __init__(self, df):
        self.users = df['u_idx'].values
        self.items = df['i_idx'].values
        self.rats  = df['r_scaled'].values.astype(np.float32)
    def __len__(self): return len(self.rats)
    def __getitem__(self, i):
        return {
            'user': self.users[i],
            'item': self.items[i],
            'rating': self.rats[i]
        }

batch_size = 256
train_dl = DataLoader(CFData(train), batch_size=batch_size, shuffle=True)
val_dl   = DataLoader(CFData(val),   batch_size=batch_size)
test_dl  = DataLoader(CFData(test),  batch_size=batch_size)

In [None]:
class RecommenderNet(nn.Module):
    def __init__(self, n_users, n_items, emb_size=16):
        super().__init__()
        self.user_emb = nn.Embedding(n_users, emb_size)
        self.item_emb = nn.Embedding(n_items, emb_size)
        self.user_bias = nn.Embedding(n_users, 1)
        self.item_bias = nn.Embedding(n_items, 1)

    def forward(self, user_idx, item_idx):
        u = self.user_emb(user_idx)
        i = self.item_emb(item_idx)
        b_u = self.user_bias(user_idx).squeeze()
        b_i = self.item_bias(item_idx).squeeze()
        dot = (u * i).sum(dim=1)
        return dot + b_u + b_i

# Instantiate model
n_users = len(users)
n_items = len(items)
model = RecommenderNet(n_users, n_items, emb_size=16).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Training Loop
def run_epoch(dataloader, train=True):
    epoch_loss = 0
    if train: model.train()
    else:     model.eval()

    for batch in dataloader:
        u = batch['user'].to(device)
        i = batch['item'].to(device)
        r = batch['rating'].to(device)

        preds = model(u, i)
        loss = criterion(preds, r)

        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        epoch_loss += loss.item() * len(r)

    return sqrt(epoch_loss / len(dataloader.dataset))

n_epochs = 20
for epoch in range(1, n_epochs+1):
    train_rmse = run_epoch(train_dl, train=True)
    val_rmse   = run_epoch(val_dl,   train=False)
    print(f"Epoch {epoch:02d}  Train RMSE: {train_rmse:.4f}  Val RMSE: {val_rmse:.4f}")

In [None]:
# Test Evaluation
model.eval()
all_preds, all_trues = [], []
with torch.no_grad():
    for batch in test_dl:
        u = batch['user'].to(device)
        i = batch['item'].to(device)
        r = batch['rating'].cpu().numpy()
        preds = model(u, i).cpu().numpy()
        all_preds.append(preds)
        all_trues.append(r)

y_pred = np.concatenate(all_preds)
y_true = np.concatenate(all_trues)

# Inverse scale
y_pred = scaler.inverse_transform(y_pred.reshape(-1,1)).ravel()
y_true = scaler.inverse_transform(y_true.reshape(-1,1)).ravel()

test_rmse = sqrt(mean_squared_error(y_true, y_pred))
print(f"Test RMSE: {test_rmse:.4f}")