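# DMF Regression

Centralized training of a `DMFRegressor` on the MovieLens ratings dataset (`ashraq/movielens_ratings`), with early stopping on a held-out validation split and a final evaluation that reports loss, MAE, RMSE and R².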

In [None]:
import random
import numpy as np
import torch

# Ask PyTorch to release unoccupied cached GPU memory before starting.
# NOTE(review): presumably only matters when the kernel is reused after an
# earlier run — confirm it is still needed on a fresh kernel.
torch.cuda.empty_cache()

def set_seed(seed: int):
    """Seed Python, NumPy and PyTorch RNGs and pin cuDNN to deterministic mode.

    Args:
        seed: Value handed unchanged to every seeding call.
    """
    # CPU-side generators
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # GPU generators: the current device first, then every visible device
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Deterministic kernels on, auto-tuner off
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed every RNG once, up front, before any later cell splits data or builds a model.
set_seed(42)

In [2]:

########################################
# Data Preparation
########################################

from sklearn.model_selection import train_test_split
from datasets import load_dataset
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from dmf import *

# Fetch the MovieLens ratings from the Hugging Face Hub
dataset_name = "ashraq/movielens_ratings"
dataset = load_dataset(dataset_name)

print("Dataset Splits:", dataset.keys())

# Only the (user, movie, rating) triple is used downstream. The Hub's
# "validation" split is what this notebook treats as its held-out test set.
rating_columns = ["user_id", "movie_id", "rating"]
train_full = dataset["train"].to_pandas()[rating_columns]
test = dataset["validation"].to_pandas()[rating_columns]

# Carve a validation set (20%) out of the Hub's training split
train, valid = train_test_split(train_full, test_size=0.2, random_state=42)

# IDs that actually occur in the rows we train on
train_users = set(train['user_id'].unique())
train_movies = set(train['movie_id'].unique())


def _restrict_to_train_ids(ratings):
    """Return the rows of `ratings` whose user AND movie both occur in `train`."""
    known_user = ratings['user_id'].isin(train_users)
    known_movie = ratings['movie_id'].isin(train_movies)
    return ratings[known_user & known_movie]


# Drop rows that mention a user or movie absent from the training rows
valid = _restrict_to_train_ids(valid)
test = _restrict_to_train_ids(test)

print("Train Shape:", train.shape)
print("Valid Shape:", valid.shape)
print("Test Shape:", test.shape)

# Every raw ID that appears in any of the three splits
all_users = set(train['user_id']) | set(valid['user_id']) | set(test['user_id'])
all_movies = set(train['movie_id']) | set(valid['movie_id']) | set(test['movie_id'])

# Raw ID -> contiguous 0-based index, assigned in ascending raw-ID order
user_id_map = dict(zip(sorted(all_users), range(len(all_users))))
movie_id_map = dict(zip(sorted(all_movies), range(len(all_movies))))

num_users = len(user_id_map)
num_movies = len(movie_id_map)
print("Number of Users:", num_users)
print("Number of Movies:", num_movies)

# Dense (num_users x num_movies) rating matrix built from the TRAINING rows
# only; cells without a training rating stay 0.
interaction_matrix = np.zeros((num_users, num_movies), dtype=np.float32)

# Fill the matrix with one vectorized scatter instead of a Python-level
# iterrows() loop over every rating. keep="last" reproduces the loop's
# "last write wins" result should a (user, movie) pair occur more than once;
# NumPy gives no ordering guarantee for repeated indices in fancy assignment.
train_unique = train.drop_duplicates(subset=["user_id", "movie_id"], keep="last")
user_rows = train_unique["user_id"].map(user_id_map).to_numpy()
movie_cols = train_unique["movie_id"].map(movie_id_map).to_numpy()
interaction_matrix[user_rows, movie_cols] = train_unique["rating"].to_numpy(dtype=np.float32)

global_interaction = torch.tensor(interaction_matrix)
print("Global Interaction Shape:", global_interaction.shape)

########################################
# Wrap each split in a DMFDataset
########################################

# All three splits share the same raw-ID -> index mappings
train_dataset, valid_dataset, test_dataset = (
    DMFDataset(split_frame, user_id_map, movie_id_map)
    for split_frame in (train, valid, test)
)


Dataset Splits: dict_keys(['train', 'validation'])
Train Shape: (713105, 3)
Valid Shape: (176002, 3)
Test Shape: (97753, 3)
Number of Users: 42385
Number of Movies: 14559
Global Interaction Shape: torch.Size([42385, 14559])


In [3]:
########################################
# Initialize DMF Model
########################################
import wandb

# Prefer the GPU when one is visible, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Constructor arguments gathered in one place so they are easy to scan and tweak
model_config = dict(
    num_users=num_users,
    num_movies=num_movies,
    global_interaction=global_interaction,  # train-only rating matrix tensor
    user_embedding_size=32,
    item_embedding_size=32,
    user_hidden_sizes=[64, 32],
    item_hidden_sizes=[64, 32],
    dropout=0.3,
    activation="leaky_relu",
    bn=True,
    init_method="norm",
)
model = DMFRegressor(**model_config).to(device)



########################################
# Running Training and Evaluation with DMF Model
########################################

# Optimisation / early-stopping settings; the same values are logged to W&B below
batch_size = 256
num_epochs = 30
lr = 0.0001
weight_decay = 1e-4
patience = 5

# Keep the run handle under its own name. Rebinding `wandb` to the returned
# Run object would shadow the imported module, so executing this cell a
# second time would fail on `wandb.init`.
wandb_run = wandb.init(
    project="FedRec",
    name="centralized_regression_warm",
    reinit=True,
    config={
        "batch_size": batch_size,
        "num_epochs": num_epochs,
        "lr": lr,
        "weight_decay": weight_decay,
        "patience": patience,
    },
)

# Train the model. The checkpoint path is kept exactly as before.
train_model_w_early_stopping(
    model,
    train_dataset,
    valid_dataset,
    device,
    batch_size=batch_size,
    num_epochs=num_epochs,
    lr=lr,
    weight_decay=weight_decay,
    patience=patience,
    wandb=wandb_run,
    save_as="models/centeralized_model.pt",
)


[34m[1mwandb[0m: Currently logged in as: [33mhh_upb[0m ([33mhh_upb-paderborn-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


                                                                            

Epoch 1/30  Train Loss: 0.3092
Epoch 1/30  Validation Loss: 0.2524
  Validation loss improved. Saving model state.


                                                                            

Epoch 2/30  Train Loss: 0.2649
Epoch 2/30  Validation Loss: 0.2425
  Validation loss improved. Saving model state.


                                                                            

Epoch 3/30  Train Loss: 0.2559
Epoch 3/30  Validation Loss: 0.2363
  Validation loss improved. Saving model state.


                                                                            

Epoch 4/30  Train Loss: 0.2503
Epoch 4/30  Validation Loss: 0.2355
  Validation loss improved. Saving model state.


                                                                            

Epoch 5/30  Train Loss: 0.2465
Epoch 5/30  Validation Loss: 0.2327
  Validation loss improved. Saving model state.


                                                                            

Epoch 6/30  Train Loss: 0.2433
Epoch 6/30  Validation Loss: 0.2261
  Validation loss improved. Saving model state.


                                                                            

Epoch 7/30  Train Loss: 0.2410
Epoch 7/30  Validation Loss: 0.2306
  No improvement in validation loss for 1 epoch(s).


                                                                            

Epoch 8/30  Train Loss: 0.2392
Epoch 8/30  Validation Loss: 0.2279
  No improvement in validation loss for 2 epoch(s).


                                                                            

Epoch 9/30  Train Loss: 0.2376
Epoch 9/30  Validation Loss: 0.2290
  No improvement in validation loss for 3 epoch(s).


                                                                             

Epoch 10/30  Train Loss: 0.2357
Epoch 10/30  Validation Loss: 0.2263
  No improvement in validation loss for 4 epoch(s).


                                                                             

Epoch 11/30  Train Loss: 0.2346
Epoch 11/30  Validation Loss: 0.2273
  No improvement in validation loss for 5 epoch(s).
Early stopping triggered.
Loaded best model state with validation loss: 0.2261
Saved best model state


0,1
epoch,▁▂▂▃▄▅▅▆▇▇█
patience_counter,▁▁▁▁▁▁▁▃▅▆█
train_loss,█▄▃▂▂▂▂▁▁▁▁
val_loss,█▅▄▄▃▁▂▁▂▁▁

0,1
epoch,11.0
patience_counter,4.0
train_loss,0.23461
val_loss,0.2273


In [4]:
# Score the trained model on the held-out test split, using the same batch
# size as training.
test_metrics = evaluate_DMFRegressor(model, test_dataset, device, batch_size=batch_size)
average_loss, mae, rmse, r2 = test_metrics
 

                                                              

Evaluation - Loss: 0.2278
MAE: 0.6614, RMSE: 0.8827, R^2: 0.2976


