In [1]:
import random
import numpy as np
import torch


def set_seed(seed: int):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)


In [2]:
########################################
# Data Preparation
########################################

from datasets import load_dataset
import pandas as pd
from flwr_datasets import FederatedDataset
from flwr_datasets.partitioner import IidPartitioner


dataset_name =  "ashraq/movielens_ratings"
partitioner = IidPartitioner(num_partitions=5)           
fds = FederatedDataset(dataset=dataset_name,
                       partitioners={"train": partitioner})


partition_0 = fds.load_partition(0, "train")
partition_1 = fds.load_partition(1, "train")
partition_2 = fds.load_partition(2, "train")
partition_3 = fds.load_partition(3, "train")
partition_4 = fds.load_partition(4, "train")


train_0 = partition_0.to_pandas()[["user_id", "movie_id", "rating"]]
train_1 = partition_1.to_pandas()[["user_id", "movie_id", "rating"]]
train_2 = partition_2.to_pandas()[["user_id", "movie_id", "rating"]]
train_3 = partition_3.to_pandas()[["user_id", "movie_id", "rating"]]
train_4 = partition_4.to_pandas()[["user_id", "movie_id", "rating"]]

test = fds.load_split("validation").to_pandas()[["user_id", "movie_id", "rating"]]

print("Train_0 Shape:", train_0.shape)    
print("Train_1 Shape:", train_1.shape)
print("Train_2 Shape:", train_2.shape)
print("Train_3 Shape:", train_3.shape)
print("Train_4 Shape:", train_4.shape)
print("Test Shape:", test.shape)



print(len(set(train_0['user_id']).union(test['user_id'])))
print(len(set(train_1['user_id']).union(test['user_id'])))
print(len(set(train_2['user_id']).union(test['user_id'])))
print(len(set(train_3['user_id']).union(test['user_id'])))
print(len(set(train_4['user_id']).union(test['user_id'])))




Train_0 Shape: (178277, 3)
Train_1 Shape: (178277, 3)
Train_2 Shape: (178276, 3)
Train_3 Shape: (178276, 3)
Train_4 Shape: (178276, 3)
Test Shape: (99043, 3)
35534
35501
35529
35582
35436


In [3]:
from dmf import *
from sklearn.model_selection import train_test_split


eval_results = []

# Loop over the five training partitions.
for i in range(5):
    print(f"\n==================== Processing train_{i} ==================")
    
    train_full= globals()[f"train_{i}"]  
    train, valid = train_test_split(train_full, test_size=0.2, random_state=42)
    
    train_users = set(train['user_id'].unique())
    train_movies = set(train['movie_id'].unique())
    valid = valid[
        valid['user_id'].isin(train_users) &
        valid['movie_id'].isin(train_movies)
    ]
    test = test[
        test['user_id'].isin(train_users) &
        test['movie_id'].isin(train_movies)
    ]

    all_users = set(train['user_id']).union(valid['user_id']).union(test['user_id'])
    all_movies = set(train['movie_id']).union(valid['movie_id']).union(test['movie_id'])
    
    user_id_map = {user: idx for idx, user in enumerate(sorted(all_users))}
    movie_id_map = {movie: idx for idx, movie in enumerate(sorted(all_movies))}
    
    num_users = len(user_id_map)
    num_movies = len(movie_id_map)
    
    print("Number of Users:", num_users)
    print("Number of Movies:", num_movies)
    
    # Build a dense interaction matrix from the training data using mapped indices.
    # Each row corresponds to a user and each column to a movie.
    interaction_matrix = np.zeros((num_users, num_movies), dtype=np.float32)
    for _, row in train.iterrows():
        user_idx = user_id_map[int(row['user_id'])]
        movie_idx = movie_id_map[int(row['movie_id'])]
        interaction_matrix[user_idx, movie_idx] = row['rating']
    
    global_interaction = torch.tensor(interaction_matrix)
    print("Global Interaction Shape:", global_interaction.shape)
        
    ########################################
    # Define Datasets and DataLoaders
    ########################################

    train_dataset = DMFDataset(train, user_id_map, movie_id_map)
    valid_dataset = DMFDataset(valid, user_id_map, movie_id_map)
    test_dataset = DMFDataset(test, user_id_map, movie_id_map)
    
    ########################################
    # Initialize DMF Model
    ########################################
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Initialize the DMFRegressor with the mapped number of users and movies,
    # and pass the global_interaction tensor.
    model = DMFRegressor(
        num_users=num_users, 
        num_movies=num_movies,
        global_interaction=global_interaction,
        user_embedding_size=32,  
        item_embedding_size=32,  
        user_hidden_sizes=[64, 32],  
        item_hidden_sizes=[64, 32],
        dropout=0.3,
        activation="leaky_relu",
        bn=True,
        init_method="norm"
    ).to(device)
    
    ########################################
    # Train and Evaluate the Model
    ########################################
    # Training parameters.
    batch_size = 256
    num_epochs = 30
    lr = 0.0001
    weight_decay = 1e-4
    patience = 5

    run_name = f"local_regression_{i}"  

    # If there is an active wandb run, finish it.
    if wandb.run is not None:
        wandb.finish() 

    # Initialize a new wandb run.
    wandb.init(
        project="FedRec", 
        name=run_name,
        reinit=True, 
        config={
            "batch_size": batch_size,
            "num_epochs": num_epochs,
            "lr": lr,
            "weight_decay": weight_decay,
            "patience": patience
        }
    )
    
    # Train the model.
    train_model_w_early_stopping(
        model, 
        train_dataset, 
        valid_dataset, 
        device, 
        batch_size=batch_size, 
        num_epochs=num_epochs, 
        lr=lr, 
        weight_decay=weight_decay, 
        patience=patience,
        wandb=wandb,
        save_as=f"models/local_{i}.pt"
    )
    
    # Evaluate the model.
    average_loss, mae, rmse, r2 = evaluate_DMFRegressor(model, test_dataset, device, batch_size=batch_size)

    # Append evaluation results.
    eval_results.append({
        "partition": i,
        "average_loss": average_loss,
        "mae": mae,
        "rmse": rmse,
        "r2": r2
    })
    
    wandb.finish()
    print(f"\n==================================================================")



Number of Users: 29146
Number of Movies: 8507
Global Interaction Shape: torch.Size([29146, 8507])
First row of global interaction:
 tensor([0., 0., 0.,  ..., 0., 0., 0.])


[34m[1mwandb[0m: Currently logged in as: [33mhh_upb[0m ([33mhh_upb-paderborn-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


                                                                          

Epoch 1/50  Train Loss: 0.4385
Epoch 1/50  Validation Loss: 0.4186
  Validation loss improved. Saving model state.


                                                                          

Epoch 2/50  Train Loss: 0.3316
Epoch 2/50  Validation Loss: 0.3258
  Validation loss improved. Saving model state.


                                                                          

Epoch 3/50  Train Loss: 0.3100
Epoch 3/50  Validation Loss: 0.2986
  Validation loss improved. Saving model state.


                                                                          

Epoch 4/50  Train Loss: 0.2964
Epoch 4/50  Validation Loss: 0.2820
  Validation loss improved. Saving model state.


                                                                          

Epoch 5/50  Train Loss: 0.2880
Epoch 5/50  Validation Loss: 0.2924
  No improvement in validation loss for 1 epoch(s).


                                                                          

Epoch 6/50  Train Loss: 0.2810
Epoch 6/50  Validation Loss: 0.2827
  No improvement in validation loss for 2 epoch(s).


                                                                          

Epoch 7/50  Train Loss: 0.2748
Epoch 7/50  Validation Loss: 0.2721
  Validation loss improved. Saving model state.


                                                                          

Epoch 8/50  Train Loss: 0.2691
Epoch 8/50  Validation Loss: 0.2847
  No improvement in validation loss for 1 epoch(s).


                                                                          

Epoch 9/50  Train Loss: 0.2664
Epoch 9/50  Validation Loss: 0.2776
  No improvement in validation loss for 2 epoch(s).


                                                                           

Epoch 10/50  Train Loss: 0.2621
Epoch 10/50  Validation Loss: 0.2780
  No improvement in validation loss for 3 epoch(s).


                                                                           

Epoch 11/50  Train Loss: 0.2610
Epoch 11/50  Validation Loss: 0.2775
  No improvement in validation loss for 4 epoch(s).


                                                                           

Epoch 12/50  Train Loss: 0.2574
Epoch 12/50  Validation Loss: 0.2770
  No improvement in validation loss for 5 epoch(s).
Early stopping triggered.
Loaded best model state with validation loss: 0.2721
Saved best model state


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
patience_counter,▁▁▁▁▁▃▅▁▃▅▆█
train_loss,█▄▃▃▂▂▂▁▁▁▁▁
val_loss,█▄▂▁▂▂▁▂▁▁▁▁

0,1
epoch,12.0
patience_counter,4.0
train_loss,0.25742
val_loss,0.27699


                                                              

Evaluation - Loss: 0.2748
MAE: 0.7599, RMSE: 1.0102, R^2: 0.0642


Number of Users: 29081
Number of Movies: 8487
Global Interaction Shape: torch.Size([29081, 8487])
First row of global interaction:
 tensor([0., 0., 0.,  ..., 0., 0., 0.])


                                                                          

Epoch 1/50  Train Loss: 0.4422
Epoch 1/50  Validation Loss: 0.3704
  Validation loss improved. Saving model state.


                                                                          

Epoch 2/50  Train Loss: 0.3265
Epoch 2/50  Validation Loss: 0.2973
  Validation loss improved. Saving model state.


                                                                          

Epoch 3/50  Train Loss: 0.3002
Epoch 3/50  Validation Loss: 0.2885
  Validation loss improved. Saving model state.


                                                                          

Epoch 4/50  Train Loss: 0.2898
Epoch 4/50  Validation Loss: 0.2945
  No improvement in validation loss for 1 epoch(s).


                                                                          

Epoch 5/50  Train Loss: 0.2804
Epoch 5/50  Validation Loss: 0.2723
  Validation loss improved. Saving model state.


                                                                          

Epoch 6/50  Train Loss: 0.2756
Epoch 6/50  Validation Loss: 0.2800
  No improvement in validation loss for 1 epoch(s).


                                                                          

Epoch 7/50  Train Loss: 0.2693
Epoch 7/50  Validation Loss: 0.2682
  Validation loss improved. Saving model state.


                                                                          

Epoch 8/50  Train Loss: 0.2659
Epoch 8/50  Validation Loss: 0.2665
  Validation loss improved. Saving model state.


                                                                          

Epoch 9/50  Train Loss: 0.2613
Epoch 9/50  Validation Loss: 0.2838
  No improvement in validation loss for 1 epoch(s).


                                                                           

Epoch 10/50  Train Loss: 0.2596
Epoch 10/50  Validation Loss: 0.2729
  No improvement in validation loss for 2 epoch(s).


                                                                           

Epoch 11/50  Train Loss: 0.2568
Epoch 11/50  Validation Loss: 0.2652
  Validation loss improved. Saving model state.


                                                                           

Epoch 12/50  Train Loss: 0.2550
Epoch 12/50  Validation Loss: 0.2712
  No improvement in validation loss for 1 epoch(s).


                                                                           

Epoch 13/50  Train Loss: 0.2528
Epoch 13/50  Validation Loss: 0.2694
  No improvement in validation loss for 2 epoch(s).


                                                                           

Epoch 14/50  Train Loss: 0.2504
Epoch 14/50  Validation Loss: 0.2656
  No improvement in validation loss for 3 epoch(s).


                                                                           

Epoch 15/50  Train Loss: 0.2481
Epoch 15/50  Validation Loss: 0.2592
  Validation loss improved. Saving model state.


                                                                           

Epoch 16/50  Train Loss: 0.2477
Epoch 16/50  Validation Loss: 0.2682
  No improvement in validation loss for 1 epoch(s).


                                                                           

Epoch 17/50  Train Loss: 0.2460
Epoch 17/50  Validation Loss: 0.2773
  No improvement in validation loss for 2 epoch(s).


                                                                           

Epoch 18/50  Train Loss: 0.2446
Epoch 18/50  Validation Loss: 0.2724
  No improvement in validation loss for 3 epoch(s).


                                                                           

Epoch 19/50  Train Loss: 0.2425
Epoch 19/50  Validation Loss: 0.2646
  No improvement in validation loss for 4 epoch(s).


                                                                           

Epoch 20/50  Train Loss: 0.2414
Epoch 20/50  Validation Loss: 0.2799
  No improvement in validation loss for 5 epoch(s).
Early stopping triggered.
Loaded best model state with validation loss: 0.2592
Saved best model state


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
patience_counter,▁▁▁▁▃▁▃▁▁▃▅▁▃▅▆▁▃▅▆█
train_loss,█▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_loss,█▃▃▃▂▂▂▁▃▂▁▂▂▁▁▂▂▂▁▂

0,1
epoch,20.0
patience_counter,4.0
train_loss,0.24136
val_loss,0.27991


                                                              

Evaluation - Loss: 0.2768
MAE: 0.7647, RMSE: 1.0199, R^2: 0.0341


Number of Users: 29097
Number of Movies: 8468
Global Interaction Shape: torch.Size([29097, 8468])
First row of global interaction:
 tensor([0., 0., 0.,  ..., 0., 0., 0.])


                                                                          

Epoch 1/50  Train Loss: 0.4297
Epoch 1/50  Validation Loss: 0.3543
  Validation loss improved. Saving model state.


                                                                          

Epoch 2/50  Train Loss: 0.3280
Epoch 2/50  Validation Loss: 0.3212
  Validation loss improved. Saving model state.


                                                                          

Epoch 3/50  Train Loss: 0.3071
Epoch 3/50  Validation Loss: 0.2974
  Validation loss improved. Saving model state.


                                                                          

Epoch 4/50  Train Loss: 0.2960
Epoch 4/50  Validation Loss: 0.2821
  Validation loss improved. Saving model state.


                                                                          

Epoch 5/50  Train Loss: 0.2870
Epoch 5/50  Validation Loss: 0.2803
  Validation loss improved. Saving model state.


                                                                          

Epoch 6/50  Train Loss: 0.2807
Epoch 6/50  Validation Loss: 0.2686
  Validation loss improved. Saving model state.


                                                                          

Epoch 7/50  Train Loss: 0.2764
Epoch 7/50  Validation Loss: 0.2864
  No improvement in validation loss for 1 epoch(s).


                                                                          

Epoch 8/50  Train Loss: 0.2690
Epoch 8/50  Validation Loss: 0.2785
  No improvement in validation loss for 2 epoch(s).


                                                                          

Epoch 9/50  Train Loss: 0.2668
Epoch 9/50  Validation Loss: 0.2865
  No improvement in validation loss for 3 epoch(s).


                                                                           

Epoch 10/50  Train Loss: 0.2633
Epoch 10/50  Validation Loss: 0.2962
  No improvement in validation loss for 4 epoch(s).


                                                                           

Epoch 11/50  Train Loss: 0.2615
Epoch 11/50  Validation Loss: 0.2718
  No improvement in validation loss for 5 epoch(s).
Early stopping triggered.
Loaded best model state with validation loss: 0.2686
Saved best model state


0,1
epoch,▁▂▂▃▄▅▅▆▇▇█
patience_counter,▁▁▁▁▁▁▁▃▅▆█
train_loss,█▄▃▂▂▂▂▁▁▁▁
val_loss,█▅▃▂▂▁▂▂▂▃▁

0,1
epoch,11.0
patience_counter,4.0
train_loss,0.26148
val_loss,0.27185


                                                              

Evaluation - Loss: 0.2636
MAE: 0.7378, RMSE: 0.9731, R^2: 0.1144


Number of Users: 29194
Number of Movies: 8509
Global Interaction Shape: torch.Size([29194, 8509])
First row of global interaction:
 tensor([0., 0., 0.,  ..., 0., 0., 0.])


                                                                          

Epoch 1/50  Train Loss: 0.4285
Epoch 1/50  Validation Loss: 0.3659
  Validation loss improved. Saving model state.


                                                                          

Epoch 2/50  Train Loss: 0.3296
Epoch 2/50  Validation Loss: 0.3334
  Validation loss improved. Saving model state.


                                                                          

Epoch 3/50  Train Loss: 0.3088
Epoch 3/50  Validation Loss: 0.3164
  Validation loss improved. Saving model state.


                                                                          

Epoch 4/50  Train Loss: 0.2965
Epoch 4/50  Validation Loss: 0.2943
  Validation loss improved. Saving model state.


                                                                          

Epoch 5/50  Train Loss: 0.2903
Epoch 5/50  Validation Loss: 0.3017
  No improvement in validation loss for 1 epoch(s).


                                                                          

Epoch 6/50  Train Loss: 0.2829
Epoch 6/50  Validation Loss: 0.3015
  No improvement in validation loss for 2 epoch(s).


                                                                          

Epoch 7/50  Train Loss: 0.2757
Epoch 7/50  Validation Loss: 0.2842
  Validation loss improved. Saving model state.


                                                                          

Epoch 8/50  Train Loss: 0.2736
Epoch 8/50  Validation Loss: 0.2901
  No improvement in validation loss for 1 epoch(s).


                                                                          

Epoch 9/50  Train Loss: 0.2681
Epoch 9/50  Validation Loss: 0.2961
  No improvement in validation loss for 2 epoch(s).


                                                                           

Epoch 10/50  Train Loss: 0.2633
Epoch 10/50  Validation Loss: 0.2927
  No improvement in validation loss for 3 epoch(s).


                                                                           

Epoch 11/50  Train Loss: 0.2604
Epoch 11/50  Validation Loss: 0.2953
  No improvement in validation loss for 4 epoch(s).


                                                                           

Epoch 12/50  Train Loss: 0.2593
Epoch 12/50  Validation Loss: 0.2886
  No improvement in validation loss for 5 epoch(s).
Early stopping triggered.
Loaded best model state with validation loss: 0.2842
Saved best model state


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
patience_counter,▁▁▁▁▁▃▅▁▃▅▆█
train_loss,█▄▃▃▂▂▂▂▁▁▁▁
val_loss,█▅▄▂▃▂▁▂▂▂▂▁

0,1
epoch,12.0
patience_counter,4.0
train_loss,0.25933
val_loss,0.28857


                                                              

Evaluation - Loss: 0.2759
MAE: 0.7641, RMSE: 1.0075, R^2: 0.0454


Number of Users: 29017
Number of Movies: 8511
Global Interaction Shape: torch.Size([29017, 8511])
First row of global interaction:
 tensor([0., 0., 0.,  ..., 0., 0., 0.])


                                                                          

Epoch 1/50  Train Loss: 0.4359
Epoch 1/50  Validation Loss: 0.3919
  Validation loss improved. Saving model state.


                                                                          

Epoch 2/50  Train Loss: 0.3325
Epoch 2/50  Validation Loss: 0.3257
  Validation loss improved. Saving model state.


                                                                          

Epoch 3/50  Train Loss: 0.3096
Epoch 3/50  Validation Loss: 0.3126
  Validation loss improved. Saving model state.


                                                                          

Epoch 4/50  Train Loss: 0.2984
Epoch 4/50  Validation Loss: 0.2946
  Validation loss improved. Saving model state.


                                                                          

Epoch 5/50  Train Loss: 0.2864
Epoch 5/50  Validation Loss: 0.2806
  Validation loss improved. Saving model state.


                                                                          

Epoch 6/50  Train Loss: 0.2810
Epoch 6/50  Validation Loss: 0.2932
  No improvement in validation loss for 1 epoch(s).


                                                                          

Epoch 7/50  Train Loss: 0.2750
Epoch 7/50  Validation Loss: 0.2829
  No improvement in validation loss for 2 epoch(s).


                                                                          

Epoch 8/50  Train Loss: 0.2710
Epoch 8/50  Validation Loss: 0.2737
  Validation loss improved. Saving model state.


                                                                          

Epoch 9/50  Train Loss: 0.2666
Epoch 9/50  Validation Loss: 0.2919
  No improvement in validation loss for 1 epoch(s).


                                                                           

Epoch 10/50  Train Loss: 0.2630
Epoch 10/50  Validation Loss: 0.2924
  No improvement in validation loss for 2 epoch(s).


                                                                           

Epoch 11/50  Train Loss: 0.2612
Epoch 11/50  Validation Loss: 0.2747
  No improvement in validation loss for 3 epoch(s).


                                                                           

Epoch 12/50  Train Loss: 0.2593
Epoch 12/50  Validation Loss: 0.2719
  Validation loss improved. Saving model state.


                                                                           

Epoch 13/50  Train Loss: 0.2570
Epoch 13/50  Validation Loss: 0.2771
  No improvement in validation loss for 1 epoch(s).


                                                                           

Epoch 14/50  Train Loss: 0.2546
Epoch 14/50  Validation Loss: 0.2797
  No improvement in validation loss for 2 epoch(s).


                                                                           

Epoch 15/50  Train Loss: 0.2528
Epoch 15/50  Validation Loss: 0.2887
  No improvement in validation loss for 3 epoch(s).


                                                                           

Epoch 16/50  Train Loss: 0.2502
Epoch 16/50  Validation Loss: 0.2903
  No improvement in validation loss for 4 epoch(s).


                                                                           

Epoch 17/50  Train Loss: 0.2485
Epoch 17/50  Validation Loss: 0.2887
  No improvement in validation loss for 5 epoch(s).
Early stopping triggered.
Loaded best model state with validation loss: 0.2719
Saved best model state


0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
patience_counter,▁▁▁▁▁▁▃▅▁▃▅▆▁▃▅▆█
train_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
val_loss,█▄▃▂▂▂▂▁▂▂▁▁▁▁▂▂▂

0,1
epoch,17.0
patience_counter,4.0
train_loss,0.24846
val_loss,0.28866


                                                              

Evaluation - Loss: 0.2718
MAE: 0.7561, RMSE: 0.9921, R^2: 0.0685





In [6]:
# display evaluation results
eval_df = pd.DataFrame(eval_results)
display(eval_df)

Unnamed: 0,partition,average_loss,mae,rmse,r2
0,0,0.274781,0.759903,1.01017,0.064155
1,1,0.276759,0.764737,1.01994,0.034093
2,2,0.263644,0.737837,0.973077,0.114442
3,3,0.27594,0.76414,1.007528,0.045387
4,4,0.271848,0.756101,0.992086,0.068458
