## Basic Model building

In [38]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from ast import literal_eval

# Load dataset
df = pd.read_csv('merged_data.csv')

  df = pd.read_csv('merged_data.csv')


In [39]:
# Convert string representation of list to actual list for Genres column
df['Genres'] = df['Genres'].apply(literal_eval)

# Step 1: Zero-Index all categorical features
df["UserID"] = df["UserID"] - 1  # Zero-index UserID
df["MovieID"] = df["MovieID"] - 1  # Zero-index MovieID
df["Gender"] = df["Gender"].astype('category').cat.codes  # Zero-index Gender
df["Occupation"] = df["Occupation"].astype('category').cat.codes  # Zero-index Occupation


In [40]:
# Step 2: Split data by user age group or any criteria you want to simulate clients
age_groups = df['Age'].unique()
client_datasets = {}

# Create simulated client datasets based on age groups
for age in age_groups:
    client_data = df[df['Age'] == age]
    client_datasets[f"client_{age}"] = client_data

# Get all unique genres from the dataset
all_genres = set()
for genres in df['Genres']:
    all_genres.update(genres)
all_genres = sorted(all_genres)
num_genres = len(all_genres)

# Create genre mapping
genre_to_idx = {genre: i for i, genre in enumerate(all_genres)}

In [41]:
# Example Federated Recommender Model using PyTorch
class FederatedRecommender(nn.Module):
    def __init__(self, num_users, num_movies, num_genders, num_occupations, num_genres, embedding_dim=10):
        super(FederatedRecommender, self).__init__()
        
        # Embeddings for each feature
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.movie_embedding = nn.Embedding(num_movies, embedding_dim)
        self.gender_embedding = nn.Embedding(num_genders, embedding_dim)
        self.occupation_embedding = nn.Embedding(num_occupations, embedding_dim)
        
        # Genre embeddings - we'll use a linear layer
        self.genre_projection = nn.Linear(num_genres, embedding_dim)
        
        # Fully connected layers for rating prediction
        self.fc1 = nn.Linear(embedding_dim * 5, 128)  # 5 features: user, movie, gender, occupation, genres
        self.fc2 = nn.Linear(128, 1)

    def forward(self, user, movie, gender, occupation, genres):
        # Embedding lookup
        user_embedded = self.user_embedding(user)
        movie_embedded = self.movie_embedding(movie)
        gender_embedded = self.gender_embedding(gender)
        occupation_embedded = self.occupation_embedding(occupation)
        
        # Process genres - project binary flags to embedding space
        genre_embedded = self.genre_projection(genres.float())
        
        # Concatenate all embeddings
        all_embeddings = torch.cat([
            user_embedded, 
            movie_embedded, 
            gender_embedded, 
            occupation_embedded,
            genre_embedded
        ], dim=-1)
        
        # Pass through fully connected layers
        x = torch.relu(self.fc1(all_embeddings))
        rating = self.fc2(x)
        return rating.squeeze()


In [42]:
# Federated learning training function for each client
def train_local_model(client_data, model, epochs=5, batch_size=64, lr=0.001):
    # Prepare the data
    user = torch.tensor(client_data['UserID'].values, dtype=torch.long)
    movie = torch.tensor(client_data['MovieID'].values, dtype=torch.long)
    gender = torch.tensor(client_data['Gender'].values, dtype=torch.long)
    occupation = torch.tensor(client_data['Occupation'].values, dtype=torch.long)
    
    # Create genre vectors
    genre_vectors = []
    for genres in client_data['Genres']:
        vec = torch.zeros(num_genres)
        for genre in genres:
            vec[genre_to_idx[genre]] = 1
        genre_vectors.append(vec)
    genres = torch.stack(genre_vectors)
    
    ratings = torch.tensor(client_data['Rating'].values, dtype=torch.float32)
    
    # Initialize the optimizer and loss function
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # Training loop
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        
        # Forward pass
        predictions = model(user, movie, gender, occupation, genres)
        
        # Compute loss
        loss = criterion(predictions, ratings)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        print(f"Client Training Epoch {epoch+1}/{epochs} - Loss: {loss.item():.4f}")
    
    return model, loss.item()

In [43]:
# Federated learning training function for each client
def train_local_model(client_data, model, epochs=5, batch_size=64, lr=0.001):
    # Prepare the data
    user = torch.tensor(client_data['UserID'].values, dtype=torch.long)
    movie = torch.tensor(client_data['MovieID'].values, dtype=torch.long)
    gender = torch.tensor(client_data['Gender'].values, dtype=torch.long)
    occupation = torch.tensor(client_data['Occupation'].values, dtype=torch.long)
    
    # Create genre vectors
    genre_vectors = []
    for genres in client_data['Genres']:
        vec = torch.zeros(num_genres)
        for genre in genres:
            vec[genre_to_idx[genre]] = 1
        genre_vectors.append(vec)
    genres = torch.stack(genre_vectors)
    
    ratings = torch.tensor(client_data['Rating'].values, dtype=torch.float32)
    
    # Initialize the optimizer and loss function
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # Training loop
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        
        # Forward pass
        predictions = model(user, movie, gender, occupation, genres)
        
        # Compute loss
        loss = criterion(predictions, ratings)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        print(f"Client Training Epoch {epoch+1}/{epochs} - Loss: {loss.item():.4f}")
    
    return model, loss.item()

In [44]:
# Step 4: Federated Learning Training
local_models = []
validation_losses = []

# Get the maximum values for each categorical feature
num_users = df['UserID'].max() + 1
num_movies = df['MovieID'].max() + 1
num_genders = df['Gender'].max() + 1
num_occupations = df['Occupation'].max() + 1

for age in age_groups:
    age_key = f"client_{int(age)}"
    if age_key not in client_datasets:
        print(f"Skipping {age_key}, no data available.")
        continue
    
    # Initialize the model for this client
    model = FederatedRecommender(
        num_users=num_users, 
        num_movies=num_movies, 
        num_genders=num_genders, 
        num_occupations=num_occupations, 
        num_genres=num_genres
    )
    
    # Train model locally for the current client
    trained_model, val_loss = train_local_model(client_datasets[age_key], model)
    
    local_models.append(trained_model)
    validation_losses.append(val_loss)

# Step 5: Display training completion
print(f"✅ Federated Learning Training Completed!")
print(f"📊 Validation Loss per Client: {validation_losses}")

Client Training Epoch 1/5 - Loss: 14.4215
Client Training Epoch 2/5 - Loss: 13.9146
Client Training Epoch 3/5 - Loss: 13.4185
Client Training Epoch 4/5 - Loss: 12.9325
Client Training Epoch 5/5 - Loss: 12.4560
Client Training Epoch 1/5 - Loss: 14.1718
Client Training Epoch 2/5 - Loss: 13.7101
Client Training Epoch 3/5 - Loss: 13.2568
Client Training Epoch 4/5 - Loss: 12.8118
Client Training Epoch 5/5 - Loss: 12.3747
Client Training Epoch 1/5 - Loss: 14.0486
Client Training Epoch 2/5 - Loss: 13.5297
Client Training Epoch 3/5 - Loss: 13.0224
Client Training Epoch 4/5 - Loss: 12.5263
Client Training Epoch 5/5 - Loss: 12.0411
Client Training Epoch 1/5 - Loss: 14.8650
Client Training Epoch 2/5 - Loss: 14.4458
Client Training Epoch 3/5 - Loss: 14.0341
Client Training Epoch 4/5 - Loss: 13.6297
Client Training Epoch 5/5 - Loss: 13.2321
Client Training Epoch 1/5 - Loss: 15.1804
Client Training Epoch 2/5 - Loss: 14.7104
Client Training Epoch 3/5 - Loss: 14.2493
Client Training Epoch 4/5 - Loss: 

### Federated learning model is training well on each client. The loss is steadily decreasing across epochs, which indicates that the model is learning effectively