## Basic Model building

In [1]:
import copy
import os
from ast import literal_eval

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

# Load dataset: the merged table whose columns (UserID, MovieID, Rating, Age,
# Gender, Occupation, Genres, ...) are read by the cells below.
# NOTE(review): the recorded cell output echoes this line, which is how a Python
# warning raised here is displayed (possibly a pandas mixed-dtype warning) —
# confirm, and pin dtypes if so.
df = pd.read_csv('merged_data.csv')

  df = pd.read_csv('merged_data.csv')


In [2]:
# The Genres column stores each list as its string representation; parse it
# back into a real Python list.
df['Genres'] = df['Genres'].apply(literal_eval)

# Step 1: Zero-index all categorical features
# ID columns are shifted down by one (IDs are assumed to start at 1).
for id_column in ("UserID", "MovieID"):
    df[id_column] = df[id_column] - 1

# Label columns are replaced by their pandas category codes (0, 1, 2, ...).
for label_column in ("Gender", "Occupation"):
    df[label_column] = df[label_column].astype('category').cat.codes


In [3]:
# Step 2: Simulate federated clients by giving every distinct Age value its own
# slice of the ratings table (any other grouping criterion could be used).
age_groups = df['Age'].unique()
client_datasets = {
    f"client_{age}": df[df['Age'] == age]
    for age in age_groups
}

# Genre vocabulary: every genre that occurs in any row, in sorted order.
all_genres = sorted({genre for genre_list in df['Genres'] for genre in genre_list})
num_genres = len(all_genres)

# Position of each genre inside the multi-hot genre vector.
genre_to_idx = dict(zip(all_genres, range(num_genres)))

In [4]:
import torch
import torch.nn as nn

class FederatedRecommender(nn.Module):
    """Rating regressor over age-group, movie, gender, occupation and genre features.

    Four categorical inputs are looked up in embedding tables, the multi-hot
    genre vector is projected linearly to the same width, and the five
    resulting vectors are concatenated and passed through a two-layer MLP
    that emits a single value per example.
    """

    def __init__(self, num_movies, num_genders, num_occupations, num_genres, num_age_groups, embedding_dim=10):
        """Create the embedding tables, the genre projection and the MLP head.

        Args:
            num_movies: Number of rows in the movie embedding table.
            num_genders: Number of rows in the gender embedding table.
            num_occupations: Number of rows in the occupation embedding table.
            num_genres: Length of the multi-hot genre input vector.
            num_age_groups: Number of rows in the age-group embedding table.
            embedding_dim: Width of every embedding and of the genre projection.
        """
        super().__init__()

        # Layers are created in the same order as before so that random
        # initialisation under a fixed seed is unchanged.
        self.age_embedding = nn.Embedding(num_age_groups, embedding_dim)
        self.movie_embedding = nn.Embedding(num_movies, embedding_dim)
        self.gender_embedding = nn.Embedding(num_genders, embedding_dim)
        self.occupation_embedding = nn.Embedding(num_occupations, embedding_dim)

        self.genre_projection = nn.Linear(num_genres, embedding_dim)

        # age, movie, gender, occupation, genres
        feature_block_count = 5
        self.fc1 = nn.Linear(feature_block_count * embedding_dim, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, age_group, movie, gender, occupation, genres):
        """Return the raw (unclamped) rating prediction, last dimension of size 1."""
        feature_blocks = (
            self.age_embedding(age_group),
            self.movie_embedding(movie),
            self.gender_embedding(gender),
            self.occupation_embedding(occupation),
            self.genre_projection(genres.float()),
        )
        hidden = torch.relu(self.fc1(torch.cat(feature_blocks, dim=-1)))
        return self.fc2(hidden)


In [5]:
def train_local(model, dataloader, criterion, optimizer, device):
    """Run one pass over ``dataloader`` and return the mean per-batch loss.

    Args:
        model: Module called as ``model(age_group, movie, gender, occupation, genres)``.
        dataloader: Sized iterable of 6-tuples
            ``(age_group, movie, gender, occupation, genres, rating)`` of tensors.
        criterion: Loss callable taking ``(prediction, target)``.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device every batch tensor is moved to. The model itself is not
            moved here; the caller must place it on the same device.

    Returns:
        float: Sum of the batch losses divided by ``len(dataloader)``.
    """
    model.train()
    total_loss = 0
    for batch in dataloader:
        age_group, movie, gender, occupation, genres, rating = [x.to(device) for x in batch]
        optimizer.zero_grad()
        outputs = model(age_group, movie, gender, occupation, genres)
        # Drop only the trailing size-1 dimension. A bare squeeze() would also
        # collapse a batch of one to a 0-d tensor, whose shape no longer matches
        # the (1,)-shaped target and makes the loss fall back to broadcasting.
        loss = criterion(outputs.squeeze(-1), rating.float())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)


def average_models(global_model, client_models):
    """Overwrite ``global_model`` with the element-wise mean of the client weights.

    Every entry of the global ``state_dict`` is replaced by the unweighted mean
    (computed in float) of the same entry across ``client_models``; the result
    is loaded back into ``global_model``, which is also returned.
    """
    client_states = [client.state_dict() for client in client_models]
    merged_state = global_model.state_dict()
    for entry_name in merged_state.keys():
        stacked = torch.stack([state[entry_name].float() for state in client_states], 0)
        merged_state[entry_name] = stacked.mean(0)
    global_model.load_state_dict(merged_state)
    return global_model


def federated_train(global_model, client_loaders, num_rounds=5, lr=0.001, device="cpu"):
    """Run federated-averaging rounds and return the updated global model.

    In every round each client receives a deep copy of the current global
    model, trains it on its own loader with a fresh Adam optimizer via
    ``train_local``, and the trained copies are then merged back into the
    global model with ``average_models``.

    Args:
        global_model: Model whose weights are refined round after round.
        client_loaders: Sequence with one data loader per client; it is
            iterated once per round.
        num_rounds: Number of communication rounds.
        lr: Learning rate of each client's Adam optimizer.
        device: Device forwarded to ``train_local``.

    Returns:
        The global model after the last round.
    """
    criterion = nn.MSELoss()

    # `round_idx` rather than `round`, so the builtin is not shadowed.
    for round_idx in range(num_rounds):
        client_models = []
        print(f"--- Federated Round {round_idx + 1} ---")

        for i, loader in enumerate(client_loaders):
            # Requires the `copy` module to be imported at the top of the notebook.
            model_copy = copy.deepcopy(global_model)
            optimizer = torch.optim.Adam(model_copy.parameters(), lr=lr)
            result = train_local(model_copy, loader, criterion, optimizer, device)
            # This notebook defines train_local twice: one version returns the
            # loss, the other returns (model, loss). Accept either shape.
            loss = result[1] if isinstance(result, tuple) else result
            print(f"Client {i} Loss: {loss:.4f}")
            client_models.append(model_copy)

        # With no clients there is nothing to average; keep the model as is.
        if client_models:
            global_model = average_models(global_model, client_models)

    return global_model


In [None]:
# Step 4: Federated Learning Training
# One model is trained per simulated client (one client per Age value); the
# trained models and the loss reported for each are collected in two lists.
# NOTE(review): `train_local_model` is not defined anywhere in the visible
# notebook. The recorded output of this cell is
# "IndexError: index out of range in self"; presumably an embedding lookup
# received an index >= its table size (e.g. raw Age values against a table with
# only len(age_groups) rows) — verify inside train_local_model.
local_models = []
validation_losses = []

# Embedding table sizes: largest zero-based code + 1
num_users = df['UserID'].max() + 1
num_movies = df['MovieID'].max() + 1
num_genders = df['Gender'].max() + 1
num_occupations = df['Occupation'].max() + 1
# Loop-invariant, so computed once; this also defines it when no client is trained.
num_age_groups = len(age_groups)

# torch.save does not create missing directories.
os.makedirs("client_models", exist_ok=True)

for age in age_groups:
    age_key = f"client_{int(age)}"
    if age_key not in client_datasets:
        print(f"Skipping {age_key}, no data available.")
        continue

    # Initialize the model for this client
    model = FederatedRecommender(
        num_movies=num_movies,
        num_genders=num_genders,
        num_occupations=num_occupations,
        num_genres=num_genres,
        num_age_groups=num_age_groups
    )

    # Train model locally for the current client
    trained_model, val_loss = train_local_model(client_datasets[age_key], model)

    # Save the model returned by training (the same object that is aggregated
    # later), not the pre-training handle.
    model_save_path = f"client_models/client_model_{int(age)}.pth"
    torch.save(trained_model.state_dict(), model_save_path)
    print(f"💾 Saved model for client_{int(age)} to {model_save_path}")

    local_models.append(trained_model)
    validation_losses.append(val_loss)

# Step 5: Display training completion
print("✅ Federated Learning Training Completed!")
print(f"📊 Validation Loss per Client: {validation_losses}")

IndexError: index out of range in self

### The federated learning model is training well on each client. The loss decreases steadily across epochs, which indicates that the model is learning effectively.

In [None]:
# Function to prepare input features for a single prediction (with AgeGroup instead of UserID)
def prepare_input(age_group_val, movie_id, gender_str, occupation_str, genres_list):
    """Build the five single-example input tensors expected by the recommender.

    Args:
        age_group_val: Age-group embedding index.
        movie_id: Movie embedding index.
        gender_str: Gender. An integer (Python or NumPy) is used directly as
            the embedding index, which is what rows of the preprocessed
            DataFrame contain.
        occupation_str: Occupation, handled like ``gender_str``.
        genres_list: Iterable of genre names; names missing from
            ``genre_to_idx`` are ignored.

    Returns:
        tuple: ``(age_group, movie, gender, occupation, genres)`` where the
        first four are long tensors of shape ``(1,)`` and ``genres`` is a float
        multi-hot tensor of shape ``(1, num_genres)``.

    Note:
        Non-integer gender/occupation values still go through the original
        one-element categorical encoding, which can only ever yield code 0
        because a single value forms a single category. Raw labels therefore
        need the label-to-code mapping from preprocessing, which is not kept
        in the visible notebook.
    """
    def _as_code(value):
        # Already-encoded values must be kept; re-encoding a one-element Series
        # would map every one of them to 0.
        if isinstance(value, (int, np.integer)):
            return int(value)
        return pd.Series([value]).astype('category').cat.codes[0]

    age_group = torch.tensor([age_group_val], dtype=torch.long)
    movie = torch.tensor([movie_id], dtype=torch.long)

    gender = torch.tensor([_as_code(gender_str)], dtype=torch.long)
    occupation = torch.tensor([_as_code(occupation_str)], dtype=torch.long)

    # Multi-hot genre vector with a leading batch dimension of 1
    genre_vec = torch.zeros(num_genres)
    for genre in genres_list:
        if genre in genre_to_idx:
            genre_vec[genre_to_idx[genre]] = 1
    genres = genre_vec.unsqueeze(0)

    return age_group, movie, gender, occupation, genres


# Recreate the model architecture with age group
test_model = FederatedRecommender(
    num_movies=num_movies,
    num_genders=num_genders,
    num_occupations=num_occupations,
    num_genres=num_genres,
    num_age_groups=num_age_groups
)

# Sample input: Modify with actual known values from your dataset
sample_row = df.iloc[15]

sample_movie_id = sample_row["MovieID"]  # 0-indexed
sample_gender = sample_row["Gender"]
sample_occupation = sample_row["Occupation"]
sample_genres = sample_row["Genres"]
user_age = sample_row["Age"]  # raw Age value as stored in the dataset

# Convert the raw Age value to its embedding index. The training cell of this
# notebook defines that index as the value's position among the sorted distinct
# Age values, so hand-written thresholds (which yield 0-4) would not line up
# with the trained age embedding.
age_value_to_idx = {age: idx for idx, age in enumerate(sorted(df['Age'].unique()))}
age_group_val = age_value_to_idx[user_age]

# Client checkpoints are written as client_model_<raw Age value>.pth, so the
# file name is keyed by the raw value rather than by the embedding index.
# NOTE(review): the checkpoint must already exist on disk from a training run.
model_filename = f"client_models/client_model_{int(user_age)}.pth"
test_model.load_state_dict(torch.load(model_filename))
test_model.eval()

# Prepare input
age_group, movie, gender, occupation, genres = prepare_input(
    age_group_val, sample_movie_id, sample_gender, sample_occupation, sample_genres
)

# Predict
with torch.no_grad():
    predicted_rating = test_model(age_group, movie, gender, occupation, genres)

    # Clamp to [1, 5] and round to a whole number of stars
    predicted_rating = torch.clamp(predicted_rating, 1.0, 5.0)
    predicted_rating = torch.round(predicted_rating)

    print(f"⭐ Predicted rating: {predicted_rating.item():.0f} for Movie {sample_movie_id}, Age Group {age_group_val}")


⭐ Predicted rating: 4 for User 0, Movie 2790 Age group  1


In [None]:
# Show the row at position 15, the same row the single-prediction cell uses as
# its sample, so the prediction can be compared with the stored Rating.
print(df.iloc[15])

UserID                        0
MovieID                    2790
Rating                        4
Timestamp             978302188
Gender                        0
Age                           1
Occupation                   10
Zip-code                  48067
Title          Airplane! (1980)
Genres                 [Comedy]
User                          0
Movie                      2586
Action                        0
Adventure                     0
Animation                     0
Children's                    0
Comedy                        1
Crime                         0
Documentary                   0
Drama                         0
Fantasy                       0
Film-Noir                     0
Horror                        0
Musical                       0
Mystery                       0
Romance                       0
Sci-Fi                        0
Thriller                      0
War                           0
Western                       0
Name: 15, dtype: object


In [None]:
import torch

# Initialize the federated model (this is the final model that combines knowledge from all clients).
# FederatedRecommender has no `num_users` parameter and requires `num_age_groups`,
# so the arguments must match the ones used for the client models; otherwise the
# constructor raises TypeError and the averaged weights could not be loaded.
federated_model = FederatedRecommender(
    num_movies=num_movies,
    num_genders=num_genders,
    num_occupations=num_occupations,
    num_genres=num_genres,
    num_age_groups=num_age_groups
)

# Aggregation function: Average the weights from all local models
def aggregate_local_models(local_models):
    """Return the unweighted mean of every named parameter across ``local_models``.

    The result maps parameter name to averaged tensor and can be passed to
    ``load_state_dict`` of a model that has the same parameters and no buffers.
    """
    # Zero accumulators shaped like the first model's parameters
    totals = {
        name: torch.zeros_like(param)
        for name, param in local_models[0].named_parameters()
    }

    # Sum each parameter over all client models
    for client in local_models:
        for name, param in client.named_parameters():
            totals[name] += param.data

    # Turn the sums into means
    model_count = len(local_models)
    return {name: total / model_count for name, total in totals.items()}

# Aggregate the local models into the federated model.
# `local_models` must have been filled by a client-training cell beforehand.
aggregated_weights = aggregate_local_models(local_models)

# Load the aggregated weights into the federated model.
# The keys are parameter names, so this only succeeds when federated_model was
# built with the same architecture as the client models.
federated_model.load_state_dict(aggregated_weights)

# Save the federated model (written to the current working directory)
torch.save(federated_model.state_dict(), "federated_model.pth")
print("💾 Federated model saved as federated_model.pth")


💾 Federated model saved as federated_model.pth


In [None]:
# Restore the aggregated weights from disk and put the model in inference mode
saved_state = torch.load("federated_model.pth")
federated_model.load_state_dict(saved_state)
federated_model.eval()

# Function to recommend movies using the federated model
def recommend_movies_federated(user_id, gender, occupation, genres, num_recommendations=5):
    """Return the names of the movies with the highest predicted rating.

    Every movie index in ``range(num_movies)`` is scored by ``federated_model``
    in one batched forward pass. Predictions are clamped to [1, 5] and sorted
    from highest to lowest; ties keep ascending movie-index order.

    Args:
        user_id: Forwarded to ``prepare_input`` as its first argument, which
            the model consumes as the age-group embedding index.
        gender: Forwarded to ``prepare_input``.
        occupation: Forwarded to ``prepare_input``.
        genres: List of genre names forwarded to ``prepare_input``.
        num_recommendations: Number of entries to return.

    Returns:
        list[str]: Names looked up in ``movie_names``, falling back to
        ``"Unknown Movie <id>"`` for ids without an entry.
    """
    # NOTE(review): `movie_names` is not defined in the visible notebook; it
    # must be a mapping from movie index to title created elsewhere.
    catalog_size = int(num_movies)
    if catalog_size <= 0:
        return []

    # The user-side features are the same for every candidate movie, so they
    # are encoded once and repeated instead of being rebuilt per movie.
    age_tensor, _, gender_tensor, occupation_tensor, genres_tensor = prepare_input(
        user_id, 0, gender, occupation, genres
    )
    movie_tensor = torch.arange(catalog_size, dtype=torch.long)

    # Score the whole catalogue in a single forward pass
    with torch.no_grad():
        predicted = federated_model(
            age_tensor.repeat(catalog_size),
            movie_tensor,
            gender_tensor.repeat(catalog_size),
            occupation_tensor.repeat(catalog_size),
            genres_tensor.repeat(catalog_size, 1),
        )
        predicted = torch.clamp(predicted, 1.0, 5.0).reshape(-1)  # Ensure it's between 1 and 5

    # (movie_id, rating) pairs; the stable sort keeps lower ids first among ties
    movie_ratings = list(enumerate(predicted.tolist()))
    movie_ratings.sort(key=lambda pair: pair[1], reverse=True)

    # Names of the top N movies
    top_recommendations = movie_ratings[:num_recommendations]
    return [
        f"{movie_names.get(movie_id, f'Unknown Movie {movie_id}')}"
        for movie_id, _ in top_recommendations
    ]


In [None]:
# Function to get user input
def get_user_input():
    """Prompt on stdin for the recommendation inputs.

    Returns:
        tuple: ``(user_id, gender, occupation, genres)`` where ``user_id`` is
        the integer typed at the "Enter User age" prompt, ``gender`` and
        ``occupation`` are whitespace-trimmed strings, and ``genres`` is the
        list of non-empty, whitespace-trimmed genre names.

    Raises:
        ValueError: If the first answer is not an integer.
    """
    # NOTE(review): the prompt asks for an age while the value is returned as
    # `user_id`; callers use it as the age-group index — confirm what users
    # are expected to type.
    user_id = int(input("Enter User age: "))
    gender = input("Enter Gender (M/F): ").strip()
    occupation = input("Enter Occupation: ").strip()

    # Trim every entry so that "Comedy, Drama" yields "Drama" rather than
    # " Drama" (which would never match a genre name), and drop empty entries.
    raw_genres = input("Enter Preferred Genres (comma separated): ")
    genres = [name.strip() for name in raw_genres.split(",") if name.strip()]

    return user_id, gender, occupation, genres

# Get user input
# NOTE(review): the first value is requested as an age but is passed on as
# `user_id`, which the recommender uses as the age-group index — confirm that
# the entered value is a valid index for the age embedding.
user_id, gender, occupation, genres = get_user_input()  # blocks until all four prompts are answered
recommendations = recommend_movies_federated(user_id, gender, occupation, genres, num_recommendations=5)

# Print recommended movies, one title per line, best first
for recommendation in recommendations:
    print(recommendation)

Toy Story (1995)
Jumanji (1995)
Grumpier Old Men (1995)
Waiting to Exhale (1995)
Father of the Bride Part II (1995)


In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from ast import literal_eval
from torch.utils.data import DataLoader, TensorDataset
import os

# Load the merged dataset and parse the stringified genre lists
df = pd.read_csv('merged_data.csv')
df['Genres'] = df['Genres'].apply(literal_eval)

# Zero-index categorical features: ID columns are shifted down by one,
# label columns are replaced by their pandas category codes
for id_column in ("UserID", "MovieID"):
    df[id_column] = df[id_column] - 1
for label_column in ("Gender", "Occupation"):
    df[label_column] = df[label_column].astype('category').cat.codes

# Embedding index of an Age value = its position among the sorted distinct values
age_groups = sorted(df['Age'].unique())
age_value_to_idx = dict(zip(age_groups, range(len(age_groups))))
df["AgeGroupID"] = df["Age"].map(age_value_to_idx)

# One simulated client per Age value
client_datasets = {}
for age in age_groups:
    client_datasets[f"client_{age}"] = df[df['Age'] == age]

# Genre vocabulary and the position of each genre in the multi-hot vector
all_genres = set()
for genre_list in df['Genres']:
    all_genres.update(genre_list)
all_genres = sorted(all_genres)
genre_to_idx = {genre: position for position, genre in enumerate(all_genres)}
num_genres = len(all_genres)

# Define the federated model
class FederatedRecommender(nn.Module):
    """Rating regressor over age-group, movie, gender, occupation and genre features.

    Age group, movie, gender and occupation indices are embedded, the multi-hot
    genre vector is projected linearly to the same width, and the concatenation
    of the five vectors is fed to a two-layer MLP producing one value per example.
    """

    def __init__(self, num_movies, num_genders, num_occupations, num_genres, num_age_groups,
                 embedding_dim=10, hidden_dim=128):
        """Create the embedding tables, the genre projection and the MLP head.

        Args:
            num_movies: Number of rows in the movie embedding table.
            num_genders: Number of rows in the gender embedding table.
            num_occupations: Number of rows in the occupation embedding table.
            num_genres: Length of the multi-hot genre input vector.
            num_age_groups: Number of rows in the age-group embedding table.
            embedding_dim: Width of every embedding and of the genre projection.
            hidden_dim: Width of the hidden layer. The default of 128 reproduces
                the previously hard-coded size, so existing checkpoints still load.
        """
        super(FederatedRecommender, self).__init__()
        self.age_embedding = nn.Embedding(num_age_groups, embedding_dim)
        self.movie_embedding = nn.Embedding(num_movies, embedding_dim)
        self.gender_embedding = nn.Embedding(num_genders, embedding_dim)
        self.occupation_embedding = nn.Embedding(num_occupations, embedding_dim)
        self.genre_projection = nn.Linear(num_genres, embedding_dim)
        # Five feature blocks are concatenated: age, movie, gender, occupation, genres
        self.fc1 = nn.Linear(embedding_dim * 5, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)

    def forward(self, age_group, movie, gender, occupation, genres):
        """Return the raw (unclamped) rating prediction, last dimension of size 1."""
        age_embedded = self.age_embedding(age_group)
        movie_embedded = self.movie_embedding(movie)
        gender_embedded = self.gender_embedding(gender)
        occupation_embedded = self.occupation_embedding(occupation)
        genre_embedded = self.genre_projection(genres.float())
        x = torch.cat([age_embedded, movie_embedded, gender_embedded, occupation_embedded, genre_embedded], dim=-1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# Train model locally on each client
def train_local(model, dataloader, criterion, optimizer, device='cpu'):
    """Run one pass over ``dataloader`` and return the model with its mean batch loss.

    Args:
        model: Module called as ``model(age, movie, gender, occupation, genres)``.
        dataloader: Sized iterable of 6-tuples
            ``(age, movie, gender, occupation, genres, rating)`` of tensors.
        criterion: Loss callable taking ``(prediction, target)``.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device every batch tensor is moved to. The model itself is not
            moved here; the caller must place it on the same device.

    Returns:
        tuple: ``(model, mean_loss)`` where ``model`` is the same object that
        was passed in (trained in place) and ``mean_loss`` is the sum of the
        batch losses divided by ``len(dataloader)``.
    """
    model.train()
    total_loss = 0
    for batch in dataloader:
        age, movie, gender, occupation, genres, rating = [x.to(device) for x in batch]
        optimizer.zero_grad()
        output = model(age, movie, gender, occupation, genres)
        # Drop only the trailing size-1 dimension. A bare squeeze() would also
        # collapse a final batch of one to a 0-d tensor, whose shape no longer
        # matches the (1,)-shaped target and makes the loss rely on broadcasting.
        loss = criterion(output.squeeze(-1), rating.float())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return model, total_loss / len(dataloader)

# Create dataloader for a client's dataset
def create_dataloader(df, batch_size=32):
    age_tensor = torch.tensor(df["AgeGroupID"].values, dtype=torch.long)
    movie_tensor = torch.tensor(df["MovieID"].values, dtype=torch.long)
    gender_tensor = torch.tensor(df["Gender"].values, dtype=torch.long)
    occupation_tensor = torch.tensor(df["Occupation"].values, dtype=torch.long)
    rating_tensor = torch.tensor(df["Rating"].values, dtype=torch.float)

    genre_tensor = torch.zeros((len(df), num_genres))
    for i, genres in enumerate(df["Genres"]):
        for genre in genres:
            if genre in genre_to_idx:
                genre_tensor[i][genre_to_idx[genre]] = 1

    dataset = TensorDataset(age_tensor, movie_tensor, gender_tensor, occupation_tensor, genre_tensor, rating_tensor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Model parameters: every table size is the largest zero-based code + 1.
# num_users is computed here but is not passed to the model in this cell.
num_users = df['UserID'].max() + 1
num_movies = df['MovieID'].max() + 1
num_genders = df['Gender'].max() + 1
num_occupations = df['Occupation'].max() + 1
num_age_groups = len(age_groups)

# Train local models: one freshly initialised model per client, collected
# together with the loss train_local reports for it.
local_models = []
validation_losses = []

# torch.save does not create missing directories
os.makedirs("client_models", exist_ok=True)

for age in age_groups:
    age_key = f"client_{age}"
    client_df = client_datasets[age_key]
    dataloader = create_dataloader(client_df)

    # Every client uses the same architecture, so the weights can be averaged
    # parameter by parameter afterwards.
    model = FederatedRecommender(
        num_movies=num_movies,
        num_genders=num_genders,
        num_occupations=num_occupations,
        num_genres=num_genres,
        num_age_groups=num_age_groups
    )

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()

    # train_local iterates the dataloader once. The value stored as `val_loss`
    # is the mean loss over those training batches; no held-out data is
    # evaluated in this cell.
    trained_model, val_loss = train_local(model, dataloader, criterion, optimizer)
    local_models.append(trained_model)
    validation_losses.append(val_loss)

    # Checkpoint file names are keyed by the raw Age value
    torch.save(trained_model.state_dict(), f"client_models/client_model_{age}.pth")
    print(f"✅ Trained and saved model for age group {age} — Loss: {val_loss:.4f}")

# Aggregate local models into a federated model
def aggregate_local_models(local_models):
    """Average every named parameter over ``local_models`` with equal weight.

    Returns a dict mapping parameter name to the averaged tensor, suitable for
    ``load_state_dict`` on a model with the same parameters and no buffers.
    """
    template = local_models[0]

    # One zero accumulator per parameter of the first model
    running = {}
    for name, param in template.named_parameters():
        running[name] = torch.zeros_like(param.data)

    # Accumulate the parameters client by client
    for client in local_models:
        for name, param in client.named_parameters():
            running[name].add_(param.data)

    # Divide the sums in place to obtain the means
    client_count = len(local_models)
    return {name: total.div_(client_count) for name, total in running.items()}

# Build a model with the same architecture as the clients so that the averaged
# parameter dict matches its state_dict keys.
federated_model = FederatedRecommender(
    num_movies=num_movies,
    num_genders=num_genders,
    num_occupations=num_occupations,
    num_genres=num_genres,
    num_age_groups=num_age_groups
)
# Unweighted mean of the client parameters (clients count equally regardless
# of how many rows each one trained on)
aggregated_weights = aggregate_local_models(local_models)
federated_model.load_state_dict(aggregated_weights)

# Save final federated model (written to the current working directory)
torch.save(federated_model.state_dict(), "federated_model.pth")
print("📦 Federated model saved as 'federated_model.pth'")
# These are the per-client mean training losses collected in validation_losses
print(f"📊 Validation losses: {validation_losses}")


  df = pd.read_csv('merged_data.csv')


✅ Trained and saved model for age group 1 — Loss: 1.6399
✅ Trained and saved model for age group 18 — Loss: 1.2790
✅ Trained and saved model for age group 25 — Loss: 1.1052
✅ Trained and saved model for age group 35 — Loss: 1.1092
✅ Trained and saved model for age group 45 — Loss: 1.1531
✅ Trained and saved model for age group 50 — Loss: 1.1519
✅ Trained and saved model for age group 56 — Loss: 1.2269
📦 Federated model saved as 'federated_model.pth'
📊 Validation losses: [1.6398995787640716, 1.2790050504513533, 1.1051856758785064, 1.1092273479758552, 1.1531153766476843, 1.1518623506270567, 1.2268742714876388]
