In [1]:
import pandas as pd
import numpy as np
import torch
import os
import pickle
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

In [2]:
print("Current Working Directory:", os.getcwd())

Current Working Directory: c:\Users\dell\Desktop\CS 5100 FAI\Final project\MovieRecommendations\src\neural_collaborative_filtering


## NCF Model

### Custom Dataset for NCF

In [3]:
class NCFDataset(torch.utils.data.Dataset):
    def __init__(self, user_ids, movie_ids, ratings):
        self.user_ids = torch.tensor(user_ids.to_numpy(), dtype=torch.long)
        self.movie_ids = torch.tensor(movie_ids.to_numpy(), dtype=torch.long)
        self.ratings = torch.tensor(ratings.to_numpy(), dtype=torch.float32)
    def __len__(self):
        return len(self.ratings)
    def __getitem__(self, idx):
        return self.user_ids[idx], self.movie_ids[idx], self.ratings[idx]

    # Define the NCF model
class NCFModel(torch.nn.Module):
    def __init__(self, num_users, num_movies, embed_dim):
        super(NCFModel, self).__init__()
        self.user_embedding = torch.nn.Embedding(num_users, embed_dim)
        self.movie_embedding = torch.nn.Embedding(num_movies, embed_dim)
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(embed_dim * 2, 8),
            torch.nn.ReLU(),
            torch.nn.Linear(8, 1),
            torch.nn.Sigmoid()
        )
    def forward(self, user_ids, movie_ids):
        user_embeds = self.user_embedding(user_ids)
        movie_embeds = self.movie_embedding(movie_ids)
        x = torch.cat([user_embeds, movie_embeds], dim=-1)
        return self.fc(x).squeeze() * 4 + 1 # scale to [1,5]

### Train the model

In [4]:
def train_model(model, data_loader, criterion, optimizer, epochs=5, k=5, device=None):


    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for user_ids, movie_ids, ratings in data_loader:
            # Move data to the same device as the model (GPU)
            user_ids = user_ids.to(device)
            movie_ids = movie_ids.to(device)
            ratings = ratings.to(device)

            # Zero the gradients, run the forward pass, compute loss, and backpropagate
            optimizer.zero_grad()
            outputs = model(user_ids, movie_ids)
            loss = criterion(outputs, ratings)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch + 1} | Loss: {total_loss / len(data_loader):.4f}")

    # Print the profiler results



## Validation with metrics

### Metrics for regression

In [5]:
def calculate_mae(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

In [6]:
def calculate_rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

In [7]:
TOP_N = 5
# MODEL_PATH = "C:/Users/anujp/OneDrive/Desktop/MovieRecommendations/models/neural_collaborative_filtering/ncff_model.pkl"  # Path to save the model

In [8]:
def evaluate_model(model, data_loader, k=TOP_N):
    model.eval()
    total_loss = 0
    all_targets = []
    all_predictions = []
    with torch.no_grad():
        for user_ids, movie_ids, ratings in data_loader:
            outputs = model(user_ids, movie_ids)
            all_targets.extend(ratings.cpu().numpy())
            all_predictions.extend(outputs.cpu().numpy())

    # Calculate metrics
    all_targets = np.array(all_targets)
    all_predictions = np.array(all_predictions)
    mae = calculate_mae(all_targets, all_predictions)
    rmse = calculate_rmse(all_targets, all_predictions)

    print(f"Evaluation | MAE: {mae:.4f} | RMSE: {rmse:.4f}")

## Recommend movies for a user

In [9]:
def recommend_movies(model, user_id, all_movie_ids, k=TOP_N):
    model.eval()
    with torch.no_grad():
        user_tensor = torch.tensor([user_id] * len(all_movie_ids), dtype=torch.long)
        movie_tensor = torch.tensor(all_movie_ids, dtype=torch.long)
        predictions = model(user_tensor, movie_tensor)
    top_k_indices = predictions.argsort(descending=True)[:k]
    return top_k_indices.numpy(), predictions[top_k_indices].numpy()

In [10]:
def get_user_input():
    """Prompt the user to input a user ID and select a model."""
    print("\n--- Recommendation System ---")
    print("Enter 'q' at any time to quit.")
    user_id = input("Enter the User ID for recommendations (1-6040): ")
    if user_id.strip().lower() == 'q':
        return 'q'
    while not user_id.isdigit() or not (1 <= int(user_id) <= 6040):
        print("Invalid input. Please enter a numeric User ID between 1 and 6040 or 'q' to quit.")
        user_id = input("Enter the User ID for recommendations: ")
        if user_id.strip().lower() == 'q':
            return 'q'
    return int(user_id)

# Function to save the model

In [11]:
def save_model(model, data, model_path="../../models/neural_collaborative_filtering/ncf_model.pkl"):
    """
    Save model along with mappings at specified path.
    """
    try:
        # Create directory path if it doesn't exist
        directory = os.path.dirname(model_path)
        os.makedirs(directory, exist_ok=True)
        
        # Create mappings for UserID and MovieID
        user_mapping = dict(enumerate(data['UserID'].astype('category').cat.categories))
        movie_mapping = dict(enumerate(data['MovieID'].astype('category').cat.categories))
        
        # Create save dictionary
        save_dict = {
            'model_state': model.state_dict(),
            'user_mapping': user_mapping,
            'movie_mapping': movie_mapping,
            'model_config': {
                'num_users': len(user_mapping),
                'num_movies': len(movie_mapping),
                'embed_dim': 60
            }
        }
        
        # Save everything to pickle file
        with open(model_path, "wb") as f:
            pickle.dump(save_dict, f)
        
        print(f"Model and mappings saved successfully to: {model_path}")
        
    except Exception as e:
        print(f"Error saving model: {str(e)}")

# Main function

In [12]:
def main():
    # Load and merge data
    data = pd.read_csv('../../data/Final_data/Final_data.csv')

    # Preprocessing: Convert UserID and MovieID to categorical codes
    data['UserID'] = data['UserID'].astype('category').cat.codes
    data['MovieID'] = data['MovieID'].astype('category').cat.codes

    # Extract features for NCF
    user_ids = data['UserID']
    movie_ids = data['MovieID']
    ratings = data['Rating']
    titles = data['Title']

    # Map MovieID to Title for quick access
    movie_id_to_title = dict(zip(movie_ids, titles))

    # Determine unique users and movies for embedding
    num_users = user_ids.nunique()
    num_movies = movie_ids.nunique()
    embed_dim = 60

    # Train-test split
    user_train, user_test, movie_train, movie_test, rating_train, rating_test = train_test_split(
        user_ids, movie_ids, ratings, test_size=0.2, random_state=42
    )

    # Create datasets and loaders
    train_dataset = NCFDataset(user_train, movie_train, rating_train)
    train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
    test_dataset = NCFDataset(user_test, movie_test, rating_test)
    test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

    # Initialize and train the model
    model = NCFModel(num_users, num_movies, embed_dim)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    train_model(model, train_loader, criterion, optimizer, epochs=10, k=5)
    
    # Save the model - UNCOMMENTED THIS LINE
    save_model(model, data)
    
    evaluate_model(model, test_loader, k=TOP_N)

    # Generate recommendations based on user input
    while True:
        user_id = get_user_input()
        if user_id == 'q':
            print("Exiting the Recommendation System!")
            break
        unique_movie_ids = movie_ids.unique()
        top_movies, predicted_ratings = recommend_movies(model, user_id, unique_movie_ids)
        print(f"\nTop {TOP_N} recommended movies for User {user_id}:")
        for i, (movie, rating) in enumerate(zip(top_movies, predicted_ratings)):
            movie_title = movie_id_to_title.get(movie, "Unknown Movie")
            print(f"{i + 1}: {movie_title}, Predicted Rating: {rating:.2f}")

In [13]:
if __name__ == "__main__":
    main()

Epoch 1 | Loss: 1.1177
Epoch 2 | Loss: 0.9022
Epoch 3 | Loss: 0.8363
Epoch 4 | Loss: 0.7837
Epoch 5 | Loss: 0.7423
Epoch 6 | Loss: 0.7098
Epoch 7 | Loss: 0.6848
Epoch 8 | Loss: 0.6629
Epoch 9 | Loss: 0.6446
Epoch 10 | Loss: 0.6305
Model and mappings saved successfully to: ../../models/neural_collaborative_filtering/ncf_model.pkl
Evaluation | MAE: 0.6811 | RMSE: 0.8927

--- Recommendation System ---
Enter 'q' at any time to quit.

Top 5 recommended movies for User 9:
1: The Intern (2015), Predicted Rating: 4.23
2: Team America: World Police (2004), Predicted Rating: 4.20
3: Spanish Apartment, The (L'auberge espagnole) (2002), Predicted Rating: 4.19
4: City by the Sea (2002), Predicted Rating: 4.17
5: Act of Killing, The (2012), Predicted Rating: 4.16

--- Recommendation System ---
Enter 'q' at any time to quit.

Top 5 recommended movies for User 9:
1: The Intern (2015), Predicted Rating: 4.23
2: Team America: World Police (2004), Predicted Rating: 4.20
3: Spanish Apartment, The (L'auber

In [15]:
# Load the dataset
merged_data = pd.read_csv('../../data/Final_data/Final_data.csv')

# Function to get top 5 rated movies for a specific user ID
def top_5_rated_movies(user_id, data):
    # Filter the dataset for the given user ID
    user_data = data[data['UserID'] == user_id]

    # Sort the ratings in descending order
    top_movies = user_data.sort_values(by='Rating', ascending=False).head(5)

    # Return the top 5 rated movies
    return top_movies[['MovieID', 'Title', 'Rating']]

# Example: Fetch top 5 rated movies for User ID 6
user_id = 9
top_movies = top_5_rated_movies(user_id, merged_data)

print(f"Top 5 rated movies for User {user_id}:\n", top_movies)


Top 5 rated movies for User 9:
       MovieID                                              Title  Rating
1111     5902                                  Adaptation (2002)     5.0
1082     1198  Raiders of the Lost Ark (Indiana Jones and the...     5.0
1112     5952      Lord of the Rings: The Two Towers, The (2002)     5.0
1089     2300                              Producers, The (1968)     5.0
1103     5481                 Austin Powers in Goldmember (2002)     5.0
