In [368]:
import pandas as pd
import numpy as np
import torch
import os
import pickle
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

In [369]:
print("Current Working Directory:", os.getcwd())

Current Working Directory: /content


## NCF Model

### Custom Dataset for NCF

In [370]:
class NCFDataset(torch.utils.data.Dataset):
    def __init__(self, user_ids, movie_ids, ratings):
        self.user_ids = torch.tensor(user_ids.to_numpy(), dtype=torch.long)
        self.movie_ids = torch.tensor(movie_ids.to_numpy(), dtype=torch.long)
        self.ratings = torch.tensor(ratings.to_numpy(), dtype=torch.float32)
    def __len__(self):
        return len(self.ratings)
    def __getitem__(self, idx):
        return self.user_ids[idx], self.movie_ids[idx], self.ratings[idx]

    # Define the NCF model
class NCFModel(torch.nn.Module):
    def __init__(self, num_users, num_movies, embed_dim):
        super(NCFModel, self).__init__()
        self.user_embedding = torch.nn.Embedding(num_users, embed_dim)
        self.movie_embedding = torch.nn.Embedding(num_movies, embed_dim)
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(embed_dim * 2, 8),
            torch.nn.ReLU(),
            torch.nn.Linear(8, 1),
            torch.nn.Sigmoid()
        )
    def forward(self, user_ids, movie_ids):
        user_embeds = self.user_embedding(user_ids)
        movie_embeds = self.movie_embedding(movie_ids)
        x = torch.cat([user_embeds, movie_embeds], dim=-1)
        return self.fc(x).squeeze() * 4 + 1 # scale to [1,5]

### Train the model

In [371]:
def train_model(model, data_loader, criterion, optimizer, epochs=5, k=5, device=None):


    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for user_ids, movie_ids, ratings in data_loader:
            # Move data to the same device as the model (GPU)
            user_ids = user_ids.to(device)
            movie_ids = movie_ids.to(device)
            ratings = ratings.to(device)

            # Zero the gradients, run the forward pass, compute loss, and backpropagate
            optimizer.zero_grad()
            outputs = model(user_ids, movie_ids)
            loss = criterion(outputs, ratings)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch + 1} | Loss: {total_loss / len(data_loader):.4f}")

    # Print the profiler results



## Validation with metrics

### Metrics for regression

In [372]:
def calculate_mae(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

In [373]:
def calculate_rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

In [374]:
TOP_N = 5
#MODEL_PATH = "models/neural_collaborative_filtering/ncf_model.pkl"  # Path to save the model

In [375]:
def evaluate_model(model, data_loader, k=TOP_N):
    model.eval()
    total_loss = 0
    all_targets = []
    all_predictions = []
    with torch.no_grad():
        for user_ids, movie_ids, ratings in data_loader:
            outputs = model(user_ids, movie_ids)
            all_targets.extend(ratings.cpu().numpy())
            all_predictions.extend(outputs.cpu().numpy())

    # Calculate metrics
    all_targets = np.array(all_targets)
    all_predictions = np.array(all_predictions)
    mae = calculate_mae(all_targets, all_predictions)
    rmse = calculate_rmse(all_targets, all_predictions)

    print(f"Evaluation | MAE: {mae:.4f} | RMSE: {rmse:.4f}")

## Recommend movies for a user

In [376]:
def recommend_movies(model, user_id, all_movie_ids, k=TOP_N):
    model.eval()
    with torch.no_grad():
        user_tensor = torch.tensor([user_id] * len(all_movie_ids), dtype=torch.long)
        movie_tensor = torch.tensor(all_movie_ids, dtype=torch.long)
        predictions = model(user_tensor, movie_tensor)
    top_k_indices = predictions.argsort(descending=True)[:k]
    return top_k_indices.numpy(), predictions[top_k_indices].numpy()

In [377]:
def get_user_input():
    """Prompt the user to input a user ID and select a model."""
    print("\n--- Recommendation System ---")
    print("Enter 'q' at any time to quit.")
    user_id = input("Enter the User ID for recommendations (1-6040): ")
    if user_id.strip().lower() == 'q':
        return 'q'
    while not user_id.isdigit() or not (1 <= int(user_id) <= 6040):
        print("Invalid input. Please enter a numeric User ID between 1 and 6040 or 'q' to quit.")
        user_id = input("Enter the User ID for recommendations: ")
        if user_id.strip().lower() == 'q':
            return 'q'
    return int(user_id)

# Function to save the model

In [378]:
# def save_model(model, model_path):
#     with open(model_path, "wb") as f:
#         pickle.dump(model, f)
#     print(f"Model saved to {model_path}")

# Main function

In [379]:
def main():
    # Load and merge data
    data = pd.read_csv('Final_data.csv')

    # Preprocessing: Convert UserID and MovieID to categorical codes
    data['UserID'] = data['UserID'].astype('category').cat.codes
    data['MovieID'] = data['MovieID'].astype('category').cat.codes

    # Extract features for NCF
    user_ids = data['UserID']
    movie_ids = data['MovieID']
    ratings = data['Rating']
    titles = data['Title']

    # Map MovieID to Title for quick access
    movie_id_to_title = dict(zip(movie_ids, titles))

    # Determine unique users and movies for embedding
    num_users = user_ids.nunique()
    num_movies = movie_ids.nunique()
    embed_dim = 60

    # Train-test split
    user_train, user_test, movie_train, movie_test, rating_train, rating_test = train_test_split(
        user_ids, movie_ids, ratings, test_size=0.2, random_state=42
    )

    # Create datasets and loaders
    train_dataset = NCFDataset(user_train, movie_train, rating_train)
    train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
    test_dataset = NCFDataset(user_test, movie_test, rating_test)
    test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

    # Initialize and train the model
    model = NCFModel(num_users, num_movies, embed_dim)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    train_model(model, train_loader, criterion, optimizer, epochs=10, k=5)
    # save_model(model, MODEL_PATH)
    evaluate_model(model, test_loader, k=TOP_N)

    # Generate recommendations based on user input
    while True:
        user_id = get_user_input()
        if user_id == 'q':
            print("Exiting the Recommendation System!")
            break
        unique_movie_ids = movie_ids.unique()
        top_movies, predicted_ratings = recommend_movies(model, user_id, unique_movie_ids)
        print(f"\nTop {TOP_N} recommended movies for User {user_id}:")
        for i, (movie, rating) in enumerate(zip(top_movies, predicted_ratings)):
            movie_title = movie_id_to_title.get(movie, "Unknown Movie")
            print(f"{i + 1}: {movie_title}, Predicted Rating: {rating:.2f}")


In [380]:
if __name__ == "__main__":
    main()

Epoch 1 | Loss: 1.1737
Epoch 2 | Loss: 0.8964
Epoch 3 | Loss: 0.8294
Epoch 4 | Loss: 0.7803
Epoch 5 | Loss: 0.7404
Epoch 6 | Loss: 0.7080
Epoch 7 | Loss: 0.6842
Epoch 8 | Loss: 0.6626
Epoch 9 | Loss: 0.6453
Epoch 10 | Loss: 0.6318
Evaluation | MAE: 0.6827 | RMSE: 0.8944

--- Recommendation System ---
Enter 'q' at any time to quit.
Enter the User ID for recommendations (1-6040): 6

Top 5 recommended movies for User 6:
1: Curly Sue (1991), Predicted Rating: 4.67
2: Zebraman (2004), Predicted Rating: 4.59
3: Legend of Zorro, The (2005), Predicted Rating: 4.53
4: Bulletproof Monk (2003), Predicted Rating: 4.50
5: First Time, The (2012), Predicted Rating: 4.47

--- Recommendation System ---
Enter 'q' at any time to quit.
Enter the User ID for recommendations (1-6040): q
Exiting the Recommendation System!


In [381]:
# Load the dataset
merged_data = pd.read_csv('Final_data.csv')

# Function to get top 5 rated movies for a specific user ID
def top_5_rated_movies(user_id, data):
    # Filter the dataset for the given user ID
    user_data = data[data['UserID'] == user_id]

    # Sort the ratings in descending order
    top_movies = user_data.sort_values(by='Rating', ascending=False).head(5)

    # Return the top 5 rated movies
    return top_movies[['MovieID', 'Title', 'Rating']]

# Example: Fetch top 5 rated movies for User ID 6
user_id = 6
top_movies = top_5_rated_movies(user_id, merged_data)

print(f"Top 5 rated movies for User {user_id}:\n", top_movies)


Top 5 rated movies for User 6:
      MovieID                                 Title  Rating
703      318      Shawshank Redemption, The (1994)     5.0
732      364                 Lion King, The (1994)     5.0
804      587                          Ghost (1990)     5.0
725      356                   Forrest Gump (1994)     5.0
839      780  Independence Day (a.k.a. ID4) (1996)     5.0
