# Introduction
Neural Collaborative Filtering (NCF) is a technique used in recommendation systems that leverages neural networks to learn user-item interactions and make personalized recommendations.<br>
NCF enhances traditional collaborative filtering (CF) by employing neural networks to model user-item interactions. It combines the strengths of collaborative filtering with the non-linear capabilities of neural networks, allowing it to capture complex patterns and dependencies in the data.

# Setup

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd

# Load Dataset

In [2]:

# Load the MovieLens dataset
# `url` is the remote archive; `dataset_path` is where the zip is stored locally.
# NOTE(review): the /content/ prefix looks like a Google Colab working directory —
# confirm, and adjust the paths when running elsewhere.
url = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'
dataset_path = '/content/ml-latest-small.zip'

# Download and unzip the dataset
# IPython substitutes $url / $dataset_path with the Python variables above before
# handing the line to the shell. `wget -nc` (no-clobber) and `unzip -n` (never
# overwrite) leave files that already exist untouched.
!wget -nc $url -O $dataset_path
!unzip -n $dataset_path -d /content/

# Load data into pandas dataframes
# `ratings` feeds the id remapping, train/test split and model sizing in later cells;
# `movies` is loaded here but not referenced again in the visible cells.
ratings = pd.read_csv('/content/ml-latest-small/ratings.csv')
movies = pd.read_csv('/content/ml-latest-small/movies.csv')

--2024-06-30 05:51:05--  https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 978202 (955K) [application/zip]
Saving to: ‘/content/ml-latest-small.zip’


2024-06-30 05:51:07 (938 KB/s) - ‘/content/ml-latest-small.zip’ saved [978202/978202]

Archive:  /content/ml-latest-small.zip
   creating: /content/ml-latest-small/
  inflating: /content/ml-latest-small/links.csv  
  inflating: /content/ml-latest-small/tags.csv  
  inflating: /content/ml-latest-small/ratings.csv  
  inflating: /content/ml-latest-small/README.txt  
  inflating: /content/ml-latest-small/movies.csv  


In [3]:
# Build lookup tables from the raw MovieLens ids to dense, zero-based indices.
# Indices are handed out in order of first appearance in `ratings`, so they are
# contiguous and can be used directly as embedding row numbers.
user_id_map = {raw_user: index for index, raw_user in enumerate(ratings['userId'].unique())}
item_id_map = {raw_item: index for index, raw_item in enumerate(ratings['movieId'].unique())}


In [4]:
# Replace the raw MovieLens ids with the dense, zero-based indices from the lookup tables.
user_index = ratings['userId'].map(user_id_map)
item_index = ratings['movieId'].map(item_id_map)

# Series.map returns NaN for every value that has no key in the mapping. That is what
# happens when this cell is executed a second time (the ids are already remapped) or
# when the maps were built from a different version of `ratings`. Fail loudly before
# touching `ratings`, instead of silently writing NaN / float ids into it.
if user_index.isna().any() or item_index.isna().any():
    raise ValueError(
        'Some userId/movieId values are missing from user_id_map/item_id_map. '
        'This usually means the remapping cell was run more than once; '
        'reload `ratings` and rebuild the id maps before remapping.'
    )

# Every id was found, so the columns can safely be stored as integers.
ratings['userId'] = user_index.astype('int64')
ratings['movieId'] = item_index.astype('int64')

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rating rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
train_data, test_data = train_test_split(
    ratings,
    test_size=0.2,
    random_state=42,
)

# Report how many rows ended up in each split
print(f'Training data size: {train_data.shape[0]}')
print(f'Testing data size: {test_data.shape[0]}')


Training data size: 80668
Testing data size: 20168


In [6]:
# Define a dataset class
class MovieLensDataset(Dataset):
    """Tensor-backed view of a ratings dataframe.

    The dataframe must provide 'userId', 'movieId' and 'rating' columns. Each
    column is copied into its own tensor once, at construction time; indexing
    the dataset returns the matching (user, item, rating) triple.
    """

    def __init__(self, df):
        def column_as_tensor(column, dtype):
            # torch.tensor copies the column's values into a new tensor.
            return torch.tensor(df[column].values, dtype=dtype)

        # Ids are stored as int64 (torch.long); ratings as float32.
        self.users = column_as_tensor('userId', torch.long)
        self.items = column_as_tensor('movieId', torch.long)
        self.ratings = column_as_tensor('rating', torch.float32)

    def __len__(self):
        # One sample per dataframe row.
        return self.users.size(0)

    def __getitem__(self, idx):
        user, item, rating = self.users[idx], self.items[idx], self.ratings[idx]
        return user, item, rating

# Wrap each split in a dataset, then serve it in mini-batches of 64.
# Only the training loader shuffles; the test loader keeps the row order.
train_dataset, test_dataset = MovieLensDataset(train_data), MovieLensDataset(test_data)

train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)


# Neural Collaborative Filtering (NCF)

In [7]:
class NCF(nn.Module):
    """MLP-based neural collaborative filtering model.

    Looks up one embedding for the user and one for the item, concatenates them,
    and feeds the result through three linear layers (ReLU after the first two)
    to produce a single unbounded score per (user, item) pair.

    Args:
        num_users: number of rows in the user embedding table; user indices
            passed to ``forward`` must lie in ``[0, num_users)``.
        num_items: number of rows in the item embedding table; item indices
            passed to ``forward`` must lie in ``[0, num_items)``.
        embedding_dim: size of each user and item embedding vector.
    """

    def __init__(self, num_users, num_items, embedding_dim=32):
        super(NCF, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        # The MLP input is the user and item embeddings side by side.
        self.fc1 = nn.Linear(embedding_dim * 2, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, user, item):
        """Score each (user, item) pair.

        Args:
            user: integer tensor of user indices.
            item: integer tensor of item indices, same shape as ``user``.

        Returns:
            Tensor of scores with the same shape as ``user``.
        """
        user_emb = self.user_embedding(user)
        item_emb = self.item_embedding(item)
        x = torch.cat([user_emb, item_emb], dim=-1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        # Drop only the trailing size-1 feature axis. A bare squeeze() would also
        # remove the batch axis when a batch holds a single pair, returning a 0-d
        # tensor that no longer lines up with the target ratings.
        return x.squeeze(-1)


In [8]:
# Initialize the Model, Loss Function, and Optimizer
# The embedding tables are sized from the number of distinct ids in `ratings`.
num_users, num_items = ratings['userId'].nunique(), ratings['movieId'].nunique()
model = NCF(num_users=num_users, num_items=num_items)

# Mean squared error between the predicted score and the rating, optimised with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [13]:
# Training the NCF Model
epochs = 100
num_batches = len(train_loader)  # constant across epochs

for epoch in range(epochs):
    model.train()  # put the network in training mode
    total_loss = 0
    for user, item, rating in train_loader:
        # Forward pass and loss for this mini-batch
        output = model(user, item)
        loss = criterion(output, rating)

        # Clear gradients left over from the previous step, backpropagate, update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Reported value is the mean of the per-batch losses for this epoch
    print(f'Epoch {epoch+1}, Loss: {total_loss/num_batches}')


Epoch 1, Loss: 0.12443275705665278
Epoch 2, Loss: 0.12259010771181639
Epoch 3, Loss: 0.11996916712842982
Epoch 4, Loss: 0.11682445621804809
Epoch 5, Loss: 0.11507377792725858
Epoch 6, Loss: 0.11200470077043767
Epoch 7, Loss: 0.11110227847505807
Epoch 8, Loss: 0.107911557295935
Epoch 9, Loss: 0.10616805242532591
Epoch 10, Loss: 0.1033319415765372
Epoch 11, Loss: 0.10187044237207837
Epoch 12, Loss: 0.10047022563765014
Epoch 13, Loss: 0.09792401810294102
Epoch 14, Loss: 0.09741753001507551
Epoch 15, Loss: 0.09463637385998899
Epoch 16, Loss: 0.09330134262448261
Epoch 17, Loss: 0.09199076949143864
Epoch 18, Loss: 0.09035812252741027
Epoch 19, Loss: 0.08862357415128898
Epoch 20, Loss: 0.08830355685337209
Epoch 21, Loss: 0.08611474075693634
Epoch 22, Loss: 0.08434879851012736
Epoch 23, Loss: 0.08338370667822204
Epoch 24, Loss: 0.0826250907952449
Epoch 25, Loss: 0.08113884546518516
Epoch 26, Loss: 0.08031611471281082
Epoch 27, Loss: 0.07912479469982478
Epoch 28, Loss: 0.07801626555763427
Epoch

In [14]:
# A rating (true or predicted) at or above this value counts as "relevant"
threshold = 4.0

# Collected per-sample predictions and ground-truth ratings for the test split
all_preds, all_labels = [], []

model.eval()  # evaluation mode
with torch.no_grad():  # no gradients are needed for scoring
    for user, item, rating in test_loader:
        output = model(user, item)
        all_preds.extend(output.numpy())
        all_labels.extend(rating.numpy())

# Binarise both sides with the same threshold: 1 = relevant, 0 = not relevant
all_preds_binary = [int(pred >= threshold) for pred in all_preds]
all_labels_binary = [int(label >= threshold) for label in all_labels]

# Accuracy = share of test samples whose predicted relevance matches the true relevance
correct = sum(
    1
    for pred, label in zip(all_preds_binary, all_labels_binary)
    if pred == label
)
accuracy = correct / len(all_preds_binary)

print(f'NCF Test Accuracy: {accuracy:.4f}')


NCF Test Accuracy: 0.6260


In [12]:
# Save the model
# Only the learned parameters (the state dict) are written, to 'ncf_model.pth'
# relative to the current working directory.
torch.save(obj=model.state_dict(), f='ncf_model.pth')

----------------