In [2]:
import torch
import pandas as pd

In [3]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Using device: cuda


In [4]:
# Read the tab-separated ratings file; the timestamp column is dropped right away
# because nothing below uses it.
ratings = (
    pd.read_csv('./dataset/ml-100k/u.data', sep='\t', names=['userId', 'movieId', 'rating', 'timestamp'])
    .drop(columns=['timestamp'])
)

# Re-number raw ids as contiguous 0-based indices (in order of first appearance)
# so they can be used directly as embedding row indices.
user_map = {raw_id: index for index, raw_id in enumerate(ratings['userId'].unique())}
movie_map = {raw_id: index for index, raw_id in enumerate(ratings['movieId'].unique())}

# Swap the raw ids for their encoded indices.
ratings = ratings.assign(
    userId=ratings['userId'].map(user_map),
    movieId=ratings['movieId'].map(movie_map),
)

# The map sizes give the number of distinct users and movies.
num_users, num_movies = len(user_map), len(movie_map)
print(f'Number of users: {num_users}, Number of movies: {num_movies}')

Number of users: 943, Number of movies: 1682


In [5]:
from torch.utils.data import Dataset, DataLoader

class MovieLensDataset(Dataset):
    """Tensor-backed view of a ratings table for use with a ``DataLoader``.

    Args:
        ratings: Table exposing ``'userId'``, ``'movieId'`` and ``'rating'``
            columns (each accessed via ``ratings[column].values``).

    Each item is a ``(user_id, movie_id, rating)`` triple; the ids are
    ``torch.long`` and the rating is ``torch.float32``.
    """

    def __init__(self, ratings):
        def column_tensor(column, dtype):
            # Copy one column of the table into a tensor of the requested dtype.
            return torch.tensor(ratings[column].values, dtype=dtype)

        self.user_ids = column_tensor('userId', torch.long)
        self.movie_ids = column_tensor('movieId', torch.long)
        self.ratings = column_tensor('rating', torch.float32)

    def __len__(self):
        """Number of rating rows held by the dataset."""
        return self.ratings.shape[0]

    def __getitem__(self, idx):
        """Return the ``(user_id, movie_id, rating)`` entries at ``idx``."""
        return (self.user_ids[idx], self.movie_ids[idx], self.ratings[idx])

# Wrap the ratings frame in a Dataset and serve it in shuffled mini-batches of 64.
dataset = MovieLensDataset(ratings)
train_loader = DataLoader(dataset=dataset, batch_size=64, shuffle=True)

In [6]:
import torch.nn as nn

class NCF(nn.Module):
    """MLP-based collaborative filtering model.

    A user embedding and an item embedding are concatenated, passed through a
    stack of ``Linear`` + ``ReLU`` layers, and projected to a single score.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_size: Width of each embedding vector.
        hidden_units: Sizes of the hidden MLP layers, in order. May be empty,
            in which case the concatenated embeddings feed the output layer
            directly.
    """

    def __init__(self, num_users, num_items, embedding_size=50, hidden_units=(64, 32, 16)):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)

        # MLP layers: the first layer consumes the concatenated user+item embedding.
        layers = []
        input_size = embedding_size * 2
        for hidden_unit in hidden_units:
            layers.append(nn.Linear(input_size, hidden_unit))
            layers.append(nn.ReLU())
            input_size = hidden_unit
        self.mlp = nn.Sequential(*layers)

        # Output layer. `input_size` is the width of the last hidden layer, or the
        # concatenated embedding width when `hidden_units` is empty (indexing
        # `hidden_units[-1]` would raise in that case).
        self.output = nn.Linear(input_size, 1)

    def forward(self, user, item):
        """Score each (user, item) index pair.

        Args:
            user: Long tensor of user indices.
            item: Long tensor of item indices, same shape as ``user``.

        Returns:
            Tensor of scores with the same shape as ``user``.
        """
        # Embedding lookup
        user_embedded = self.user_embedding(user)
        item_embedded = self.item_embedding(item)

        # Concatenate user and item embeddings along the feature axis
        x = torch.cat([user_embedded, item_embedded], dim=-1)

        # Pass through MLP
        x = self.mlp(x)

        # Output layer
        x = self.output(x)

        # Drop only the trailing size-1 feature axis. A bare `squeeze()` would also
        # collapse a batch of size 1 to a 0-d scalar, which no longer matches a
        # `(1,)` target in the loss.
        return x.squeeze(-1)

In [7]:
import torch.optim as optim

# Build the network and place its parameters on the selected device
model = NCF(num_users, num_movies).to(device)

# Mean-squared-error objective, optimised with Adam
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Number of full passes over the training loader
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in train_loader:
        # Each batch is a (user, item, rating) triple; move all three to the device
        user, item, rating = (tensor.to(device) for tensor in batch)

        # Clear stale gradients, then forward pass, backward pass, parameter update
        optimizer.zero_grad()
        outputs = model(user, item)
        loss = criterion(outputs, rating)
        loss.backward()
        optimizer.step()

        # Accumulate this batch's loss as a plain Python float
        running_loss += loss.item()

    # Report the mean of the per-batch losses for this epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

Epoch [1/5], Loss: 1.3580
Epoch [2/5], Loss: 0.9431
Epoch [3/5], Loss: 0.8871
Epoch [4/5], Loss: 0.8612
Epoch [5/5], Loss: 0.8378


In [8]:
def predict_rating(user_id, movie_id):
    """Return the model's predicted rating for one (user, movie) pair.

    Uses the module-level ``model`` and ``device``; the model is switched to
    evaluation mode and left in that mode.

    Args:
        user_id: Integer user index passed to the model (presumably the
            0-based encoded id, not the raw dataset id - verify against caller).
        movie_id: Integer movie index passed to the model (same assumption).

    Returns:
        The prediction as a Python float.
    """
    model.eval()
    # Wrap each index in a one-element long tensor on the model's device.
    user_tensor, item_tensor = (
        torch.tensor([index], dtype=torch.long).to(device)
        for index in (user_id, movie_id)
    )
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        score = model(user_tensor, item_tensor)
    return score.item()

# Example query: score the pair (user index 0, movie index 1)
user_id, movie_id = 0, 1
predicted_rating = predict_rating(user_id, movie_id)
print(f'Predicted rating for user {user_id} and movie {movie_id}: {predicted_rating:.2f}')

Predicted rating for user 0 and movie 1: 4.13
