In [1]:
import os
folder_path = '../data/raw/ml-100k'


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Re-loading the ratings DataFrame
ratings_file = folder_path+'/u.data'
ratings_df = pd.read_csv(ratings_file, sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])

# Define a custom dataset for PyTorch
class MovieLensDataset(Dataset):
    def __init__(self, users, movies, ratings):
        self.users = torch.LongTensor(users)
        self.movies = torch.LongTensor(movies)
        self.ratings = torch.FloatTensor(ratings)

    def __len__(self):
        return len(self.users)

    def __getitem__(self, idx):
        return self.users[idx], self.movies[idx], self.ratings[idx]


# Encoding user IDs and movie IDs
user_ids = ratings_df['user_id'].astype('category').cat.codes.values
movie_ids = ratings_df['movie_id'].astype('category').cat.codes.values

# Normalizing ratings
scaler = MinMaxScaler()
ratings = scaler.fit_transform(ratings_df[['rating']].values.astype(float)).flatten()

# Splitting the dataset into training and test set
train_user_ids, test_user_ids, train_movie_ids, test_movie_ids, train_ratings, test_ratings = train_test_split(
    user_ids, movie_ids, ratings, test_size=0.2, random_state=42)

# Creating PyTorch datasets
train_dataset = MovieLensDataset(train_user_ids, train_movie_ids, train_ratings)
test_dataset = MovieLensDataset(test_user_ids, test_movie_ids, test_ratings)

# Creating data loaders
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

# Number of unique users and movies
num_users = len(np.unique(user_ids))
num_movies = len(np.unique(movie_ids))

num_users, num_movies



(943, 1682)

In [3]:
class RecommenderNet(nn.Module):
    def __init__(self, num_users, num_movies, embedding_size):
        super(RecommenderNet, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.movie_embedding = nn.Embedding(num_movies, embedding_size)
        self.fc1 = nn.Linear(embedding_size * 2, 128)
        self.fc2 = nn.Linear(128, 1)
        self.dropout = nn.Dropout(0.5)

    def forward(self, user_input, movie_input):
        user_embedded = self.user_embedding(user_input)
        movie_embedded = self.movie_embedding(movie_input)
        x = torch.cat([user_embedded, movie_embedded], dim=1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.sigmoid(self.fc2(x))
        return x


In [4]:
# Initialize the model
num_epochs=15
embedding_size = 10
model = RecommenderNet(num_users, num_movies, embedding_size)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop
for epoch in range(num_epochs):
    model.train()
    for user_input, movie_input, ratings in train_loader:
        # Convert inputs to LongTensor
        user_input = user_input.long()
        movie_input = movie_input.long()
        ratings = ratings.float()

        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(user_input, movie_input)
        loss = criterion(outputs, ratings.view(-1, 1))
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")



Epoch 1/15, Loss: 0.07265891134738922
Epoch 2/15, Loss: 0.053911130875349045
Epoch 3/15, Loss: 0.055319659411907196
Epoch 4/15, Loss: 0.053758345544338226
Epoch 5/15, Loss: 0.05349694564938545
Epoch 6/15, Loss: 0.05598805844783783
Epoch 7/15, Loss: 0.05083082243800163
Epoch 8/15, Loss: 0.058934204280376434
Epoch 9/15, Loss: 0.03695748746395111
Epoch 10/15, Loss: 0.04944753646850586
Epoch 11/15, Loss: 0.055173423141241074
Epoch 12/15, Loss: 0.04911082610487938
Epoch 13/15, Loss: 0.045614201575517654
Epoch 14/15, Loss: 0.052079711109399796
Epoch 15/15, Loss: 0.050773151218891144
