In [19]:
import pandas as pd
from torch import nn, optim
from sklearn.model_selection import train_test_split

ratings = pd.read_csv('../data/lens_tmdb/ratings_small.csv')
ratings['userId'] = ratings['userId'].astype('category').cat.codes.values
ratings['movieId'] = ratings['movieId'].astype('category').cat.codes.values
train, valid = train_test_split(ratings, test_size=0.2)




In [20]:
# from torch.utils.data import Dataset, DataLoader
# # MovieLens Dataset
# class MovieLensDataset(Dataset):
#     def __init__(self, df):
#         self.users = df['userId'].cat.codes.values
#         self.movies = df['movieId'].cat.codes.values
#         self.ratings = df['rating'].values
#
#     def __len__(self):
#         return len(self.ratings)
#
#     def __getitem__(self, idx):
#         return self.users[idx], self.movies[idx], self.ratings[idx]


In [21]:
import torch
import torch.nn as nn

class RecommenderRNN(nn.Module):
    def __init__(self, n_users, n_movies, n_factors, hidden_size):
        super().__init__()
        self.user_emb = nn.Embedding(n_users, n_factors)
        self.movie_emb = nn.Embedding(n_movies, n_factors)
        self.rnn = nn.RNN(n_factors * 2, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, users, movies):
        user_emb = self.user_emb(users)
        movie_emb = self.movie_emb(movies)
        x = torch.cat([user_emb, movie_emb], dim=1)
        x, _ = self.rnn(x.unsqueeze(1))
        x = self.fc(x.squeeze(1))
        return x

n_users = len(ratings['userId'].unique())
n_movies = len(ratings['movieId'].unique())
n_factors = 50
hidden_size = 64
model = RecommenderRNN(n_users, n_movies, n_factors, hidden_size)


In [15]:
def rmse(predictions, targets):
    return torch.sqrt(torch.mean((predictions - targets) ** 2))

In [22]:
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
n_epochs = 10
for epoch in range(n_epochs):
    model.train()
    total_loss = 0
    for _, row in train.iterrows():
        user = torch.LongTensor([int(row['userId'])]).to(device)
        movie = torch.LongTensor([int(row['movieId'])]).to(device)
        rating = torch.FloatTensor([float(row['rating'])]).to(device)

        optimizer.zero_grad()
        prediction = model(user, movie)
        loss = criterion(prediction, rating)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{n_epochs}, Loss: {total_loss/len(train)}")


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/10, Loss: 0.9941380616683048
Epoch 2/10, Loss: 0.8337304513400677
Epoch 3/10, Loss: 0.7681676854335572
Epoch 4/10, Loss: 0.7160558120252601
Epoch 5/10, Loss: 0.6687520823694334
Epoch 6/10, Loss: 0.6257108379525442
Epoch 7/10, Loss: 0.5869633907143088
Epoch 8/10, Loss: 0.5516647528403157
Epoch 9/10, Loss: 0.5202086933660031
Epoch 10/10, Loss: 0.4916394420354959
