In [58]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


ratings = pd.read_csv("C:/Users/lenovo/OneDrive/Desktop/ratings.csv", nrows=100000)  # Loading only the first 100k rows as more data was taking a lot of memory

# Encoding the userId and movieId as continuous indices
user_ids = ratings['userId'].astype('category').cat.codes.values
movie_ids = ratings['movieId'].astype('category').cat.codes.values
ratings['userId'] = user_ids
ratings['movieId'] = movie_ids

# Convertig to tensors
users = torch.tensor(user_ids, dtype=torch.long)
movies = torch.tensor(movie_ids, dtype=torch.long)
ratings_tensor = torch.tensor(ratings['rating'].values, dtype=torch.float32)


train_users, test_users, train_movies, test_movies, train_ratings, test_ratings = train_test_split(
    users, movies, ratings_tensor, test_size=0.2, random_state=42
)

# Defineing the Collaborative Filtering Model
class Recommender(nn.Module):
    def __init__(self, num_users, num_movies, embedding_dim=50):
        super(Recommender, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.movie_embedding = nn.Embedding(num_movies, embedding_dim)
        self.fc = nn.Linear(embedding_dim * 2, 1)  # Fully connected layer

    def forward(self, user, movie):
        user_emb = self.user_embedding(user)
        movie_emb = self.movie_embedding(movie)
        x = torch.cat([user_emb, movie_emb], dim=1)
        x = self.fc(x)
        return x.view(-1)


num_users = users.max().item() + 1
num_movies = movies.max().item() + 1
model = Recommender(num_users, num_movies)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)


epochs = 20
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    predictions = model(train_users, train_movies)
    loss = criterion(predictions, train_ratings)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")

# Evaluating on test data
model.eval()
with torch.no_grad():
    test_predictions = model(test_users, test_movies)
    test_loss = criterion(test_predictions, test_ratings)
    print(f"Test Loss: {test_loss.item()}")


movies_df = pd.read_csv("C:/Users/lenovo/OneDrive/Desktop/movies_metadata.csv", low_memory=False)


movies_df['id'] = movies_df['id'].astype(str)


def recommend_movies(user_id, movie_df, top_n=10):
    """Recommend top N movies for a given user"""
    model.eval()
    
    # Get all movie indices
    all_movie_ids = torch.arange(num_movies, dtype=torch.long)
    user_tensor = torch.tensor([user_id] * num_movies, dtype=torch.long)
    
    with torch.no_grad():
        predicted_ratings = model(user_tensor, all_movie_ids)
    
    # Sort movies by predicted rating
    top_movies = torch.argsort(predicted_ratings, descending=True)[:top_n]
    
    # Convert indices to original movie IDs
    recommended_movie_ids = ratings[['movieId']].drop_duplicates().iloc[top_movies.numpy()]['movieId'].values

    # Match recommended movie IDs with titles
    matched_movies = movie_df[movie_df['id'].astype(str).isin(map(str, recommended_movie_ids))][['title']].copy()

    # Ensure at least `top_n` movies are returned
    if len(matched_movies) < top_n:
        extra_movies = movie_df[~movie_df['title'].isna()].sample(top_n - len(matched_movies))['title']
        matched_movies = pd.concat([matched_movies, extra_movies])

    return matched_movies['title'].tolist()


user_id = 1000  # Change this to any user ID
recommended_movies = recommend_movies(user_id, movies_df, top_n=10)

print(f"Top 10 Recommended Movies for User {user_id}:")
print(recommended_movies)


Epoch 1/20, Loss: 13.606049537658691
Epoch 2/20, Loss: 13.294517517089844
Epoch 3/20, Loss: 13.001571655273438
Epoch 4/20, Loss: 12.72293472290039
Epoch 5/20, Loss: 12.453154563903809
Epoch 6/20, Loss: 12.18696117401123
Epoch 7/20, Loss: 11.919876098632812
Epoch 8/20, Loss: 11.648111343383789
Epoch 9/20, Loss: 11.368651390075684
Epoch 10/20, Loss: 11.079366683959961
Epoch 11/20, Loss: 10.77892017364502
Epoch 12/20, Loss: 10.46664047241211
Epoch 13/20, Loss: 10.142382621765137
Epoch 14/20, Loss: 9.806462287902832
Epoch 15/20, Loss: 9.459607124328613
Epoch 16/20, Loss: 9.102937698364258
Epoch 17/20, Loss: 8.737943649291992
Epoch 18/20, Loss: 8.366470336914062
Epoch 19/20, Loss: 7.990702152252197
Epoch 20/20, Loss: 7.61314058303833
Test Loss: 7.30539083480835
Top 10 Recommended Movies for User 1000:
['Reality Bites', 'Full Metal Jacket', 'Top Secret!', 'Harsh Times', 'Pirates of the Caribbean: On Stranger Tides', 'M.D. Geist II: Death Force', 'Andre', 'Regarding Henry', 'From the Hip', 'I