<a href="https://colab.research.google.com/github/Dara4hem/Recommendation-System/blob/main/Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Download and Load the Dataset

In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# Example: Load MovieLens dataset
url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
!wget -nc $url
!unzip -n ml-latest-small.zip

ratings = pd.read_csv('ml-latest-small/ratings.csv')
train_data, test_data = train_test_split(ratings, test_size=0.2)


File ‘ml-latest-small.zip’ already there; not retrieving.

Archive:  ml-latest-small.zip


Define Dataset and DataLoader

In [16]:
class RatingsDataset(Dataset):
    """Tensor-backed ratings table that yields ``(user, item, rating)`` triples.

    ``data`` must be indexable by the column names ``'userId'``, ``'movieId'``
    and ``'rating'``, each returning an object with a ``.values`` array.
    """

    def __init__(self, data):
        # Pull the three columns out once, then convert each to a tensor.
        user_col, item_col, rating_col = (
            data[column].values for column in ('userId', 'movieId', 'rating')
        )
        # Id tensors keep whatever dtype torch infers from the column.
        self.users = torch.tensor(user_col)
        self.items = torch.tensor(item_col)
        # Ratings are forced to float32.
        self.ratings = torch.tensor(rating_col, dtype=torch.float32)

    def __len__(self):
        # One sample per rating row.
        return self.ratings.shape[0]

    def __getitem__(self, idx):
        sample = (self.users[idx], self.items[idx], self.ratings[idx])
        return sample

# Wrap each split in a RatingsDataset, then batch it: the training loader is
# reshuffled every epoch, the test loader keeps a fixed order.
train_dataset, test_dataset = RatingsDataset(train_data), RatingsDataset(test_data)

train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)


Determine the Number of Users and Items

In [17]:
# Size of each learned user/item vector.
embedding_dim = 50

# Embedding tables are indexed by the raw ids, so each table needs
# (largest id + 1) rows.
num_users = 1 + ratings['userId'].max()
num_items = 1 + ratings['movieId'].max()


Build the Recommendation Model

In [18]:
class RecommendationModel(nn.Module):
    """Dot-product recommender over one user and one item embedding table.

    The score for a (user, item) pair is the dot product of their vectors.
    """

    def __init__(self, num_users, num_items, embedding_dim):
        super().__init__()
        # User table is created before the item table (same order as before).
        self.user_embedding = nn.Embedding(num_embeddings=num_users, embedding_dim=embedding_dim)
        self.item_embedding = nn.Embedding(num_embeddings=num_items, embedding_dim=embedding_dim)

    def forward(self, user, item):
        """Return one score per (user[i], item[i]) pair."""
        user_vectors = self.user_embedding(user)
        item_vectors = self.item_embedding(item)
        # Element-wise product summed over dim 1 == row-wise dot product.
        pairwise = user_vectors * item_vectors
        return pairwise.sum(dim=1)

# Build the model, then the loss (mean squared error on the rating) and the
# Adam optimizer over all of the model's parameters.
model = RecommendationModel(
    num_users=num_users,
    num_items=num_items,
    embedding_dim=embedding_dim,
)
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.01)


Train the Model with Progress Bar

In [None]:
from tqdm import tqdm

def train(model, train_loader, criterion, optimizer, num_epochs):
    """Fit ``model`` on ``train_loader`` and print the mean loss of each epoch.

    Args:
        model: Module called as ``model(users, items)``.
        train_loader: Iterable of ``(users, items, ratings)`` batches.
        criterion: Loss called as ``criterion(outputs, ratings)``. It is
            expected to return the *mean* loss of the batch (as the notebook's
            ``nn.MSELoss()`` does); the epoch average relies on that.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        None. One summary line is printed per epoch.
    """
    model.train()
    for epoch in range(num_epochs):
        # Accumulate the *sum* of per-sample losses and the sample count, so
        # a smaller final batch is not over-weighted in the epoch average.
        loss_sum = 0.0
        sample_count = 0
        # Initialize the progress bar
        for users, items, ratings in tqdm(train_loader, desc=f'Epoch {epoch+1}', unit='batch'):
            optimizer.zero_grad()
            outputs = model(users, items)
            loss = criterion(outputs, ratings)
            loss.backward()
            optimizer.step()

            # batch mean * batch size == summed loss of this batch
            batch_size = ratings.shape[0]
            loss_sum += loss.item() * batch_size
            sample_count += batch_size

        # Print the average per-sample loss for this epoch (NaN if the loader
        # yielded nothing, instead of dividing by zero).
        avg_loss = loss_sum / sample_count if sample_count else float('nan')
        print(f'Epoch {epoch+1}, Average Loss: {avg_loss:.4f}')

# Run five training epochs; train() shows a tqdm progress bar per epoch.
train(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=5,
)


Epoch 1: 100%|█████████▉| 1260/1261 [04:17<00:00,  5.50batch/s]

Evaluate the Model

In [None]:
def evaluate(model, test_loader, loss_fn=None):
    """Return the mean per-sample loss of ``model`` over ``test_loader``.

    Args:
        model: Module called as ``model(users, items)``; switched to eval mode.
        test_loader: Iterable of ``(users, items, ratings)`` batches.
        loss_fn: Loss called as ``loss_fn(outputs, ratings)`` and expected to
            return the *mean* loss of the batch. When omitted, the
            notebook-level ``criterion`` is used, as before.

    Returns:
        float: Sum of per-sample losses divided by the number of samples, or
        NaN when ``test_loader`` yields no samples.
    """
    if loss_fn is None:
        # Backward-compatible default: the loss object defined next to the model.
        loss_fn = criterion

    model.eval()
    loss_sum = 0.0
    sample_count = 0
    with torch.no_grad():
        for users, items, ratings in test_loader:
            outputs = model(users, items)
            # Weight each batch mean by its size so a short last batch does
            # not skew the result.
            batch_size = ratings.shape[0]
            loss_sum += loss_fn(outputs, ratings).item() * batch_size
            sample_count += batch_size

    if sample_count == 0:
        return float('nan')
    return loss_sum / sample_count

# Score the trained model on the held-out loader and report the result.
test_loss = evaluate(model=model, test_loader=test_loader)
print(f'Test Loss: {test_loss}')


Load Movie Titles

In [None]:
# Read the movie metadata table and keep its titles as a plain Python list
# (one entry per row of movies.csv, in file order).
movies = pd.read_csv('ml-latest-small/movies.csv')
movie_titles = list(movies['title'])


Define Recommendation Function and Test

In [None]:
def get_movie_recommendations(user_id, model, movie_titles, num_recommendations=5, movie_ids=None):
    """Return the titles with the highest predicted score for one user.

    Args:
        user_id: User id, used directly as the row index into
            ``model.user_embedding``.
        model: Model exposing ``user_embedding`` and ``item_embedding`` layers.
        movie_titles: Sequence of candidate movie titles.
        num_recommendations: Maximum number of titles to return.
        movie_ids: Optional sequence parallel to ``movie_titles`` giving the
            ``model.item_embedding`` row (the movieId the model was trained
            on) for each title. When omitted, title position ``i`` is used as
            item id ``i``, which is only correct if ids are exactly
            ``0..len(movie_titles)-1``.

    Returns:
        list: Up to ``num_recommendations`` titles, best score first.

    Raises:
        ValueError: If ``movie_ids`` and ``movie_titles`` differ in length.
    """
    num_candidates = len(movie_titles)
    if movie_ids is not None and len(movie_ids) != num_candidates:
        raise ValueError(
            f"movie_ids has {len(movie_ids)} entries but movie_titles has {num_candidates}"
        )

    # topk raises when k exceeds the number of candidates, so clamp it.
    top_k = min(num_recommendations, num_candidates)
    if top_k <= 0:
        return []

    # Build index tensors on the same device as the embedding weights.
    device = model.user_embedding.weight.device
    user_id_tensor = torch.tensor([user_id], device=device)
    if movie_ids is None:
        candidate_ids = torch.arange(num_candidates, device=device)
    else:
        candidate_ids = torch.tensor(list(movie_ids), dtype=torch.long, device=device)

    # Get embeddings for the user and all candidate movies
    with torch.no_grad():
        user_embedding = model.user_embedding(user_id_tensor)
        movie_embeddings = model.item_embedding(candidate_ids)

        # Calculate predicted ratings; squeeze only the user axis so a single
        # candidate still yields a 1-D tensor for topk.
        predicted_ratings = (user_embedding @ movie_embeddings.t()).squeeze(0)

        # Positions (into movie_titles / movie_ids) of the top scores
        top_positions = torch.topk(predicted_ratings, top_k).indices.tolist()

    # Get the corresponding movie titles
    return [movie_titles[position] for position in top_positions]

# Get recommendations for a sample user (e.g., user_id=1)
sample_user_id = 1
# The item embedding is indexed by raw movieId, and the row position in
# movies.csv is not the movieId, so pass the ids explicitly alongside the titles.
recommended_movies = get_movie_recommendations(
    sample_user_id,
    model,
    movie_titles,
    num_recommendations=5,
    movie_ids=movies['movieId'].tolist(),
)

# Display the recommended movies
print(f"Recommended movies for user {sample_user_id}:")
for i, movie_title in enumerate(recommended_movies, 1):
    print(f"{i}. {movie_title}")
