In [None]:
# Music Recommendation System using Neural Collaborative Filtering in PyTorch
# -----------------------------------------------------------------------------
# This script builds a music recommendation system using synthetic user-song rating data.
# It trains a neural network to predict ratings a user would give to a song.
# Based on this, the model can recommend songs with the highest predicted ratings.

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

# Pin PyTorch's RNG seed so repeated runs draw the same random numbers, then
# select the compute device: CUDA when a GPU is visible, otherwise the CPU
torch.manual_seed(42)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Generate synthetic user-song interaction data
num_users = 1000
num_songs = 5000
num_ratings = 50000
np.random.seed(42)
# Draw 50,000 random (user, song, rating) triples with ratings between 1 and 5.
# Each column is sampled with a single vectorized call rather than one Python-level
# RNG call per value; the fixed seed above keeps the result reproducible.
ratings_data = {
    'user_id': np.random.randint(0, num_users, size=num_ratings).tolist(),
    'song_id': np.random.randint(0, num_songs, size=num_ratings).tolist(),
    'rating': np.random.randint(1, 6, size=num_ratings).tolist(),
}
# Independent draws can produce the same (user, song) pair more than once with
# different ratings. Keep only the first rating per pair so the table holds one
# label per pair and a pair can never land on both sides of a later train/test
# split. The resulting frame therefore has at most 50,000 rows.
ratings_df = (
    pd.DataFrame(ratings_data)
    .drop_duplicates(subset=['user_id', 'song_id'], keep='first')
    .reset_index(drop=True)
)

# Dataset wrapper that serves the ratings table as (user, song, rating) tensors
class MusicDataset(Dataset):
    """Tensor-backed view of a ratings DataFrame.

    The 'user_id', 'song_id' and 'rating' columns of ``df`` are copied into
    tensors once at construction time: the two ID columns as ``torch.long``
    and the ratings as ``torch.float32``.
    """

    def __init__(self, df):
        def column_as_tensor(column, dtype):
            # Copy one DataFrame column into a tensor of the requested dtype
            return torch.tensor(df[column].values, dtype=dtype)

        self.users = column_as_tensor('user_id', torch.long)
        self.songs = column_as_tensor('song_id', torch.long)
        self.ratings = column_as_tensor('rating', torch.float32)

    def __len__(self):
        # One sample per rating row
        return self.ratings.shape[0]

    def __getitem__(self, idx):
        # A sample is the (user, song, rating) triple stored at position idx
        sample = (self.users[idx], self.songs[idx], self.ratings[idx])
        return sample

# Hold out 20% of the ratings for testing; the fixed random_state makes the
# split repeatable
train_df, test_df = train_test_split(ratings_df, random_state=42, test_size=0.2)
train_dataset, test_dataset = MusicDataset(train_df), MusicDataset(test_df)

# Serve both splits in mini-batches of 128; only the training batches are reshuffled
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=128)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=128)

# Define a simple Neural Collaborative Filtering model
class MusicRecommender(nn.Module):
    """Predict a user's rating for a song from learned ID embeddings.

    Each user ID and song ID is mapped to an ``embedding_dim``-sized vector.
    The two vectors are concatenated and passed through a small MLP
    (2*embedding_dim -> 128 -> 64 -> 1, ReLU + dropout between layers) that
    outputs one unbounded float per (user, song) pair.

    Args:
        num_users: Number of distinct user IDs (size of the user embedding table).
        num_songs: Number of distinct song IDs (size of the song embedding table).
        embedding_dim: Length of each embedding vector.
    """

    def __init__(self, num_users, num_songs, embedding_dim=50):
        super().__init__()
        # Embedding layers convert user/song IDs to dense vectors
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.song_embedding = nn.Embedding(num_songs, embedding_dim)

        # Fully connected layers applied to the concatenated user and song embeddings
        self.fc_layers = nn.Sequential(
            nn.Linear(embedding_dim * 2, 128),  # Input is both embeddings side by side
            nn.ReLU(),
            nn.Dropout(0.3),                    # Active in train() mode only
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 1)                    # Final output: predicted rating (a single float)
        )

    def forward(self, user, song):
        """Return predicted ratings for paired ``user`` and ``song`` ID tensors.

        Both inputs are integer ID tensors of the same shape. The result has
        that same shape, so a batch of one yields shape ``(1,)``.
        """
        # Look up user and song embeddings
        user_emb = self.user_embedding(user)
        song_emb = self.song_embedding(song)
        # Concatenate user and song features to form the input to the FC layers
        x = torch.cat([user_emb, song_emb], dim=-1)
        # Drop only the trailing size-1 feature axis. A bare squeeze() would also
        # remove the batch axis when the batch holds a single sample, producing a
        # 0-dim tensor whose shape no longer matches the (1,) target.
        return self.fc_layers(x).squeeze(-1)

# Build the network and move it to the selected device, then create the
# mean-squared-error objective and an Adam optimizer (learning rate 1e-3)
model = MusicRecommender(num_users=num_users, num_songs=num_songs)
model = model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Training loop with a periodic check on the held-out data
def train_model(model, train_loader, test_loader, epochs=200):
    """Fit ``model`` on ``train_loader`` and report losses every 20 epochs.

    Relies on the module-level ``optimizer``, ``criterion`` and ``device``.
    Each loader is expected to yield ``(users, songs, ratings)`` batches.
    Every 20th epoch the model is switched to eval mode, scored on
    ``test_loader`` without gradients, and the mean per-batch train and test
    losses are printed. Returns nothing.
    """
    for epoch in range(epochs):
        model.train()  # Dropout is active while fitting
        running_loss = 0.0
        for batch in train_loader:
            # Place the whole batch on the same device as the model
            users, songs, ratings = (tensor.to(device) for tensor in batch)

            optimizer.zero_grad()
            loss = criterion(model(users, songs), ratings)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Only every 20th epoch is evaluated and reported
        if (epoch + 1) % 20 != 0:
            continue

        model.eval()  # Dropout is disabled while scoring
        test_loss = 0.0
        with torch.no_grad():
            for batch in test_loader:
                users, songs, ratings = (tensor.to(device) for tensor in batch)
                test_loss += criterion(model(users, songs), ratings).item()
        print(f"Epoch {epoch + 1}: "
              f"Train Loss: {running_loss/len(train_loader):.4f}, "
              f"Test Loss: {test_loss/len(test_loader):.4f}")

# Announce the run, then train with the default number of epochs
print("Training Music Recommendation Model...")
train_model(model=model, train_loader=train_loader, test_loader=test_loader)

# Recommend top N songs for a given user
def recommend_songs(model, user_id, num_songs, num_recommend=5):
    """Return the song IDs with the highest predicted rating for one user.

    Every song ID in ``range(num_songs)`` is scored for ``user_id`` in a single
    forward pass with gradients disabled. The model is switched to eval mode
    and left in that mode.

    Args:
        model: Module called as ``model(user_ids, song_ids)`` that returns one
            score per pair.
        user_id: ID of the user to recommend for.
        num_songs: Number of candidate songs; IDs 0 .. num_songs - 1 are scored.
        num_recommend: Maximum number of songs to return.

    Returns:
        NumPy array of song IDs ordered from highest to lowest predicted
        rating. It holds ``min(num_recommend, num_songs)`` entries (none if
        that value is not positive).
    """
    model.eval()
    # Build the inputs on the device that holds the model's weights so the call
    # works wherever the model lives; a parameter-free model falls back to CPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")
    # torch.topk raises when k exceeds the number of candidates, so cap the request
    top_k = max(0, min(num_recommend, num_songs))
    with torch.no_grad():
        # Generate a list of all song IDs
        song_ids = torch.arange(num_songs, dtype=torch.long, device=model_device)
        # Create a tensor of the same user ID to match song IDs
        user_ids = torch.full_like(song_ids, user_id)
        # Predict ratings for all songs; flatten so a single-candidate result
        # that came back 0-dim is still a 1-D vector for topk
        predictions = model(user_ids, song_ids).reshape(-1)
        # Get top N song indices with highest predicted ratings
        _, top_indices = torch.topk(predictions, top_k)
        return top_indices.cpu().numpy()

# Demo: ask for the default five recommendations for user 0 across the whole catalogue
recommended_songs = recommend_songs(model, 0, num_songs)
print(f"\nRecommended songs for user 0: {recommended_songs}")

# Score the trained model one last time on the held-out split and report the
# mean per-batch loss
model.eval()
test_loss = 0.0
with torch.no_grad():
    for batch in test_loader:
        # Place the whole batch on the same device as the model
        users, songs, ratings = (tensor.to(device) for tensor in batch)
        outputs = model(users, songs)
        loss = criterion(outputs, ratings)
        test_loss += loss.item()
print(f"\nFinal Test Loss: {test_loss/len(test_loader):.4f}")


Training Music Recommendation Model...
Epoch 20: Train Loss: 1.4543, Test Loss: 2.4177
Epoch 40: Train Loss: 0.8179, Test Loss: 2.8023
Epoch 60: Train Loss: 0.5438, Test Loss: 3.0330
Epoch 80: Train Loss: 0.4268, Test Loss: 3.1503
Epoch 100: Train Loss: 0.3576, Test Loss: 3.0704
Epoch 120: Train Loss: 0.3179, Test Loss: 3.1039
Epoch 140: Train Loss: 0.2897, Test Loss: 3.1401
Epoch 160: Train Loss: 0.2644, Test Loss: 3.0921
Epoch 180: Train Loss: 0.2484, Test Loss: 3.0493
Epoch 200: Train Loss: 0.2323, Test Loss: 3.1142

Recommended songs for user 0: [ 235  117 2960 4466 1954]

Final Test Loss: 3.1142
