In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split


In [2]:
# Execute the EDA/preprocessing notebook inside this kernel so the objects it builds
# become available here. Later cells use `rating_clean` and `anime_df`, which are not
# defined anywhere in this notebook -- presumably they come from that run (TODO confirm).
%run "EDA & Data_Preprocessing.ipynb"

Dataset shapes:
Anime: (12294, 7)
Ratings: (7813737, 3)

--- Anime Dataset ---

--- Rating Dataset ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7813737 entries, 0 to 7813736
Data columns (total 3 columns):
 #   Column    Dtype
---  ------    -----
 0   user_id   int64
 1   anime_id  int64
 2   rating    int64
dtypes: int64(3)
memory usage: 178.8 MB

---- Anime Statistics ----

--- Rating Statistics ----

--- Missing Values ---
(12294, 53)
(7788750, 12)
(7788750, 1

In [3]:

# ============================================
# MODEL 1: Matrix Factorization with Deep Learning
# ============================================

class MatrixFactorizationNN(nn.Module):
    """
    Neural Collaborative Filtering (NCF) style rating predictor.

    Each user and each item gets a learned embedding vector. The two vectors are
    concatenated and passed through an MLP whose last layer is a single linear
    unit, so the network emits one unbounded score per (user, item) pair.

    Args:
        num_users: Number of rows in the user embedding table; valid user indices
            are ``0 .. num_users - 1``.
        num_items: Number of rows in the item embedding table; valid item indices
            are ``0 .. num_items - 1``.
        embedding_dim: Width of each user / item embedding vector.
        hidden_layers: Widths of the MLP hidden layers, in order. Only iterated,
            never mutated.
        dropout: Dropout probability applied after every hidden layer.
    """

    def __init__(self, num_users, num_items, embedding_dim=50, hidden_layers=(128, 64, 32),
                 dropout=0.2):
        super(MatrixFactorizationNN, self).__init__()

        # Embedding layers
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

        # MLP layers: Linear -> ReLU -> Dropout per hidden width, then a 1-unit head.
        layers = []
        input_dim = embedding_dim * 2  # user and item embeddings are concatenated

        for hidden_dim in hidden_layers:
            layers.append(nn.Linear(input_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            input_dim = hidden_dim

        layers.append(nn.Linear(input_dim, 1))

        self.mlp = nn.Sequential(*layers)

        # Initialize weights
        self._init_weights()

    def _init_weights(self):
        """Apply Xavier-uniform initialization to both embedding tables."""
        nn.init.xavier_uniform_(self.user_embedding.weight)
        nn.init.xavier_uniform_(self.item_embedding.weight)

    def forward(self, user_ids, item_ids):
        """
        Score each (user, item) pair.

        Args:
            user_ids: 1-D integer tensor of user indices, shape ``(batch,)``.
            item_ids: 1-D integer tensor of item indices, same shape as ``user_ids``.

        Returns:
            Float tensor of shape ``(batch,)`` with one score per pair.
        """
        user_embeds = self.user_embedding(user_ids)
        item_embeds = self.item_embedding(item_ids)

        # Concatenate embeddings along the feature axis -> (batch, 2 * embedding_dim)
        x = torch.cat([user_embeds, item_embeds], dim=1)

        # Pass through MLP -> (batch, 1)
        output = self.mlp(x)

        # Drop only the trailing unit axis. A bare squeeze() would also remove the
        # batch axis when batch == 1 and return a 0-d tensor that no longer matches
        # a (1,)-shaped target.
        return output.squeeze(-1)


class AnimeDataset(Dataset):
    """
    Map-style dataset of (user index, item index, rating) triples.

    The three inputs are converted once, at construction, into tensors: user and
    item indices as 64-bit integers, ratings as 32-bit floats.
    """

    def __init__(self, users, items, ratings):
        # Index columns share the integer tensor type expected by nn.Embedding.
        self.users, self.items = torch.LongTensor(users), torch.LongTensor(items)
        self.ratings = torch.FloatTensor(ratings)

    def __len__(self):
        # The dataset size is the length of the user column.
        return self.users.shape[0]

    def __getitem__(self, idx):
        # Pick the same position out of every column, in (user, item, rating) order.
        columns = (self.users, self.items, self.ratings)
        return tuple(column[idx] for column in columns)


def train_ncf_model(train_df, num_epochs=10, batch_size=256, learning_rate=0.001, seed=None):
    """
    Train a MatrixFactorizationNN on explicit ratings with an MSE objective.

    Args:
        train_df: DataFrame with ``user_id``, ``anime_id`` and ``rating`` columns.
            It is only read; no columns are added to it.
        num_epochs: Number of full passes over the data.
        batch_size: Mini-batch size for the shuffled DataLoader.
        learning_rate: Adam learning rate.
        seed: Optional integer passed to ``torch.manual_seed`` before the model and
            the DataLoader are created. Note that this sets torch's global RNG.
            ``None`` (default) leaves the RNG state untouched.

    Returns:
        ``(model, user_map, item_map)`` where the maps translate raw ``user_id`` /
        ``anime_id`` values to the contiguous row indices used by the embedding
        tables. The model is returned still in training mode; call ``model.eval()``
        before using it for inference.

    Raises:
        ValueError: If ``train_df`` has no rows.
    """
    if len(train_df) == 0:
        # Without this guard the epoch average below divides by zero batches.
        raise ValueError("train_df is empty; nothing to train on")

    if seed is not None:
        torch.manual_seed(seed)

    # Create user and item mappings (raw id -> contiguous index, first-seen order)
    user_ids = train_df['user_id'].unique()
    item_ids = train_df['anime_id'].unique()

    user_map = {uid: idx for idx, uid in enumerate(user_ids)}
    item_map = {iid: idx for idx, iid in enumerate(item_ids)}

    # Map IDs to indices in local arrays. Writing them back as new columns would
    # silently modify the DataFrame the caller passed in.
    user_idx = train_df['user_id'].map(user_map).values
    item_idx = train_df['anime_id'].map(item_map).values

    # Create dataset
    dataset = AnimeDataset(user_idx, item_idx, train_df['rating'].values)

    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Initialize model
    model = MatrixFactorizationNN(
        num_users=len(user_ids),
        num_items=len(item_ids),
        embedding_dim=64,
        hidden_layers=[128, 64, 32]
    )

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

    # Training loop
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for user_batch, item_batch, rating_batch in dataloader:
            optimizer.zero_grad()

            # Force predictions into the target's shape so a final batch holding a
            # single row cannot reach the loss as a 0-d tensor.
            predictions = model(user_batch, item_batch).reshape(rating_batch.shape)
            loss = criterion(predictions, rating_batch)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # Mean of the per-batch losses for this epoch.
        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

    return model, user_map, item_map



In [4]:
# ============================================
# MODEL 2: Two-Tower Neural Network (YouTube-style)
# ============================================

class TwoTowerModel(nn.Module):
    """
    Two-tower scoring model.

    Users and items are encoded by two independent towers (embedding lookup
    followed by a small MLP) into vectors of the same width; the score of a pair
    is the dot product of the two vectors. Because the item tower never sees the
    user, item vectors can be computed ahead of time and reused for every user.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_dim: Width of the embedding tables and of each tower's output.
    """

    def __init__(self, num_users, num_items, embedding_dim=64):
        super(TwoTowerModel, self).__init__()

        # User Tower
        self.user_tower = nn.Sequential(
            nn.Embedding(num_users, embedding_dim),
            nn.Linear(embedding_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, embedding_dim)
        )

        # Item Tower (same layout as the user tower, separate weights)
        self.item_tower = nn.Sequential(
            nn.Embedding(num_items, embedding_dim),
            nn.Linear(embedding_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, embedding_dim)
        )

    def forward(self, user_ids, item_ids):
        """
        Score each (user, item) pair.

        Args:
            user_ids: Integer tensor of user indices, shape ``(batch,)``.
            item_ids: Integer tensor of item indices, shape ``(batch,)``.

        Returns:
            Float tensor of shape ``(batch,)``: row-wise dot product of the two
            tower outputs.
        """
        user_embeds = self.user_tower(user_ids)
        item_embeds = self.item_tower(item_ids)

        # Dot product for similarity
        scores = (user_embeds * item_embeds).sum(dim=1)

        return scores

    @staticmethod
    def _embed_for_inference(tower, ids):
        """Run ``tower`` on ``ids`` without gradients and with dropout disabled."""
        was_training = tower.training
        # no_grad() alone does not switch Dropout off; in training mode the same id
        # would yield a different vector on every call.
        tower.eval()
        try:
            with torch.no_grad():
                return tower(ids)
        finally:
            # Restore whatever mode the tower was in so training can continue.
            tower.train(was_training)

    def get_user_embedding(self, user_id):
        """Return the user-tower output for ``user_id`` (integer index tensor).

        The result is deterministic and detached from autograd regardless of
        whether the model is currently in training or eval mode.
        """
        return self._embed_for_inference(self.user_tower, user_id)

    def get_item_embedding(self, item_id):
        """Return the item-tower output for ``item_id`` (integer index tensor).

        The result is deterministic and detached from autograd regardless of
        whether the model is currently in training or eval mode.
        """
        return self._embed_for_inference(self.item_tower, item_id)



In [5]:
# ============================================
# MODEL 3: Session-Based Recommendations (GRU)
# ============================================

class SessionGRU(nn.Module):
    """
    Next-item scorer over a sequence of item indices.

    Items are embedded, the embedded sequence is run through a GRU, and the GRU's
    final hidden state is projected onto one score per catalogue item.
    """

    def __init__(self, num_items, embedding_dim=64, hidden_dim=128):
        super().__init__()

        self.item_embedding = nn.Embedding(num_embeddings=num_items, embedding_dim=embedding_dim)
        self.gru = nn.GRU(input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=num_items)

    def forward(self, item_sequence):
        """
        Args:
            item_sequence: (batch_size, seq_length) integer tensor of item indices.

        Returns:
            (batch_size, num_items) tensor of scores for the next item.
        """
        # Only the final hidden states are needed; the per-step outputs are discarded.
        _, final_states = self.gru(self.item_embedding(item_sequence))

        # final_states is (num_layers, batch, hidden); [-1] selects the last layer.
        return self.fc(final_states[-1])



In [6]:

# ============================================
# MODEL 4: Feature-Enhanced Hybrid Model
# ============================================

class HybridRecommender(nn.Module):
    """
    Rating predictor that mixes collaborative and content signals.

    Inputs to the final MLP are the concatenation of:
    - a user embedding and an item embedding (collaborative filtering part), and
    - the mean of the embeddings of the item's genre ids (content part, width 32).

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        num_genres: Number of rows in the genre embedding table.
        embedding_dim: Width of the user and item embeddings.
    """

    def __init__(self, num_users, num_items, num_genres, embedding_dim=64):
        super(HybridRecommender, self).__init__()

        # Collaborative filtering embeddings
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

        # Content features
        self.genre_embedding = nn.Embedding(num_genres, 32)

        # Combine CF and content: input is user + item + pooled genre vector
        self.fc = nn.Sequential(
            nn.Linear(embedding_dim * 2 + 32, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, user_ids, item_ids, genre_ids):
        """
        Score each (user, item) pair.

        Args:
            user_ids: Integer tensor of user indices, shape ``(batch,)``.
            item_ids: Integer tensor of item indices, shape ``(batch,)``.
            genre_ids: Integer tensor of genre indices, shape ``(batch, n_genres)``.
                Every column takes part in the average, so any padding id is
                averaged in as well.
                NOTE(review): confirm how callers pad items with fewer genres.

        Returns:
            Float tensor of shape ``(batch,)`` with one score per pair.
        """
        user_embeds = self.user_embedding(user_ids)
        item_embeds = self.item_embedding(item_ids)
        # (batch, n_genres, 32) -> (batch, 32): average over the genre axis
        genre_embeds = self.genre_embedding(genre_ids).mean(dim=1)

        # Concatenate all features
        x = torch.cat([user_embeds, item_embeds, genre_embeds], dim=1)

        output = self.fc(x)
        # Remove only the trailing unit axis; a bare squeeze() would also drop the
        # batch axis for a batch of one and return a 0-d tensor.
        return output.squeeze(-1)



In [7]:

# ============================================
# INFERENCE & RECOMMENDATION
# ============================================

class RecommenderInference:
    """
    Top-N recommendation on top of a trained model.

    Item embeddings are computed once at construction and cached in
    ``self.item_embeddings`` (``{anime_id: array of shape (1, dim)}``).

    Two scoring strategies are supported:
    - Dot product of user and item vectors. This is what a model exposing
      ``get_user_embedding`` / ``get_item_embedding`` computes in its own forward
      pass, so it is the default for such models.
    - Calling ``model(user_idx, item_idx)`` for every item. This is the default for
      all other models: when a model scores pairs by feeding concatenated embeddings
      through further layers, the raw embedding dot product is not its prediction.

    Args:
        model: Trained ``nn.Module``.
        user_map: ``{user_id: user_index}`` as used during training.
        item_map: ``{anime_id: item_index}`` as used during training.
        anime_df: DataFrame with at least ``anime_id`` and ``rating`` columns, used
            for the cold-start fallback.
        score_with_model: ``True`` to score through ``model(user_idx, item_idx)``,
            ``False`` to score by embedding dot product, ``None`` (default) to
            choose automatically as described above.
    """

    def __init__(self, model, user_map, item_map, anime_df, score_with_model=None):
        self.model = model
        self.user_map = user_map
        self.item_map = item_map
        self.anime_df = anime_df

        if score_with_model is None:
            is_two_tower = (hasattr(model, 'get_user_embedding')
                            and hasattr(model, 'get_item_embedding'))
            score_with_model = not is_two_tower
        self.score_with_model = bool(score_with_model)

        # Pre-compute all item embeddings
        self.item_embeddings = self._precompute_item_embeddings()

    def _model_device(self):
        """Device of the model's first parameter, or CPU if it has none."""
        for param in self.model.parameters():
            return param.device
        return torch.device('cpu')

    def _precompute_item_embeddings(self):
        """Compute every item embedding in one batched call and cache the results.

        Besides returning the ``{anime_id: (1, dim) array}`` dict, this stores the
        id order, the index tensor and the stacked embedding matrix on the instance
        for vectorized scoring in ``recommend``.
        """
        self.model.eval()

        anime_ids = list(self.item_map.keys())
        index_tensor = torch.tensor(
            [int(self.item_map[anime_id]) for anime_id in anime_ids],
            dtype=torch.long,
            device=self._model_device(),
        )

        with torch.no_grad():
            # Get embedding from model
            if hasattr(self.model, 'item_embedding'):
                embeds = self.model.item_embedding(index_tensor)
            else:
                # For two-tower model
                embeds = self.model.get_item_embedding(index_tensor)

        item_matrix = embeds.cpu().numpy()

        self._anime_ids = anime_ids
        self._item_index_tensor = index_tensor
        self._item_matrix = item_matrix

        # One (1, dim) slice per item keeps the shape of a single-id lookup.
        return {anime_id: item_matrix[row:row + 1] for row, anime_id in enumerate(anime_ids)}

    def _score_all_items(self, user_idx):
        """Return a 1-D NumPy array of scores, aligned with ``self._anime_ids``."""
        self.model.eval()
        with torch.no_grad():
            if self.score_with_model:
                # Pair this user with every item and let the model score the pairs.
                user_batch = torch.full_like(self._item_index_tensor, int(user_idx))
                scores = self.model(user_batch, self._item_index_tensor)
                # reshape(-1) also covers models that return a 0-d tensor for one item
                return scores.reshape(-1).cpu().numpy()

            user_tensor = torch.tensor([int(user_idx)], dtype=torch.long,
                                       device=self._model_device())
            if hasattr(self.model, 'user_embedding'):
                user_embed = self.model.user_embedding(user_tensor)
            else:
                user_embed = self.model.get_user_embedding(user_tensor)

        # Dot product similarity against all cached item vectors at once
        return self._item_matrix @ user_embed.cpu().numpy().reshape(-1)

    def recommend(self, user_id, n_recommendations=10, exclude_watched=True, watched_items=None):
        """
        Generate top-N recommendations for a user.

        Args:
            user_id: Raw user id (a key of ``user_map``).
            n_recommendations: Maximum number of results.
            exclude_watched: If true, ids in ``watched_items`` are removed from the
                ranking.
            watched_items: Optional iterable of anime ids the user has already seen.
                When omitted nothing is filtered, because this class has no access
                to the user's history.

        Returns:
            List of ``(anime_id, score)`` tuples, best first. For a user missing
            from ``user_map`` the list holds the top-rated anime and their
            ``rating`` instead.
        """
        if user_id not in self.user_map:
            # Cold start: return popular items
            return self._popular_fallback(n_recommendations)

        scores = self._score_all_items(self.user_map[user_id])

        # Sort by score (stable, so ties keep item_map order)
        ranked = sorted(
            zip(self._anime_ids, scores.tolist()),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Filter watched items if needed
        if exclude_watched and watched_items:
            watched = set(watched_items)
            ranked = [(aid, score) for aid, score in ranked if aid not in watched]

        return ranked[:max(n_recommendations, 0)]

    def _popular_fallback(self, n):
        """Fallback for cold start: the ``n`` highest-rated anime as (id, rating) tuples."""
        top = self.anime_df.nlargest(n, 'rating')
        # Convert each column separately: going through a combined .values array
        # would upcast the integer ids to float.
        return list(zip(top['anime_id'].tolist(), top['rating'].tolist()))



In [None]:

# ============================================
# USAGE EXAMPLE
# ============================================

# 1. Train model
model, user_map, item_map = train_ncf_model(
    train_df=rating_clean,
    num_epochs=10,
    batch_size=256
)

# 2. Save model
# The id mappings are stored with built-in int keys/values rather than NumPy
# scalars, so the checkpoint contains only tensors and plain Python containers.
# That is intended to keep the file loadable by torch.load's restricted
# (weights-only) unpickler -- assumes user_id / anime_id are integer-valued.
torch.save({
    'model_state': model.state_dict(),
    'user_map': {int(uid): int(idx) for uid, idx in user_map.items()},
    'item_map': {int(aid): int(idx) for aid, idx in item_map.items()},
}, 'anime_recommender.pth')

# 3. Load and inference
checkpoint = torch.load('anime_recommender.pth')
model.load_state_dict(checkpoint['model_state'])

# Build inference from the *loaded* mappings so this cell really exercises the
# save/load round trip instead of the objects still sitting in memory.
inference = RecommenderInference(model, checkpoint['user_map'], checkpoint['item_map'], anime_df)
recommendations = inference.recommend(user_id=123, n_recommendations=10)

# One id -> name lookup table instead of scanning anime_df once per recommendation.
anime_names = anime_df.drop_duplicates('anime_id').set_index('anime_id')['name']

print("Top 10 recommendations:")
for anime_id, score in recommendations:
    # .get() avoids an IndexError when a recommended id has no row in anime_df.
    anime_name = anime_names.get(int(anime_id), f"<unknown anime_id {anime_id}>")
    print(f"{anime_name}: {score:.4f}")


Epoch 1/10, Loss: 5.4503
Epoch 2/10, Loss: 4.7423
Epoch 3/10, Loss: 4.5943
Epoch 4/10, Loss: 4.5404
Epoch 5/10, Loss: 4.4618
Epoch 6/10, Loss: 4.4064
Epoch 7/10, Loss: 4.3749
