In [1]:
# Cell 1: Import required libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split  
from surprise import SVD, Dataset, Reader
from implicit.als import AlternatingLeastSquares
from scipy.sparse import csr_matrix
import pickle
from typing import Tuple, List, Dict, Set
import torch
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict
from tqdm import tqdm

In [None]:
recommendatios_df = pd.read_pickle('recommendations_processed.pkl')

In [2]:
# Cell 2: Neural Matrix Factorization (NeuMF) Model
class NeuMF(nn.Module):
    def __init__(self, num_users: int, num_items: int, num_factors: int = 32,
                 layers: List[int] = [64, 32, 16, 8], dropout: float = 0.1):
        super(NeuMF, self).__init__()
        
        # GMF part
        self.user_embedding_gmf = nn.Embedding(num_users, num_factors)
        self.item_embedding_gmf = nn.Embedding(num_items, num_factors)
        
        # MLP part
        self.user_embedding_mlp = nn.Embedding(num_users, num_factors)
        self.item_embedding_mlp = nn.Embedding(num_items, num_factors)
        
        # MLP layers
        self.mlp_layers = nn.ModuleList()
        input_size = num_factors * 2
        for layer_size in layers:
            self.mlp_layers.append(nn.Linear(input_size, layer_size))
            self.mlp_layers.append(nn.ReLU())
            self.mlp_layers.append(nn.Dropout(dropout))
            input_size = layer_size
        
        # Prediction layer
        self.prediction = nn.Linear(layers[-1] + num_factors, 1)
        
        # Initialize weights
        self._init_weights()
    
    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Embedding):
                nn.init.normal_(m.weight, std=0.01)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
    
    def forward(self, user_input, item_input):
        # GMF part
        user_embedding_gmf = self.user_embedding_gmf(user_input)
        item_embedding_gmf = self.item_embedding_gmf(item_input)
        gmf_vector = user_embedding_gmf * item_embedding_gmf
        
        # MLP part
        user_embedding_mlp = self.user_embedding_mlp(user_input)
        item_embedding_mlp = self.item_embedding_mlp(item_input)
        mlp_vector = torch.cat([user_embedding_mlp, item_embedding_mlp], dim=-1)
        
        for layer in self.mlp_layers:
            mlp_vector = layer(mlp_vector)
        
        # Concatenate GMF and MLP parts
        vector = torch.cat([gmf_vector, mlp_vector], dim=-1)
        
        # Final prediction
        prediction = self.prediction(vector)
        return prediction.squeeze()

class NeuMFRecommender:
    def __init__(self, num_factors: int = 32, learning_rate: float = 0.001,
                 batch_size: int = 256, epochs: int = 20, device: str = 'cuda' if torch.cuda.is_available() else 'cpu'):
        self.num_factors = num_factors
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.epochs = epochs
        self.device = device
        self.model = None
        self.optimizer = None
        
    def fit(self, ratings_matrix: np.ndarray, user_mapping: Dict, item_mapping: Dict) -> None:
        num_users, num_items = ratings_matrix.shape
        
        # Create model
        self.model = NeuMF(num_users, num_items, self.num_factors).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        criterion = nn.MSELoss()
        
        # Get non-zero ratings
        user_indices, item_indices = np.nonzero(ratings_matrix)
        ratings = ratings_matrix[user_indices, item_indices]
        
        # Convert to tensors
        user_indices = torch.LongTensor(user_indices).to(self.device)
        item_indices = torch.LongTensor(item_indices).to(self.device)
        ratings = torch.FloatTensor(ratings).to(self.device)
        
        # Training loop
        for epoch in range(self.epochs):
            self.model.train()
            total_loss = 0
            num_batches = len(ratings) // self.batch_size
            
            for i in range(num_batches):
                start_idx = i * self.batch_size
                end_idx = start_idx + self.batch_size
                
                batch_users = user_indices[start_idx:end_idx]
                batch_items = item_indices[start_idx:end_idx]
                batch_ratings = ratings[start_idx:end_idx]
                
                # Forward pass
                predictions = self.model(batch_users, batch_items)
                loss = criterion(predictions, batch_ratings)
                
                # Backward pass
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                
                total_loss += loss.item()
            
            avg_loss = total_loss / num_batches
            if (epoch + 1) % 5 == 0:
                print(f'Epoch {epoch+1}/{self.epochs}, Loss: {avg_loss:.4f}')
    
    def predict(self, user_idx: int, item_idx: int) -> float:
        self.model.eval()
        with torch.no_grad():
            user_tensor = torch.LongTensor([user_idx]).to(self.device)
            item_tensor = torch.LongTensor([item_idx]).to(self.device)
            prediction = self.model(user_tensor, item_tensor)
            return prediction.item()

In [3]:
# Cell 3: Main Recommender System Class
class ModelBasedRecommender:
    def __init__(self):
        self.svd_model = None
        self.neumf_model = None
        self.als_model = None
        self.pmf_model = None
        self.user_mapping = None
        self.item_mapping = None
        self.reverse_user_mapping = None
        self.reverse_item_mapping = None
        self.ratings_matrix = None
        self.train_df = None
        self.val_df = None
        self.test_df = None
    
    def setup_data(self, train_df: pd.DataFrame, val_df: pd.DataFrame, test_df: pd.DataFrame) -> None:
        """Set up the data structures needed for the recommender system using pre-split data"""
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df
        
        # Create user and item mappings from training data only
        unique_users = train_df['user_id'].unique()
        unique_items = train_df['app_id'].unique()
        
        self.user_mapping = {user: idx for idx, user in enumerate(unique_users)}
        self.item_mapping = {item: idx for idx, item in enumerate(unique_items)}
        self.reverse_user_mapping = {idx: user for user, idx in self.user_mapping.items()}
        self.reverse_item_mapping = {idx: item for item, idx in self.item_mapping.items()}
        
        # Create ratings matrix from training data
        n_users = len(unique_users)
        n_items = len(unique_items)
        self.ratings_matrix = np.zeros((n_users, n_items))
        
        for _, row in train_df.iterrows():
            if row['user_id'] in self.user_mapping and row['app_id'] in self.item_mapping:
                user_idx = self.user_mapping[row['user_id']]
                item_idx = self.item_mapping[row['app_id']]
                self.ratings_matrix[user_idx, item_idx] = row['rating']
    
    def train_svd(self, n_factors: int = 100) -> None:
        """Train SVD model using surprise library."""
        if self.ratings_matrix is None:
            raise ValueError("Must call setup_data before training models")
            
        reader = Reader(rating_scale=(1, 5))
        ratings_list = []
        for user_idx in range(self.ratings_matrix.shape[0]):
            for item_idx in range(self.ratings_matrix.shape[1]):
                if self.ratings_matrix[user_idx, item_idx] > 0:
                    ratings_list.append({
                        'user': self.reverse_user_mapping[user_idx],
                        'item': self.reverse_item_mapping[item_idx],
                        'rating': self.ratings_matrix[user_idx, item_idx]
                    })
        
        df = pd.DataFrame(ratings_list)
        data = Dataset.load_from_df(df[['user', 'item', 'rating']], reader)
        trainset = data.build_full_trainset()
        
        self.svd_model = SVD(n_factors=n_factors)
        self.svd_model.fit(trainset)
        
    def train_als(self, factors: int = 100, regularization: float = 0.01, 
                  iterations: int = 15, alpha: float = 40) -> None:
        """Train Alternating Least Squares model using our own implementation."""
        if self.ratings_matrix is None:
            raise ValueError("Must call setup_data before training models")
    
        n_users, n_items = self.ratings_matrix.shape
    
        # Initialize random matrices
        user_factors = np.random.random((n_users, factors))
        item_factors = np.random.random((n_items, factors))
    
        # Get indices of non-zero entries
        user_idx, item_idx = self.ratings_matrix.nonzero()
        ratings = self.ratings_matrix[user_idx, item_idx]
    
        # Convert to confidence values
        confidence = 1 + alpha * ratings
    
        # Alternate between updating user and item factors
        for iteration in range(iterations):
            # Update user factors
            YtY = item_factors.T @ item_factors
            for u in range(n_users):
                # Get items rated by user u
                items = self.ratings_matrix[u].nonzero()[0]
                if len(items) == 0:
                    continue
                
                # Get confidence values for this user's ratings
                Cu = confidence[user_idx == u]
                Y_u = item_factors[items]
            
                # Compute user factors
                A = YtY + np.dot(Y_u.T, (Cu[:, np.newaxis] - 1) * Y_u)
                b = np.dot(Y_u.T, Cu * ratings[user_idx == u])
                user_factors[u] = np.linalg.solve(A + regularization * np.eye(factors), b)
        
            # Update item factors
            XtX = user_factors.T @ user_factors
            for i in range(n_items):
                # Get users who rated item i
                users = self.ratings_matrix[:, i].nonzero()[0]
                if len(users) == 0:
                    continue
                
                # Get confidence values for this item's ratings
                Ci = confidence[item_idx == i]
                X_i = user_factors[users]
            
                # Compute item factors
                A = XtX + np.dot(X_i.T, (Ci[:, np.newaxis] - 1) * X_i)
                b = np.dot(X_i.T, Ci * ratings[item_idx == i])
                item_factors[i] = np.linalg.solve(A + regularization * np.eye(factors), b)
        
            if (iteration + 1) % 5 == 0:
                print(f'Completed iteration {iteration + 1}/{iterations}')
    
        # Store the model components
        self.als_model = {
            'user_factors': user_factors,
            'item_factors': item_factors
        }
        
    def train_pmf(self, n_factors: int = 100, learning_rate: float = 0.005, 
                  regularization: float = 0.02, iterations: int = 50, 
                  batch_size: int = 1000) -> None:
        """Train Probabilistic Matrix Factorization model."""
        if self.ratings_matrix is None:
            raise ValueError("Must call setup_data before training models")
    
        n_users, n_items = self.ratings_matrix.shape
    
        # Initialize random matrices from normal distribution
        self.user_factors = np.random.normal(0, 0.1, (n_users, n_factors))
        self.item_factors = np.random.normal(0, 0.1, (n_items, n_factors))
    
        # Get all non-zero ratings
        user_indices, item_indices = self.ratings_matrix.nonzero()
        ratings = self.ratings_matrix[user_indices, item_indices]
        n_ratings = len(ratings)
    
        # Normalize ratings to [0, 1] range for numerical stability
        ratings = (ratings - ratings.min()) / (ratings.max() - ratings.min())
    
        # Training loop
        for iteration in range(iterations):
            # Shuffle the data
            idx = np.random.permutation(n_ratings)
            user_indices = user_indices[idx]
            item_indices = item_indices[idx]
            ratings = ratings[idx]
        
            total_loss = 0
        
            # Mini-batch training
            for i in range(0, n_ratings, batch_size):
                batch_users = user_indices[i:i + batch_size]
                batch_items = item_indices[i:i + batch_size]
                batch_ratings = ratings[i:i + batch_size]
            
                # Compute predictions
                pred_ratings = np.sum(
                    self.user_factors[batch_users] * self.item_factors[batch_items], 
                    axis=1
                )
            
                # Compute gradients
                error = batch_ratings - pred_ratings
                total_loss += np.sum(error ** 2)
            
                # Update user factors
                user_gradients = -2 * error[:, np.newaxis] * self.item_factors[batch_items] + \
                               2 * regularization * self.user_factors[batch_users]
                self.user_factors[batch_users] -= learning_rate * user_gradients
            
                # Update item factors
                item_gradients = -2 * error[:, np.newaxis] * self.user_factors[batch_users] + \
                               2 * regularization * self.item_factors[batch_items]
                self.item_factors[batch_items] -= learning_rate * item_gradients
        
            # Print progress
            if (iteration + 1) % 10 == 0:
                rmse = np.sqrt(total_loss / n_ratings)
                print(f'Iteration {iteration + 1}/{iterations}, RMSE: {rmse:.4f}')
    
        # Store the model
        self.pmf_model = {
            'user_factors': self.user_factors,
            'item_factors': self.item_factors
        }
        
    def train_neumf(self, n_factors: int = 32, learning_rate: float = 0.001,
                   batch_size: int = 256, epochs: int = 20) -> None:
        
        if self.ratings_matrix is None:
            raise ValueError("Must call setup_data before training models")
        
        # Initialize NeuMF model
        self.neumf_model = NeuMFRecommender(
            num_factors=n_factors,
            learning_rate=learning_rate,
            batch_size=batch_size,
            epochs=epochs
        )
    
        # Train the model
        self.neumf_model.fit(self.ratings_matrix, self.user_mapping, self.item_mapping)
    
    def get_recommendations(self, user_id: int, n_recommendations: int = 5, 
                          model_type: str = 'svd') -> List[Tuple[int, float]]:
        """Get top N recommendations for a user."""
        if user_id not in self.user_mapping:
            return []
        
        if model_type == 'svd':
            if self.svd_model is None:
                raise ValueError("SVD model not trained")
            predictions = []
            for item_id in self.item_mapping.keys():
                pred = self.svd_model.predict(user_id, item_id).est
                predictions.append((item_id, pred))
        elif model_type == 'als':
            if self.als_model is None:
                raise ValueError("ALS model not trained")
            user_idx = self.user_mapping[user_id]
            # Get scores for all items
            scores = self.als_model['user_factors'][user_idx] @ self.als_model['item_factors'].T
            # Create predictions list
            predictions = [(self.reverse_item_mapping[i], score) 
                          for i, score in enumerate(scores)]
            predictions.sort(key=lambda x: x[1], reverse=True)
            return predictions[:n_recommendations]
        elif model_type == 'pmf':
            if self.pmf_model is None:
                raise ValueError("PMF model not trained")
            user_idx = self.user_mapping[user_id]
            # Get scores for all items
            scores = np.dot(self.pmf_model['user_factors'][user_idx], 
                           self.pmf_model['item_factors'].T)
            # Create predictions list
            predictions = [(self.reverse_item_mapping[i], score) 
                          for i, score in enumerate(scores)]
        else:  # neumf
            if self.neumf_model is None:
                raise ValueError("NeuMF model not trained")
            user_idx = self.user_mapping[user_id]
            predictions = []
            for item_idx in range(len(self.item_mapping)):
                pred = self.neumf_model.predict(user_idx, item_idx)
                predictions.append((self.reverse_item_mapping[item_idx], pred))
    
        predictions.sort(key=lambda x: x[1], reverse=True)
        return predictions[:n_recommendations]

In [4]:
def create_train_val_test_split(recommendations_df, test_size=0.15, val_size=0.15, timestamp_col='date'):
    # Sort by the specified timestamp column if it exists
    if timestamp_col in recommendations_df.columns:
        recommendations_df = recommendations_df.sort_values(timestamp_col)
    
    # Group by user
    user_groups = recommendations_df.groupby('user_id')
    
    train_data = []
    val_data = []
    test_data = []
    
    for user_id, user_data in user_groups:
        n_interactions = len(user_data)
        
        if n_interactions < 3:  # Need at least 3 interactions
            continue
            
        # Split chronologically
        n_test = max(1, int(n_interactions * test_size))
        n_val = max(1, int(n_interactions * val_size))
        
        user_train = user_data.iloc[:-n_test-n_val]
        user_val = user_data.iloc[-n_test-n_val:-n_test]
        user_test = user_data.iloc[-n_test:]
        
        train_data.append(user_train)
        val_data.append(user_val)
        test_data.append(user_test)
    
    return pd.concat(train_data), pd.concat(val_data), pd.concat(test_data)

# Create the splits
train_df, val_df, test_df = create_train_val_test_split(recommendations_df)

# Print split sizes
print(f"Train set: {len(train_df)} recommendations")
print(f"Validation set: {len(val_df)} recommendations")
print(f"Test set: {len(test_df)} recommendations")

In [5]:
def calculate_metrics_at_k(predictions: Dict[int, List[int]], 
                         ground_truth: Dict[int, Set[int]], 
                         k: int = 10) -> Tuple[float, float, float, float]:
    """
    Calculate Precision, Recall, F1, and NDCG at k for recommendations
    
    Parameters:
    -----------
    predictions : Dict[int, List[int]]
        Dictionary of user_id to list of recommended item_ids
    ground_truth : Dict[int, Set[int]]
        Dictionary of user_id to set of actual item_ids
    k : int
        Number of recommendations to consider
    
    Returns:
    --------
    Tuple[float, float, float, float]
        (precision@k, recall@k, f1@k, ndcg@k)
    """
    precision_list = []
    recall_list = []
    f1_list = []
    ndcg_list = []

    for user in ground_truth:
        if user not in predictions:
            continue
            
        pred = list(predictions[user])[:k]
        truth = ground_truth[user]
        if not truth:
            continue

        # Calculate Precision and Recall
        pred_set = set(pred)
        intersection = pred_set & truth

        precision = len(intersection) / k if len(pred) >= k else len(intersection) / len(pred)
        recall = len(intersection) / len(truth)
        
        # Calculate F1 score
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        
        # Calculate NDCG
        dcg = 0
        for i, item in enumerate(pred):
            rel = 1 if item in truth else 0
            dcg += rel / np.log2(i + 2)  # i + 2 because i starts from 0
            
        # Calculate IDCG
        ideal_ranking = [1] * min(len(truth), k)
        idcg = sum([rel / np.log2(i + 2) for i, rel in enumerate(ideal_ranking)])
        ndcg = dcg / idcg if idcg > 0 else 0
        
        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)
        ndcg_list.append(ndcg)

    if not precision_list:
        return 0.0, 0.0, 0.0, 0.0

    avg_precision = sum(precision_list) / len(precision_list)
    avg_recall = sum(recall_list) / len(recall_list)
    avg_f1 = sum(f1_list) / len(f1_list)
    avg_ndcg = sum(ndcg_list) / len(ndcg_list)

    return avg_precision, avg_recall, avg_f1, avg_ndcg

In [6]:
# Initialize recommender
recommender = ModelBasedRecommender()

# Load data and create train-test split
print("Loading data and creating train-test split...")
with open('recommendations_processed.pkl', 'rb') as f:
    ratings_data = pickle.load(f)

# Create the splits using the baseline model's function
train_df, val_df, test_df = create_train_val_test_split(ratings_data)
print(f"Train set: {len(train_df)} recommendations")
print(f"Validation set: {len(val_df)} recommendations")
print(f"Test set: {len(test_df)} recommendations")

# Setup the recommender with the data
recommender.setup_data(train_df, val_df, test_df)

# Train models
print("\nTraining SVD model...")
recommender.train_svd()

print("\nTraining ALS model...")
recommender.train_als()

print("\nTraining PMF model...")
recommender.train_pmf()

print("Training NeuMF model...")
recommender.train_neumf()

# Evaluate models
print("\nEvaluating models on test set...")
k = 5

# Create ground truth from test set
ground_truth = defaultdict(set)
for _, row in test_df.iterrows():
    if row['is_recommended'] == 1:
        ground_truth[row['user_id']].add(row['app_id'])

# Evaluate each model
results = {}
for model_type in ['svd', 'als', 'pmf', 'neumf']:
    print(f"\nEvaluating {model_type.upper()} model:")
    predictions = defaultdict(list)
    test_users = test_df['user_id'].unique()
    
    for user_id in tqdm(test_users, desc=f"Getting {model_type.upper()} predictions"):
        if user_id not in recommender.user_mapping:
            continue
        recs = recommender.get_recommendations(user_id, n_recommendations=k, model_type=model_type)
        predictions[user_id] = [rec[0] for rec in recs]
    
    precision, recall, f1, ndcg = calculate_metrics_at_k(predictions, ground_truth, k=k)
    results[model_type] = {
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'ndcg': ndcg
    }
    
    print(f"Precision@{k}: {precision:.4f}")
    print(f"Recall@{k}: {recall:.4f}")
    print(f"F1@{k}: {f1:.4f}")
    print(f"NDCG@{k}: {ndcg:.4f}")

# Print comparison table
print("\nModel Comparison:")
print("-" * 60)
print(f"{'Model':<10} {'Precision@k':>12} {'Recall@k':>12} {'F1@k':>12} {'NDCG@k':>12}")
print("-" * 60)
for model_type, metrics in results.items():
    print(f"{model_type.upper():<10} {metrics['precision']:>12.4f} {metrics['recall']:>12.4f} "
          f"{metrics['f1']:>12.4f} {metrics['ndcg']:>12.4f}")
print("-" * 60)

Loading data and creating train-test split...
Train set: 724198 recommendations
Validation set: 155186 recommendations
Test set: 155186 recommendations

Training SVD model...

Training ALS model...
Completed iteration 5/15
Completed iteration 10/15
Completed iteration 15/15

Training PMF model...
Iteration 10/50, RMSE: 0.7649
Iteration 20/50, RMSE: 0.3268
Iteration 30/50, RMSE: 0.1949
Iteration 40/50, RMSE: 0.1494
Iteration 50/50, RMSE: 0.1257
Training NeuMF model...
Epoch 5/20, Loss: 0.0120
Epoch 10/20, Loss: 0.0056
Epoch 15/20, Loss: 0.0037
Epoch 20/20, Loss: 0.0028

Evaluating models on test set...

Evaluating SVD model:


Getting SVD predictions: 100%|██████████| 147226/147226 [03:38<00:00, 673.35it/s]


Precision@5: 0.0005
Recall@5: 0.0015
F1@5: 0.0007
NDCG@5: 0.0010

Evaluating ALS model:


Getting ALS predictions: 100%|██████████| 147226/147226 [00:32<00:00, 4487.66it/s]


Precision@5: 0.0109
Recall@5: 0.0495
F1@5: 0.0176
NDCG@5: 0.0271

Evaluating PMF model:


Getting PMF predictions: 100%|██████████| 147226/147226 [00:32<00:00, 4529.18it/s]


Precision@5: 0.0015
Recall@5: 0.0066
F1@5: 0.0024
NDCG@5: 0.0037

Evaluating NEUMF model:


Getting NEUMF predictions: 100%|██████████| 147226/147226 [3:29:14<00:00, 11.73it/s]  


Precision@5: 0.0021
Recall@5: 0.0096
F1@5: 0.0034
NDCG@5: 0.0063

Model Comparison:
------------------------------------------------------------
Model       Precision@k     Recall@k         F1@k       NDCG@k
------------------------------------------------------------
SVD              0.0005       0.0015       0.0007       0.0010
ALS              0.0109       0.0495       0.0176       0.0271
PMF              0.0015       0.0066       0.0024       0.0037
NEUMF            0.0021       0.0096       0.0034       0.0063
------------------------------------------------------------
