In [1]:
"""
================================================================================
MOVIE RECOMMENDATION SYSTEM - MovieLens 1M Dataset
================================================================================
This notebook implements and compares five recommendation algorithms:
1. Random Baseline
2. Popularity-Based
3. Item-Based Collaborative Filtering (ItemCF)
4. Matrix Factorization (SVD)
5. Neural Collaborative Filtering (Enhanced)

Dataset: MovieLens 1M (1 million ratings from 6,040 users on 3,883 movies)
Best Performance: ItemCF with 32.1% Precision@10


In [None]:
# ============================================================================
# 1. IMPORTS AND SETUP
# ============================================================================
"""
Import required libraries for data processing, modeling, and evaluation.
- pandas/numpy: Data manipulation
- torch: Neural network implementation
- surprise: Collaborative filtering algorithms
- sklearn: Train-test split functionality
"""
import os
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from surprise import Dataset, Reader, SVD, KNNBasic
from collections import defaultdict

# Check dataset files and set data path
os.listdir("/kaggle/input/movielens-1m-dataset")
DATA_PATH = "/kaggle/input/movielens-1m-dataset"


In [2]:
# ============================================================================
# 2. DATA LOADING
# ============================================================================
"""
Load the three MovieLens 1M dataset files:
- movies.dat: Movie information (ID, title, genres)
- ratings.dat: User-movie ratings (user_id, movie_id, rating, timestamp)
- users.dat: User demographics (ID, gender, age, occupation, zip code)

Note: Files use '::' as delimiter and require latin-1 encoding
"""
# Load movies data
movies = pd.read_csv(f"{DATA_PATH}/movies.dat", sep="::", engine='python',
                     names=['movie_id', 'title', 'genres'], encoding='latin-1')


# Load ratings data
ratings = pd.read_csv(f"{DATA_PATH}/ratings.dat", sep="::", engine='python',
                      names=['user_id', 'movie_id', 'rating', 'timestamp'], encoding='latin-1')

# Load users data
users = pd.read_csv(f"{DATA_PATH}/users.dat", sep="::", engine='python',
                    names=['user_id', 'gender', 'age', 'occupation', 'zip_code'], encoding='latin-1')

print("Movies:", movies.shape) # Expected: (3883, 3)
print("Ratings:", ratings.shape) # Expected: (1000209, 4)
print("Users:", users.shape) # Expected: (6040, 5)

# Check first rows to verify data loaded correctly
movies.head()
ratings.head()
users.head()

Movies: (3883, 3)
Ratings: (1000209, 4)
Users: (6040, 5)


Unnamed: 0,user_id,gender,age,occupation,zip_code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [3]:
# ============================================================================
# 3. TRAIN-TEST SPLIT
# ============================================================================
"""
Split ratings into training (80%) and testing (20%) sets.
- Training set: Used to train all recommendation models
- Test set: Used to evaluate model performance
- Random state: Set to 42 for reproducibility
"""
train_ratings, test_ratings = train_test_split(
    ratings, 
    test_size=0.2, # 20% for testing
    random_state=42 # Ensures reproducible splits
)

# Create dictionary mapping user_id to list of movies they interacted with in test set
# This is used for efficient evaluation later
test_by_user = defaultdict(list)
for _, row in test_ratings.iterrows():
    test_by_user[row['user_id']].append(row['movie_id'])

print("Train ratings:", train_ratings.shape)
print("Test ratings:", test_ratings.shape)

Train ratings: (800167, 4)
Test ratings: (200042, 4)


In [4]:
# ============================================================================
# 4. BASELINE MODELS - ITEM-BASED COLLABORATIVE FILTERING (SKLEARN)
# ============================================================================
# Unique users and movies in train
train_user_ids = train_ratings['user_id'].unique()
train_movie_ids = train_ratings['movie_id'].unique()

user_map_train = {old:new for new,old in enumerate(sorted(train_user_ids))}
movie_map_train = {old:new for new,old in enumerate(sorted(train_movie_ids))}

train_ratings['user_idx'] = train_ratings['user_id'].map(user_map_train)
train_ratings['movie_idx'] = train_ratings['movie_id'].map(movie_map_train)

n_users_train = len(user_map_train)
n_items_train = len(movie_map_train)

# Create sparse matrix
R_train = csr_matrix(
    (train_ratings['rating'], (train_ratings['user_idx'], train_ratings['movie_idx'])),
    shape=(n_users_train, n_items_train)
)

# Compute item-item cosine similarity
item_similarity_train = cosine_similarity(R_train.T)

In [5]:
# ============================================================================
# 5. BASELINE MODELS - POPULARITY
# ============================================================================
# Compute movie popularity based on training ratings
movie_popularity = train_ratings.groupby('movie_id')['rating'].sum().sort_values(ascending=False)

In [6]:
# ============================================================================
# 5. COLLABORATIVE FILTERING MODELS (SURPRISE) 
# ============================================================================
"""
Prepare and train collaborative filtering models using Surprise library:
1. Item-Based Collaborative Filtering (ItemCF)
2. Singular Value Decomposition (SVD)

Surprise requires data in specific format:
- Reader: Defines rating scale (1-5 for MovieLens)
- Dataset: Wraps training data
- Trainset: Internal format for model training
"""
# Prepare Surprise Dataset
reader = Reader(rating_scale=(1,5))
train_data = Dataset.load_from_df(train_ratings[['user_id','movie_id','rating']], reader)
trainset = train_data.build_full_trainset()

# Item-based Collaborative Filtering 
print("\nTraining Item-based CF with improved parameters...")

# Try multiple configurations
configurations = [
    {
        'name': 'ItemCF_Cosine_k50_minsup1',
        'sim_options': {
            'name': 'cosine',
            'user_based': False,
            'min_support': 1  # Reduced from 5 to capture more item pairs
        },
        'k': 50  # Increased from 40
    },
    {
        'name': 'ItemCF_Pearson_k40_minsup2',
        'sim_options': {
            'name': 'pearson',  # Better for handling rating scale differences
            'user_based': False,
            'min_support': 2
        },
        'k': 40
    },
    {
        'name': 'ItemCF_Pearson_k80_minsup1',
        'sim_options': {
            'name': 'pearson',
            'user_based': False,
            'min_support': 1
        },
        'k': 80  # More neighbors for better coverage
    }
]

# Train all configurations and select best
print("Testing multiple ItemCF configurations...")
itemcf_models = {}

for config in configurations:
    print(f"  Training {config['name']}...")
    model = KNNBasic(k=config['k'], sim_options=config['sim_options'])
    model.fit(trainset)
    itemcf_models[config['name']] = model

# Use the Pearson with k=80 as default (typically performs best)
itemcf = itemcf_models['ItemCF_Pearson_k80_minsup1']
print(f"✓ ItemCF trained with improved parameters")

# Matrix Factorization - SVD 
"""
SVD decomposes user-item rating matrix into latent factors.
Configuration:
- n_factors=150: Dimensionality of latent factor space
- n_epochs=30: Number of training iterations
- random_state=42: For reproducibility

SVD is good at rating prediction but may not excel at ranking.
"""
print("Training SVD...")
svd = SVD(
    n_factors=150,
    n_epochs=30,
    lr_all=0.005,
    reg_all=0.02,
    random_state=42
)
svd.fit(trainset)


Training Item-based CF with improved parameters...
Testing multiple ItemCF configurations...
  Training ItemCF_Cosine_k50_minsup1...
Computing the cosine similarity matrix...
Done computing similarity matrix.
  Training ItemCF_Pearson_k40_minsup2...
Computing the pearson similarity matrix...
Done computing similarity matrix.
  Training ItemCF_Pearson_k80_minsup1...
Computing the pearson similarity matrix...
Done computing similarity matrix.
✓ ItemCF trained with improved parameters
Training SVD...


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x78363a7d1f10>

In [13]:
# ============================================================================
# 7. NEURAL COLLABORATIVE FILTERING
# ============================================================================
"""
Implement deep learning-based recommendation using Neural Collaborative Filtering.
Architecture: GMF (Generalized Matrix Factorization) + MLP (Multi-Layer Perceptron)
Reference: He et al. "Neural Collaborative Filtering" (WWW 2017)

Steps:
1. Map user/movie IDs to continuous indices
2. Create PyTorch Dataset and DataLoader
3. Define neural network architecture
4. Train with Adam optimizer and MSE loss
"""
# Map user/movie IDs to indices
user_ids = train_ratings['user_id'].unique()
movie_ids = train_ratings['movie_id'].unique()

user2idx = {uid: idx for idx, uid in enumerate(user_ids)}
movie2idx = {mid: idx for idx, mid in enumerate(movie_ids)}

# Apply mappings
train_ratings_nn = train_ratings.copy()
train_ratings_nn['user_idx'] = train_ratings_nn['user_id'].map(user2idx)
train_ratings_nn['movie_idx'] = train_ratings_nn['movie_id'].map(movie2idx)

# Dataset class
class MovieLensDataset(Dataset):
    def __init__(self, df):
        self.users = torch.tensor(df['user_idx'].values, dtype=torch.long)
        self.movies = torch.tensor(df['movie_idx'].values, dtype=torch.long)
        self.ratings = torch.tensor(df['rating'].values, dtype=torch.float32)
        
    def __len__(self):
        return len(self.ratings)
    
    def __getitem__(self, idx):
        return self.users[idx], self.movies[idx], self.ratings[idx]

train_dataset = MovieLensDataset(train_ratings_nn)
train_loader = DataLoader(train_dataset, batch_size=4096, shuffle=True)  

# Neural CF Model with GMF + MLP fusion (like original NCF paper)
"""
    Enhanced Neural Collaborative Filtering with GMF + MLP fusion.
    
    Architecture:
    - GMF path: Element-wise product of user/item embeddings (captures linear interactions)
    - MLP path: Deep network on concatenated embeddings (captures non-linear interactions)
    - Fusion layer: Combines GMF and MLP outputs for final prediction
    
    Args:
        n_users: Number of unique users
        n_items: Number of unique items
        emb_size: Embedding dimension (default: 200)
        hidden_layers: List of hidden layer sizes (default: [512, 256, 128, 64])
    """
class EnhancedNeuralCF(nn.Module):
    def __init__(self, n_users, n_items, emb_size=200, hidden_layers=[512, 256, 128, 64]):
        super().__init__()
        
        # GMF (Generalized Matrix Factorization) path
        self.user_emb_gmf = nn.Embedding(n_users, emb_size)
        self.item_emb_gmf = nn.Embedding(n_items, emb_size)
        
        # MLP path
        self.user_emb_mlp = nn.Embedding(n_users, emb_size)
        self.item_emb_mlp = nn.Embedding(n_items, emb_size)
        
        # MLP layers
        self.bn1 = nn.BatchNorm1d(emb_size * 2)
        layers = []
        input_size = emb_size * 2
        for h in hidden_layers:
            layers.append(nn.Linear(input_size, h))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm1d(h))
            layers.append(nn.Dropout(0.25))
            input_size = h
        self.mlp = nn.Sequential(*layers)
        
        # Fusion layer
        self.fusion = nn.Linear(emb_size + hidden_layers[-1], 1)
        
        self._init_weights()
        
    def _init_weights(self):
        #initialization
        nn.init.normal_(self.user_emb_gmf.weight, std=0.01)
        nn.init.normal_(self.item_emb_gmf.weight, std=0.01)
        nn.init.normal_(self.user_emb_mlp.weight, std=0.01)
        nn.init.normal_(self.item_emb_mlp.weight, std=0.01)
        
    def forward(self, user, item):
        # GMF path (element-wise multiplication)
        u_gmf = self.user_emb_gmf(user)
        i_gmf = self.item_emb_gmf(item)
        gmf_out = u_gmf * i_gmf
        
        # MLP path (concatenation)
        u_mlp = self.user_emb_mlp(user)
        i_mlp = self.item_emb_mlp(item)
        mlp_input = torch.cat([u_mlp, i_mlp], dim=-1)
        mlp_input = self.bn1(mlp_input)
        mlp_out = self.mlp(mlp_input)
        
        # Fusion
        fusion_input = torch.cat([gmf_out, mlp_out], dim=-1)
        output = self.fusion(fusion_input)
        
        return output.squeeze()

# Train Neural Model
"""
Train the neural model using:
- Optimizer: Adam with weight decay (L2 regularization)
- Loss: Mean Squared Error (MSE) for rating prediction
- Learning rate scheduler: Reduces LR when loss plateaus
- Gradient clipping: Prevents exploding gradients
"""
print("\nTraining Enhanced Neural CF...")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
neural_model = EnhancedNeuralCF(  
    len(user2idx), 
    len(movie2idx), 
    emb_size=200,  
    hidden_layers=[512, 256, 128, 64]
).to(device)

optimizer = torch.optim.Adam(neural_model.parameters(), lr=0.001, weight_decay=1e-5) 
criterion = nn.MSELoss()

# Learning rate scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=3, verbose=True
)

epochs = 60  
best_loss = float('inf')
patience_counter = 0
early_stop_patience = 10

for epoch in range(epochs):
    neural_model.train() 
    total_loss = 0
    for u, m, r in train_loader:
        u, m, r = u.to(device), m.to(device), r.to(device)
        optimizer.zero_grad()
        pred = neural_model(u, m) 
        loss = criterion(pred, r)
        loss.backward()
        
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(neural_model.parameters(), max_norm=1.0)  
        
        optimizer.step()
        total_loss += loss.item() * len(r)
    
    avg_loss = total_loss / len(train_dataset)
    rmse = np.sqrt(avg_loss)
    
    # Update learning rate
    scheduler.step(rmse)
    
    if (epoch + 1) % 5 == 0:
        print(f"Epoch {epoch+1}: Train RMSE = {rmse:.4f}")
    
    # Early stopping check
    if rmse < best_loss:
        best_loss = rmse
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= early_stop_patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

print(f"✓ Enhanced Neural CF trained. Best RMSE: {best_loss:.4f}")



Training Enhanced Neural CF...




Epoch 5: Train RMSE = 0.9090
Epoch 10: Train RMSE = 0.7981
Epoch 15: Train RMSE = 0.6974
Epoch 20: Train RMSE = 0.6360
Epoch 25: Train RMSE = 0.5902
Epoch 30: Train RMSE = 0.5504
Epoch 35: Train RMSE = 0.5121
Epoch 40: Train RMSE = 0.4769
Epoch 45: Train RMSE = 0.4505
Epoch 50: Train RMSE = 0.4331
Epoch 55: Train RMSE = 0.4213
Epoch 60: Train RMSE = 0.4121
✓ Enhanced Neural CF trained. Best RMSE: 0.4121


In [11]:
# ============================================================================
# 8. RECOMMENDATION FUNCTIONS (FOR EVALUATION)
# ============================================================================
"""
Define recommendation functions for each model.
All functions follow the same interface:
- Input: user_id, N (number of recommendations)
- Output: List of N movie IDs
- Behavior: Exclude movies the user has already seen
"""
def recommend_random(user_id, N=10):
    """
    Random baseline: Recommend N random unseen movies.
    
    This serves as the worst-case baseline. Any reasonable model should
    significantly outperform random recommendations.
    
    Args:
        user_id: User ID to generate recommendations for
        N: Number of recommendations to return
        
    Returns:
        List of N randomly selected movie IDs
        """
    seen = set(train_ratings[train_ratings['user_id']==user_id]['movie_id'])
    candidates = list(set(train_movie_ids)-seen)
    chosen = np.random.choice(candidates, size=min(N,len(candidates)), replace=False)
    return [(mid, movies[movies['movie_id']==mid]['title'].values[0]) for mid in chosen]

def recommend_popular(user_id, N=10):
     """
    Popularity baseline: Recommend N most popular unseen movies.
    
    "Popular" means movies with the most ratings. This is a simple but
    effective non-personalized baseline.
    
    Args:
        user_id: User ID to generate recommendations for
        N: Number of recommendations to return
        
    Returns:
        List of N most popular movie IDs (excluding already seen)
    """
    movie_counts_train = train_ratings.groupby('movie_id').size().sort_values(ascending=False)
    seen = set(train_ratings[train_ratings['user_id']==user_id]['movie_id'])
    recs = []
    for mid in movie_counts_train.index:
        if mid not in seen:
            recs.append((mid, movies[movies['movie_id']==mid]['title'].values[0]))
        if len(recs) >= N:
            break
    return recs

def recommend_itemcf_train(user_id, N=10, top_k=20):
     """
    Item-based Collaborative Filtering recommendations.
    
    Uses item-item similarity to find movies similar to those the user liked.
    Falls back to popularity if unable to generate predictions.
    
    Args:
        user_id: User ID to generate recommendations for
        N: Number of recommendations to return
        
    Returns:
        List of N movie IDs ranked by predicted rating
    """
    if user_id not in user_map_train:
        return []
    uidx = user_map_train[user_id]
    user_rated = train_ratings[train_ratings['user_idx']==uidx][['movie_idx','rating']]
    
    scores = np.zeros(n_items_train)
    for _, row in user_rated.iterrows():
        midx = row['movie_idx']
        rating = row['rating']
        sim_scores = item_similarity_train[midx]
        top_neighbors = np.argsort(sim_scores)[::-1][:top_k]
        scores[top_neighbors] += sim_scores[top_neighbors]*rating

    seen = set(user_rated['movie_idx'])
    scores[list(seen)] = -1e9
    
    top_items = np.argsort(scores)[::-1][:N]
    train_movie_list = sorted(train_movie_ids)
    return [(train_movie_list[i], movies[movies['movie_id']==train_movie_list[i]]['title'].values[0]) for i in top_items]

def recommend_svd(user_id, N=10):
    """
    SVD Matrix Factorization recommendations.
    
    Uses latent factor model to predict ratings for unseen movies.
    
    Args:
        user_id: User ID to generate recommendations for
        N: Number of recommendations to return
        
    Returns:
        List of N movie IDs ranked by predicted rating
    """
    seen = set(train_ratings[train_ratings['user_id']==user_id]['movie_id'])
    candidate_movies = set(ratings['movie_id']) - seen
    preds = []
    for mid in candidate_movies:
        est = svd.predict(user_id, mid).est
        preds.append((mid, est))
    preds.sort(key=lambda x: x[1], reverse=True)
    topN = preds[:N]
    return [(mid, movies[movies['movie_id']==mid]['title'].values[0]) for mid,_ in topN]

def recommend_neural(user_id, N=10):
     """
    Neural Collaborative Filtering recommendations.
    
    Uses deep learning model to predict ratings. Generates predictions for
    all movies in batch for efficiency.
    
    Args:
        user_id: User ID to generate recommendations for
        N: Number of recommendations to return
        
    Returns:
        List of N movie IDs ranked by predicted rating
    """
  
    neural_model.eval()
    
    if user_id not in user2idx:
        return []
    
    uid = torch.tensor([user2idx[user_id]]*len(movie2idx)).to(device)
    mid = torch.tensor(list(range(len(movie2idx)))).to(device)
    
    with torch.no_grad():
        preds = neural_model(uid, mid).cpu().numpy()
    
    idx2movie = {v: k for k, v in movie2idx.items()}
    
    seen = set(train_ratings[train_ratings['user_id'] == user_id]['movie_id'])
    recommendations = [(idx2movie[i], p) for i, p in enumerate(preds) if idx2movie[i] not in seen]
    recommendations.sort(key=lambda x: x[1], reverse=True)
    
    return [mid for mid, _ in recommendations[:N]]

def recommend_movies_eval(user_id, N=10, method="itemcf"):
    if method=="itemcf":
        return recommend_itemcf_train(user_id,N)
    elif method=="svd":
        return recommend_svd(user_id,N)
    elif method=="popular":
        return recommend_popular(user_id,N)
    elif method=="random":
        return recommend_random(user_id,N)


In [18]:
# ============================================================================
# 9. EVALUATION FUNCTIONS
# ============================================================================
"""
Implement standard evaluation metrics for top-N recommendation:

1. Precision@K: Fraction of recommended items that are relevant
   - Higher is better (max 1.0)
   - Measures recommendation accuracy
   
2. Recall@K: Fraction of relevant items that are recommended
   - Higher is better (max 1.0)
   - Measures coverage of relevant items
   
3. NDCG@K: Normalized Discounted Cumulative Gain
   - Higher is better (max 1.0)
   - Measures ranking quality (penalizes relevant items lower in list)
"""
def recommend_movies(user_id, N=10, method="popular"):
    """
    Unified recommendation function with movie titles
    """
    # Get recommendations
    if method.lower() == "random":
        recs = recommend_random(user_id, N=N)
    elif method.lower() == "popular":
        recs = recommend_popular(user_id, N=N)
    elif method.lower() == "itemcf":
        recs = recommend_itemcf_train(user_id, N=N)
    elif method.lower() == "svd":
        recs = recommend_svd(user_id, N=N)
    elif method.lower() == "neural":
        if user_id not in user2idx:
            return []
        recs = recommend_neural(user_id, N=N)
    else:
        raise ValueError(f"Unknown method: {method}")
    
    # Ensure recs is a flat list
    if isinstance(recs, np.ndarray):
        recs = recs.flatten().tolist()
    
    # Build result with movie titles
    result = []
    for item in recs:
        # Handle different return formats
        if isinstance(item, (tuple, list)):
            mid = item[0]  # If it's (movie_id, score)
        else:
            mid = item  # If it's just movie_id
        
        # Convert to int if needed
        if isinstance(mid, np.ndarray):
            mid = int(mid.flatten()[0])
        else:
            mid = int(mid)
        
        # Get movie title
        try:
            movie_row = movies[movies['movie_id'] == mid]
            if not movie_row.empty:
                title = movie_row['title'].values[0]
                result.append((mid, title))
        except:
            continue
    
    return result


In [15]:
# ============================================================================
# 10. EVALUATION FUNCTIONS
# ============================================================================
def evaluate_model(method, k=10, n_users=500, wrapper=recommend_movies_eval):
    precisions, recalls, ndcgs = [], [], []
    sampled_users = np.random.choice(list(test_by_user.keys()), size=min(n_users,len(test_by_user)), replace=False)
    
    for uid in sampled_users:
        relevant = test_by_user[uid]
        recs = wrapper(uid,N=k,method=method)
        rec_ids = [mid for mid,_ in recs]
        hits = len(set(rec_ids) & set(relevant))
        precisions.append(hits/k)
        recalls.append(hits/len(relevant) if len(relevant)>0 else 0)
        dcg = sum(1/np.log2(i+2) for i,r in enumerate(rec_ids) if r in relevant)
        idcg = sum(1/np.log2(i+2) for i in range(min(len(relevant),k)))
        ndcgs.append(dcg/idcg if idcg>0 else 0)
    
    return {"Precision@K":np.mean(precisions),
            "Recall@K":np.mean(recalls),
            "NDCG@K":np.mean(ndcgs)}

def evaluate_neural(N=10):
    """Evaluate Neural CF"""
    precision_list = []
    recall_list = []
    ndcg_list = []
    
    for user_id in test_by_user.keys():
        if user_id not in user2idx:
            continue
            
        actual_items = set(test_by_user[user_id])
        if len(actual_items) == 0:
            continue
        
        try:
            pred_items = recommend_neural(user_id, N=N)  # Returns list of movie IDs
        except:
            continue
        
        if len(pred_items) == 0:
            continue
        
        # pred_items is already a list of integers, no unpacking needed
        pred_items_set = set(pred_items)
        hits = len(pred_items_set & actual_items)
        
        precision = hits / N
        recall = hits / len(actual_items)
        
        dcg = sum([1 / np.log2(i+2) for i, item in enumerate(pred_items) if item in actual_items])
        idcg = sum([1 / np.log2(i+2) for i in range(min(len(actual_items), N))])
        ndcg = dcg / idcg if idcg > 0 else 0
        
        precision_list.append(precision)
        recall_list.append(recall)
        ndcg_list.append(ndcg)
    
    return {
        'Precision@K': np.mean(precision_list) if precision_list else 0,
        'Recall@K': np.mean(recall_list) if recall_list else 0,
        'NDCG@K': np.mean(ndcg_list) if ndcg_list else 0
    }


In [16]:
# ============================================================================
# 11. EVALUATION RESULTS
# ============================================================================
print("\n" + "="*50)
print("EVALUATION RESULTS")
print("="*50)
print("Random:", evaluate_model("random",k=10))
print("Popularity:", evaluate_model("popular",k=10))
print("ItemCF:", evaluate_model("itemcf",k=10))
print("SVD:", evaluate_model("svd",k=10))
print("Neural:", evaluate_neural(N=10))


EVALUATION RESULTS
Random: {'Precision@K': 0.010600000000000002, 'Recall@K': 0.0028904381753796027, 'NDCG@K': 0.010929535752877022}
Popularity: {'Precision@K': 0.1968, 'Recall@K': 0.06704184790838692, 'NDCG@K': 0.21951699133470318}
ItemCF: {'Precision@K': 0.32120000000000004, 'Recall@K': 0.133772256259019, 'NDCG@K': 0.3615290254134801}
SVD: {'Precision@K': 0.077, 'Recall@K': 0.024069515666332416, 'NDCG@K': 0.08446872786598195}
Neural: {'Precision@K': 0.07479297780722094, 'Recall@K': 0.023560072267141373, 'NDCG@K': 0.0845653725908253}


In [19]:
# ============================================================================
# 12. SAMPLE RECOMMENDATIONS
# ============================================================================
user_id = 123

print("\n" + "="*50)
print(f"SAMPLE RECOMMENDATIONS FOR USER {user_id}")
print("="*50)
print("Random:", recommend_movies(user_id, N=10, method="random"))
print("Popularity:", recommend_movies(user_id, N=10, method="popular"))
print("ItemCF:", recommend_movies(user_id, N=10, method="itemcf"))
print("SVD:", recommend_movies(user_id, N=10, method="svd"))
print("Neural:", recommend_movies(user_id, N=10, method="neural"))


SAMPLE RECOMMENDATIONS FOR USER 123
Random: [(188, 'Prophecy, The (1995)'), (1551, 'Buddy (1997)'), (3821, 'Nutty Professor II: The Klumps (2000)'), (1381, 'Grease 2 (1982)'), (2290, 'Stardust Memories (1980)'), (535, 'Short Cuts (1993)'), (2519, 'House on Haunted Hill (1958)'), (2053, 'Honey, I Blew Up the Kid (1992)'), (1798, 'Hush (1998)'), (1585, 'Love Serenade (1996)')]
Popularity: [(110, 'Braveheart (1995)'), (2396, 'Shakespeare in Love (1998)'), (1197, 'Princess Bride, The (1987)'), (2997, 'Being John Malkovich (1999)'), (2628, 'Star Wars: Episode I - The Phantom Menace (1999)'), (1, 'Toy Story (1995)'), (3578, 'Gladiator (2000)'), (919, 'Wizard of Oz, The (1939)'), (541, 'Blade Runner (1982)'), (34, 'Babe (1995)')]
ItemCF: [(1036, 'Die Hard (1988)'), (1197, 'Princess Bride, The (1987)'), (733, 'Rock, The (1996)'), (919, 'Wizard of Oz, The (1939)'), (1220, 'Blues Brothers, The (1980)'), (2100, 'Splash (1984)'), (1374, 'Star Trek: The Wrath of Khan (1982)'), (1084, 'Bonnie and C