# 1. Setup / imports (initialization)

In [None]:
# Core data libraries
import pandas as pd            # Data manipulation (DataFrame) and CSV I/O
import numpy as np             # Numerical arrays, random sampling and math utilities

# Surprise (scikit-surprise) — recommender-system algorithms and helpers
# - Dataset/Reader help load pandas DataFrames into Surprise format
# - SVD is the matrix-factorization algorithm used for recommendations
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy     # RMSE and other evaluation helpers

# scikit-learn utilities
from sklearn.metrics import ndcg_score  # ranking metric used to evaluate top-k recommendations

# Suppress verbose or harmless warnings in notebook output to keep logs clean
import warnings; warnings.filterwarnings('ignore')

# Quick sanity print showing imports succeeded when this cell runs
print("All libraries imported – ready to go!")

# 2. Data generation (brands + synthetic ratings)

In [None]:
# Mountain bike brands and their market-share weights.
# The weights are illustrative (described as hypothetical 2024 Pinkbike Community
# Survey results). They only shift a brand's *average rating* upward; they do not
# influence which brands a user ends up rating (that choice is uniform, see below).
brands = ['Specialized', 'Santa Cruz', 'Transition', 'Trek', 'Giant',
          'Pivot', 'Rocky Mountain', 'Forbidden', 'Commencal', 'Canyon']
popularity_weights = [0.20, 0.18, 0.16, 0.15, 0.05, 0.08, 0.07, 0.04, 0.04, 0.03]

# brand -> weight lookup built once, instead of scanning `brands` for every rating
weight_by_brand = dict(zip(brands, popularity_weights))

# Generate synthetic rating data:
# - Each user rates 3-5 distinct brands chosen uniformly at random
# - A brand's mean rating is 3 + 2 * weight, so heavier brands score slightly higher
# - Gaussian noise (σ = 0.5) is added, then the value is clipped to [1, 5] and rounded
np.random.seed(42)                     # Seed the global NumPy RNG for reproducible data
num_users = 500                        # Number of synthetic users to generate
data = []

for user_id in range(1, num_users + 1):
    # How many brands this user rates: randint's upper bound is exclusive -> 3, 4 or 5
    n = np.random.randint(3, 6)
    # replace=False guarantees a user never rates the same brand twice
    rated = np.random.choice(brands, n, replace=False)

    # .tolist() turns NumPy string scalars into plain Python str before storing them
    for brand in rated.tolist():
        # Rating model:
        # - Base rating: 3 (neutral)
        # - Brand effect: 2 * weight (0.06 to 0.40 with the weights above)
        # - Random noise: normal distribution with σ = 0.5
        w = weight_by_brand[brand]
        rating = np.clip(np.random.normal(3 + 2*w, 0.5), 1, 5)
        data.append({'user_id': user_id,
                     'brand': brand,
                     'rating': int(round(rating))})  # whole-star rating as a plain int

# Convert to DataFrame and show summary
df = pd.DataFrame(data)
print(f"Generated {len(df)} ratings from {num_users} users")

# Display all ratings for example users 1, 10, 50 and 100.
# The ids are fixed here so the output does not depend on which other cells have
# already been executed in the notebook session.
selected_users = [1, 10, 50, 100]
df[df['user_id'].isin(selected_users)].sort_values(['user_id', 'brand']).reset_index(drop=True)

# 3. Export / summary (optional)

In [None]:
# Persist the synthetic ratings to disk so they can be inspected with other tools.
# index=False keeps the DataFrame's row index out of the written file.
df.to_csv(path_or_buf='mtb_ratings.csv', index=False)
print("Dataset exported to mtb_ratings.csv")

# 4. Train & recommend (modeling cell)

In [None]:
# Wrap the ratings DataFrame in a Surprise Dataset.
# The Reader declares the rating scale (1 to 5) used by the 'rating' column.
reader = Reader(rating_scale=(1, 5))
# Columns are handed over in the order Surprise expects: user, item, rating
data = Dataset.load_from_df(df.loc[:, ['user_id', 'brand', 'rating']], reader)

# Hold out 20% of the ratings (test_size=0.2) for evaluation; train on the rest
trainset, testset = train_test_split(data, test_size=0.2)

# Fit Surprise's SVD (matrix-factorization) model on the training portion.
# NOTE(review): no random_state is passed to train_test_split or SVD, so the split
# and the fitted model presumably depend on RNG state set outside this cell —
# confirm before relying on identical numbers when this cell is re-run.
model = SVD()
model.fit(trainset)

# ---- Demo recommendations for a few sample users ----
# A handful of user ids used to illustrate what the trained model recommends
user_ids = [1, 10, 50, 100]
all_brands = brands  # candidate items: the brand list from the data-generation cell

# For every sample user: score each brand they have NOT rated, then show the best three
for user_id in user_ids:
    # Brands this user already rated are excluded from the candidates
    already_rated = df.loc[df['user_id'] == user_id, 'brand'].tolist()

    # One prediction per unrated brand
    preds = []
    for candidate in all_brands:
        if candidate in already_rated:
            continue
        preds.append(model.predict(user_id, candidate))

    # Highest estimated rating first; keep the leading three
    ranked = sorted(preds, key=lambda pred: pred.est, reverse=True)
    top3 = ranked[:3]

    # Report: rank, brand (prediction's iid) and estimated rating (prediction's est)
    print(f"\nTop-3 recommendations for User {user_id}:")
    for rank, rec in enumerate(top3, start=1):
        print(f"  {rank}. {rec.iid}  →  {rec.est:.2f}")

# ---- Evaluation on the hold-out test set ----
# This section only computes the metrics (predictions, rmse, ndcgs); nothing in
# this cell prints them.

# Use the trained model to predict ratings for the test set (the hold-out 20%)
predictions = model.test(testset)

# RMSE is a common accuracy metric for rating prediction
rmse = accuracy.rmse(predictions, verbose=False)

# Group the predictions by user in a single pass. Re-filtering the whole prediction
# list once per user would be quadratic in the number of predictions.
preds_by_user = {}
for p in predictions:
    preds_by_user.setdefault(p.uid, []).append(p)

# Compute nDCG per user to measure ranking quality.
# Users with a single test item are skipped: ranking one item is meaningless.
ndcgs = []
for ups in preds_by_user.values():
    if len(ups) < 2:
        continue
    # True relevance (actual rating) and estimated score for each held-out item
    true = [p.r_ui for p in ups]
    est = [p.est for p in ups]
    # ndcg_score takes 2-D input (one row per query), so wrap each list in []
    ndcgs.append(ndcg_score([true], [est]))

# 5. Evaluation (evaluation cell)

In [None]:
# ---- Comprehensive Model Evaluation ----
# Test the model on the 20% hold-out set to evaluate performance
predictions = model.test(testset)

# 1. Rating Prediction Accuracy (RMSE)
# - Measures how close our predicted ratings are to actual ratings
# - Lower is better (range: 0 to 4 for 5-star scale)
rmse = accuracy.rmse(predictions, verbose=False)

# 2. Ranking Quality (nDCG)
# - Measures how well we order recommendations
# - Accounts for both relevance and position in ranked list
# - Range: 0 to 1 (1 = perfect ranking)
ndcgs = []
ndcg_at_k = []  # nDCG@3: same metric, truncated to the top 3 ranked items

# Group predictions by user in a single pass. Re-filtering the whole prediction
# list once per user would be quadratic in the number of predictions.
preds_by_user = {}
for p in predictions:
    preds_by_user.setdefault(p.uid, []).append(p)

# Score each user's held-out items
for ups in preds_by_user.values():
    if len(ups) < 2:  # Need at least 2 items for a meaningful ranking
        continue

    # Actual ratings (relevance) and predicted ratings (ranking scores); the
    # lists are deliberately left unsorted because ndcg_score does the ranking.
    true = [p.r_ui for p in ups]
    est = [p.est for p in ups]

    # Full nDCG over all of this user's held-out items
    ndcgs.append(ndcg_score([true], [est]))

    # nDCG@3, only for users with at least 3 held-out items.
    # k=3 lets ndcg_score rank by predicted score and cut off at rank 3. Slicing
    # the first three list entries instead would pick three arbitrary items.
    if len(ups) >= 3:
        ndcg_at_k.append(ndcg_score([true], [est], k=3))

# Calculate mean metrics (0.0 when no user qualified, to avoid a mean of an empty list)
mean_ndcg = np.mean(ndcgs) if ndcgs else 0.0
mean_ndcg_at_3 = np.mean(ndcg_at_k) if ndcg_at_k else 0.0

# 3. Print Comprehensive Evaluation Results
print("\nModel Evaluation Results (20% hold-out):")
print("\nRating Prediction Accuracy:")
print(f"  RMSE        : {rmse:.3f}    # How accurate are the predicted ratings?")
print("                              # Target: < 1.2 for 5-star systems")

print("\nRanking Quality:")
print(f"  Mean nDCG   : {mean_ndcg:.3f}    # How well do we rank all items?")
print(f"  nDCG@3      : {mean_ndcg_at_3:.3f}    # How good are our top-3 recommendations?")
print("                              # Target: > 0.5 for useful recommendations")