In [None]:
# Core data libraries
import pandas as pd            # Data manipulation (DataFrame) and CSV I/O
import numpy as np             # Numerical arrays, random sampling and math utilities

# Surprise (scikit-surprise) — recommender-system algorithms and helpers
# - Dataset/Reader help load pandas DataFrames into Surprise format
# - SVD is the matrix-factorization algorithm used for recommendations
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy     # RMSE and other evaluation helpers

# scikit-learn utilities
from sklearn.metrics import ndcg_score  # ranking metric used to evaluate top-k recommendations

# Suppress verbose or harmless warnings in notebook output to keep logs clean
import warnings; warnings.filterwarnings('ignore')

# Quick sanity print showing imports succeeded when this cell runs
print("All libraries imported – ready to go!")

In [None]:
# Mountain bike brands and their market-share weights.
# The weights are illustrative values modelled on a hypothetical
# "2024 Pinkbike Community Survey"; a higher weight (e.g. 0.20) shifts the
# mean rating for that brand upward in the simulation below.
brands = ['Specialized', 'Santa Cruz', 'Transition', 'Trek', 'Giant',
          'Pivot', 'Rocky Mountain', 'Forbidden', 'Commencal', 'Canyon']
popularity_weights = [0.20, 0.18, 0.16, 0.15, 0.05, 0.08, 0.07, 0.04, 0.04, 0.03]

# The two lists are parallel; fail loudly if they ever drift apart.
assert len(brands) == len(popularity_weights), "each brand needs exactly one weight"

# Brand -> weight lookup built once, instead of a linear brands.index() scan
# for every generated rating.
brand_weight = dict(zip(brands, popularity_weights))

# Generate synthetic rating data:
# - Each user rates 3-5 distinct brands (simulating partial, sparse ratings)
# - Brands with a higher weight get a higher expected rating
# - Gaussian noise is added so ratings are not deterministic
np.random.seed(42)                     # Seed the global NumPy RNG for reproducible data
num_users = 500                        # Number of synthetic users to generate

# Plain list of row dicts. It deliberately does not use the name `data`,
# which the modelling cell binds to the Surprise Dataset object.
rating_records = []

for user_id in range(1, num_users + 1):
    # randint's upper bound is exclusive, so this draws 3, 4 or 5
    n = np.random.randint(3, 6)
    rated = np.random.choice(brands, n, replace=False)  # No duplicate brands per user

    for brand in rated:
        # Rating model:
        # - Base rating: 3 (neutral)
        # - Brand effect: 2 * weight (0.06 to 0.40 with the weights above)
        # - Noise: normal distribution with sigma = 0.5
        # - Result is clipped to [1, 5] and rounded to a whole star
        w = brand_weight[str(brand)]   # str(): `brand` is a NumPy string scalar
        rating = np.clip(np.random.normal(3 + 2*w, 0.5), 1, 5)
        rating_records.append({'user_id': user_id,
                               'brand': brand,
                               # int() keeps the stored value a plain Python int
                               # whatever round() returns for a NumPy scalar
                               'rating': int(round(rating))})

# Convert to DataFrame and show summary
df = pd.DataFrame(rating_records)
print(f"Generated {len(df)} ratings from {num_users} users")
df.head()  # Display first few entries

In [None]:
# Seed used for both the train/test split and the SVD initialisation.
# Without an explicit random_state Surprise draws from NumPy's global RNG, so
# re-running this cell on its own would give a different split and model.
RANDOM_STATE = 42

# Load the synthetic DataFrame into Surprise's Dataset format
# Reader tells Surprise the rating scale used in the DataFrame
reader = Reader(rating_scale=(1, 5))
# Dataset.load_from_df expects columns in the order: user, item, rating
data = Dataset.load_from_df(df[['user_id', 'brand', 'rating']], reader)

# Split into training and test sets; test_size=0.2 holds out 20% of the ratings
trainset, testset = train_test_split(data, test_size=0.2, random_state=RANDOM_STATE)

# Train a matrix-factorization model (Surprise's SVD) on the training set.
# All hyper-parameters other than the seed are left at the library defaults.
model = SVD(random_state=RANDOM_STATE)
model.fit(trainset)

# ---- Demo recommendations for a few sample users ----
# A handful of user ids spread across the 1..500 range
user_ids = [1, 42, 100, 250, 500]  # Example user ids to inspect recommendations
all_brands = brands  # use the existing brands list generated earlier
TOP_N = 3            # how many recommendations to show per user


def recommend_unrated(algo, ratings, user_id, candidates, n=TOP_N):
    """Return the `n` highest-scoring predictions for brands the user has not rated.

    Parameters
    ----------
    algo : fitted model exposing ``predict(user_id, item_id)`` whose result
        has an ``est`` attribute (the predicted rating).
    ratings : DataFrame with ``user_id`` and ``brand`` columns.
    user_id : id of the user to recommend for.
    candidates : iterable of brand names to choose from.
    n : maximum number of predictions to return.

    Returns
    -------
    list of prediction objects, sorted by ``est`` in descending order.
    """
    # Brands this user already rated, so we avoid recommending them again
    seen = {str(b) for b in ratings.loc[ratings['user_id'] == user_id, 'brand']}
    preds = [algo.predict(user_id, b) for b in candidates if b not in seen]
    return sorted(preds, key=lambda pred: pred.est, reverse=True)[:n]


# For each sample user, show the top predictions among unrated brands
for user_id in user_ids:
    top_preds = recommend_unrated(model, df, user_id, all_brands)

    print(f"\nTop-{TOP_N} recommendations for User {user_id}:")
    for i, p in enumerate(top_preds, 1):
        # p.iid is the item id (brand), p.est is the predicted rating
        print(f"  {i}. {p.iid}  →  {p.est:.2f}")


# ---- Evaluation on the hold-out test set ----
# NOTE(review): the following cell recomputes these same metrics with more
# commentary; consider keeping only one of the two.

# Use the trained model to predict ratings for the test set (the hold-out 20%)
predictions = model.test(testset)

# RMSE is a common accuracy metric for rating prediction (lower is better)
rmse = accuracy.rmse(predictions, verbose=False)

# Group predictions by user in a single pass, rather than rescanning the
# whole prediction list once per user.
preds_by_user = {}
for p in predictions:
    preds_by_user.setdefault(p.uid, []).append(p)

# Compute nDCG per user to measure ranking quality.
# Users with a single test item are skipped: one item has only one possible
# ordering, so there is no ranking to evaluate.
ndcgs = []
for ups in preds_by_user.values():
    if len(ups) > 1:
        true = [p.r_ui for p in ups]   # actual ratings
        est  = [p.est  for p in ups]   # predicted ratings
        # ndcg_score takes 2-D input (one row per query), so wrap each list in []
        ndcgs.append(ndcg_score([true], [est]))

# Falls back to 0.0 when no user had at least two test items
mean_ndcg = np.mean(ndcgs) if ndcgs else 0.0

print("\nEvaluation on 20% hold-out:")
print(f"  RMSE      : {rmse:.3f}")
print(f"  Mean nDCG : {mean_ndcg:.3f}")

In [None]:
# ---- Model Evaluation on Hold-out Test Set ----


def ndcg_per_user(predictions):
    """Return one nDCG score for every user with at least two predictions.

    Parameters
    ----------
    predictions : iterable of prediction objects exposing ``uid`` (user id),
        ``r_ui`` (actual rating) and ``est`` (predicted rating).

    Returns
    -------
    list of float
        nDCG values, one per eligible user. Empty when no user has two or
        more predictions.
    """
    # Single pass to bucket predictions by user (avoids rescanning the full
    # list once per user).
    grouped = {}
    for pred in predictions:
        grouped.setdefault(pred.uid, []).append(pred)

    scores = []
    for user_preds in grouped.values():
        # A single item has only one possible ordering, so skip those users
        if len(user_preds) < 2:
            continue
        true = [pred.r_ui for pred in user_preds]  # r_ui = real user rating
        est = [pred.est for pred in user_preds]    # est = estimated rating
        # scikit-learn expects 2-D input (one row per query/ranking)
        scores.append(ndcg_score([true], [est]))
    return scores


# Use the trained SVD model to predict ratings for all user-brand pairs in the test set
predictions = model.test(testset)

# Root Mean Square Error (RMSE)
# - Measures the typical magnitude of rating-prediction errors
# - Lower is better (0 would be perfect predictions)
# - What counts as "good" depends on the rating scale and how noisy the data is
rmse = accuracy.rmse(predictions, verbose=False)

# Normalized Discounted Cumulative Gain (nDCG)
# - Ranges from 0 to 1 (1 = perfect ranking)
# - Takes into account both relevance and position in the ranked list
# - DCG discounts relevant items that appear lower in the list
ndcgs = ndcg_per_user(predictions)

# Mean nDCG across all eligible users
# If no users had multiple test items, default to 0
mean_ndcg = np.mean(ndcgs) if ndcgs else 0.0

# Print evaluation metrics
print("\nEvaluation on 20% hold-out:")
print(f"  RMSE      : {rmse:.3f}   # How accurate are the predicted ratings?")
print(f"  Mean nDCG : {mean_ndcg:.3f}   # How good is the ranking quality?")