In [None]:
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
from sklearn.metrics import ndcg_score
# Install a blanket "ignore" filter so library warnings do not clutter the cell outputs.
# NOTE(review): with no category/module argument this hides every warning,
# including deprecation and numerical ones — consider narrowing it.
import warnings; warnings.filterwarnings('ignore')

# Reaching this line means all of the imports above succeeded.
print("All libraries imported – ready to go!")

In [None]:
# Brand catalogue and the matching popularity weights
# (taken from the 2024 Pinkbike Community Survey); the two lists are
# positionally aligned, i.e. popularity_weights[i] belongs to brands[i].
brands = ['Specialized', 'Santa Cruz', 'Transition', 'Trek', 'Giant',
          'Pivot', 'Rocky Mountain', 'Forbidden', 'Commencal', 'Canyon']
popularity_weights = [0.20, 0.18, 0.16, 0.15, 0.05, 0.08, 0.07, 0.04, 0.04, 0.03]

# Synthetic population: 500 users who each rate between 3 and 5 distinct brands.
np.random.seed(42)                     # fixed seed -> same fake data on every run
num_users = 500
weight_by_brand = dict(zip(brands, popularity_weights))

data = []
for user_id in range(1, num_users + 1):
    # randint's upper bound is exclusive, so this yields 3, 4 or 5.
    n_rated = np.random.randint(3, 6)
    # Brands are drawn uniformly without replacement; the popularity weight
    # only shifts the rating's mean below, not the chance of being rated.
    rated_brands = np.random.choice(brands, n_rated, replace=False)
    for brand in rated_brands:
        weight = weight_by_brand[brand]
        # Noisy score centred on 3 + 2*weight, kept inside the 1-5 scale
        # and then rounded to a whole-star rating.
        raw_score = np.random.normal(3 + 2*weight, 0.5)
        bounded_score = np.clip(raw_score, 1, 5)
        data.append({'user_id': user_id, 'brand': brand,
                     'rating': round(bounded_score)})

# One row per (user, brand) rating.
df = pd.DataFrame(data)
print(f"Generated {len(df)} ratings from {num_users} users")
df.head()

In [None]:
# Seed used for both the train/test split and the SVD initialisation.
# Without an explicit random_state Surprise draws from NumPy's current global
# RNG, so re-running this cell on its own (or changing any upstream random
# draw) would silently produce a different split and a different model.
RANDOM_STATE = 42

# Load the ratings frame into Surprise; columns must be in (user, item, rating) order.
# Note: this rebinds `data`, which until now held the raw list of rating dicts.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['user_id', 'brand', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2,
                                     random_state=RANDOM_STATE)

# Train the matrix-factorization model on the 80% training portion only.
model = SVD(random_state=RANDOM_STATE)
model.fit(trainset)

# ---- Demo recommendation for user 1 ----
user_id = 1
all_brands = df['brand'].unique()
# Brands this user rated anywhere in the full data set (train or test);
# .loc avoids the chained df[mask]['brand'] indexing.
already_rated = df.loc[df['user_id'] == user_id, 'brand'].tolist()

# Score every brand the user has not rated yet and keep the three
# with the highest estimated rating.
preds = [model.predict(user_id, b) for b in all_brands
         if b not in already_rated]
top3 = sorted(preds, key=lambda x: x.est, reverse=True)[:3]

print(f"\nTop-3 recommendations for User {user_id}:")
for i, p in enumerate(top3, 1):
    print(f"  {i}. {p.iid}  →  {p.est:.2f}")

In [None]:
# Score the model on the hold-out test set.
predictions = model.test(testset)
rmse = accuracy.rmse(predictions, verbose=False)

# Bucket the predictions by user in a single pass, instead of rescanning the
# whole prediction list once per user (which is quadratic in the test size).
preds_by_user = {}
for p in predictions:
    preds_by_user.setdefault(p.uid, []).append(p)

# nDCG per user: compares the ranking implied by the estimated ratings with
# the true ratings. Users with a single test item are skipped because a
# one-item ranking carries no ordering information.
ndcgs = []
for user_preds in preds_by_user.values():
    if len(user_preds) > 1:
        true = [p.r_ui for p in user_preds]
        est = [p.est for p in user_preds]
        # ndcg_score expects 2-D inputs of shape (n_queries, n_items).
        ndcgs.append(ndcg_score([true], [est]))

# Fall back to 0.0 when no user had at least two test items.
mean_ndcg = np.mean(ndcgs) if ndcgs else 0.0

print("\nEvaluation on 20% hold-out:")
print(f"  RMSE      : {rmse:.3f}")
print(f"  Mean nDCG : {mean_ndcg:.3f}")