In [1]:
# 1) Popularity baseline (most-used / highest-average items)
import pandas as pd

# ratings: DataFrame with columns user,item,rating
def popularity_recommend(ratings, user_id, N=5):
    """Recommend the globally most popular items the user has not rated yet.

    Items are ranked by how many ratings they received and, among items
    with the same count, by their average rating (both descending).

    Parameters
    ----------
    ratings : pandas.DataFrame
        Must provide the columns ``user``, ``item`` and ``rating``.
    user_id
        Value looked up in the ``user`` column. A user with no rows simply
        has nothing filtered out.
    N : int, default 5
        Upper bound on the length of the returned list.

    Returns
    -------
    list
        Item labels in popularity order, excluding items rated by ``user_id``.
    """
    # Everything this user already rated must be left out of the result.
    already_rated = set(ratings.loc[ratings.user == user_id, "item"])

    # One row per item holding its number of ratings and its mean rating.
    item_stats = ratings.groupby("item")["rating"].agg(["count", "mean"])
    ranked = item_stats.sort_values(["count", "mean"], ascending=[False, False])

    unseen = []
    for candidate in ranked.index:
        if candidate not in already_rated:
            unseen.append(candidate)
    return unseen[:N]

# usage: popularity_recommend(ratings, user_id=123, N=5)


In [2]:
# 2) User-based CF (cosine similarity, neighborhood prediction)
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def user_based_recommend(ratings, user_id, N=5, k=5):
    """Recommend up to N unrated items using user-based collaborative filtering.

    The target user's rating vector is compared (cosine similarity) with
    every other user's vector; the ``k`` most similar users form the
    neighbourhood, and each item is scored by the similarity-weighted sum of
    the neighbours' ratings.

    Parameters
    ----------
    ratings : pandas.DataFrame
        Must provide the columns ``user``, ``item`` and ``rating``.
    user_id
        Label of the target user; must occur in ``ratings["user"]``.
    N : int, default 5
        Maximum number of recommendations to return.
    k : int, default 5
        Neighbourhood size (number of most similar users that vote).

    Returns
    -------
    list
        Item labels ordered by descending score, excluding items the user
        has already rated.

    Raises
    ------
    KeyError
        If ``user_id`` has no row in the user-item matrix.
    """
    # Users x items matrix. pivot_table averages duplicate (user, item) pairs;
    # missing ratings are filled with 0.
    M = ratings.pivot_table(index="user", columns="item", values="rating").fillna(0)
    items = M.columns.to_list()
    user_vecs = M.to_numpy()
    uidx = M.index.get_loc(user_id)          # KeyError for an unknown user

    # Only the target user's similarities are ever read, so compute that one
    # row instead of the full user x user matrix.
    sims = cosine_similarity(user_vecs[uidx:uidx + 1], user_vecs)[0]
    sims[uidx] = 0                           # exclude self
    neigh = sims.argsort()[::-1][:k]

    # Similarity-weighted sum of the neighbours' ratings, one score per item.
    scores = user_vecs[neigh].T.dot(sims[neigh])

    rated = set(ratings.loc[ratings.user == user_id, "item"])
    idx_sorted = np.argsort(scores)[::-1]
    recs = [items[i] for i in idx_sorted if items[i] not in rated][:N]
    return recs

# usage: user_based_recommend(ratings, user_id=123, N=5)



In [3]:
# 3) Item-based CF (cosine similarity on item vectors)
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def item_based_recommend(ratings, user_id, N=5, k=5):
    """Recommend up to N unrated items using item-based collaborative filtering.

    Every item is scored by the similarity-weighted sum of the user's own
    ratings, where similarity is the cosine between item columns of the
    user-item matrix. For each candidate item only the ``k`` rated items most
    similar to it contribute (previously ``k`` was accepted but ignored, so
    every rated item contributed). When the user has at most ``k`` non-zero
    ratings the scores are the same as with no limit.

    Parameters
    ----------
    ratings : pandas.DataFrame
        Must provide the columns ``user``, ``item`` and ``rating``.
    user_id
        Label of the target user; must occur in ``ratings["user"]``.
    N : int, default 5
        Maximum number of recommendations to return.
    k : int or None, default 5
        Number of most similar rated items allowed to vote for each
        candidate. ``None`` lets every rated item vote.

    Returns
    -------
    list
        Item labels ordered by descending score, excluding items the user
        has already rated.

    Raises
    ------
    KeyError
        If ``user_id`` has no row in the user-item matrix.
    """
    # Users x items matrix; missing ratings are filled with 0.
    M = ratings.pivot_table(index="user", columns="item", values="rating").fillna(0)
    items = M.columns.to_list()
    user_row = M.loc[user_id].to_numpy()     # KeyError for an unknown user
    rated = set(ratings.loc[ratings.user == user_id, "item"])

    # Items with a zero entry add nothing to any score, so only the rows of
    # the similarity matrix that belong to non-zero ratings are needed.
    rated_idx = np.flatnonzero(user_row)
    if rated_idx.size == 0:
        scores = np.zeros(len(items))
    else:
        item_vecs = M.T.to_numpy()
        sim = cosine_similarity(item_vecs[rated_idx], item_vecs)   # (n_rated, n_items)
        contrib = sim * user_row[rated_idx][:, None]
        n_rated = rated_idx.size
        if k is not None and k < n_rated:
            # Per candidate (column), zero out all but the k most similar
            # rated items.
            n_drop = n_rated - max(k, 0)
            weakest = np.argsort(sim, axis=0)[:n_drop]
            np.put_along_axis(contrib, weakest, 0.0, axis=0)
        scores = contrib.sum(axis=0)

    idx_sorted = np.argsort(scores)[::-1]
    recs = [items[i] for i in idx_sorted if items[i] not in rated][:N]
    return recs

# usage: item_based_recommend(ratings, user_id=123, N=5)


In [4]:
# 4) Matrix Factorization (SVD via TruncatedSVD) — reconstruct scores
import numpy as np
from sklearn.decomposition import TruncatedSVD

def svd_recommend(ratings, user_id, N=5, n_components=20):
    """Recommend up to N unrated items from a truncated-SVD reconstruction.

    The user-item matrix is factorised with ``TruncatedSVD`` and the target
    user's row of the low-rank reconstruction is used as item scores.

    Parameters
    ----------
    ratings : pandas.DataFrame
        Must provide the columns ``user``, ``item`` and ``rating``.
    user_id
        Label of the target user; must occur in ``ratings["user"]``.
    N : int, default 5
        Maximum number of recommendations to return.
    n_components : int, default 20
        Requested number of latent factors. It is capped at
        ``min(n_users, n_items) - 1`` and never drops below 1.

    Returns
    -------
    list
        Item labels ordered by descending reconstructed score, excluding
        items the user has already rated.

    Raises
    ------
    KeyError
        If ``user_id`` has no row in the user-item matrix.
    """
    # Users x items matrix; missing ratings are filled with 0.
    M = ratings.pivot_table(index="user", columns="item", values="rating").fillna(0)
    items = M.columns.to_list()
    X = M.to_numpy()
    uidx = M.index.get_loc(user_id)          # KeyError for an unknown user

    # min(X.shape) - 1 is 0 for a single-user or single-item matrix, which is
    # not a valid component count, hence the lower bound of 1.
    rank = max(1, min(n_components, min(X.shape) - 1))
    svd = TruncatedSVD(n_components=rank, random_state=42)
    user_factors = svd.fit_transform(X)      # rows of X projected onto the components (U * Sigma)

    # Only the target user's reconstructed row is needed, so skip building
    # the full users x items approximation.
    scores = user_factors[uidx].dot(svd.components_)

    rated = set(ratings.loc[ratings.user == user_id, "item"])
    idx_sorted = np.argsort(scores)[::-1]
    recs = [items[i] for i in idx_sorted if items[i] not in rated][:N]
    return recs

# usage: svd_recommend(ratings, user_id=123, N=5, n_components=20)
