## 协同过滤推荐
使用了三种协同过滤推荐的算法并将三者混合

第一种：简单协同过滤

In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

df = pd.read_csv("collaborative_data.csv")

# 构造 pivot，模拟为只有一个用户对所有游戏评分
pivot = df.pivot_table(index='name', values='user_score')

# 计算 item-item 相似度
similarity_matrix = cosine_similarity(pivot.fillna(0))
similarity_df = pd.DataFrame(similarity_matrix, index=pivot.index, columns=pivot.index)

def recommend_simple_cf(game_name, top_n=5):
    if game_name not in similarity_df.index:
        return []
    sim_scores = similarity_df[game_name].sort_values(ascending=False)[1:top_n+1]
    return sim_scores.index.tolist()
print('done')

done


第二种：SVD协同过滤

In [5]:
!pip install scikit-surprise

Defaulting to user installation because normal site-packages is not writeable


In [7]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

# 构造模拟用户数据
df_svd = df.copy()
df_svd['user_id'] = df_svd.index % 1000  # 构造 1000 个模拟用户

reader = Reader(rating_scale=(0, 10))
data = Dataset.load_from_df(df_svd[['user_id', 'name', 'user_score']], reader)
trainset, testset = train_test_split(data, test_size=0.2)

model_svd = SVD()
model_svd.fit(trainset)

def recommend_svd(user_id, top_n=5):
    all_items = df['name'].unique()
    predictions = [model_svd.predict(user_id, iid) for iid in all_items]
    predictions.sort(key=lambda x: x.est, reverse=True)
    return [pred.iid for pred in predictions[:top_n]]
print('done')

done


基于KNN的协同过滤

In [9]:
from surprise import KNNWithMeans

#  清洗评分为0或缺失的行，避免除零错误
df_knn = df.dropna(subset=['user_score'])  # 避免 NaN
df_knn = df_knn[df_knn['user_score'] > 0]  # 避免评分为0

# 模拟用户ID（如之前那样 % 1000）
df_knn['user_id'] = df_knn.index % 1000

from surprise import Dataset, Reader

reader = Reader(rating_scale=(0, 10))
data = Dataset.load_from_df(df_knn[['user_id', 'name', 'user_score']], reader)
trainset = data.build_full_trainset()  # 使用全量数据而非 train_test_split

# 构建 KNN 协同过滤模型
sim_options = {
    'name': 'cosine',
    'user_based': False  # False = item-based
}

model_knn = KNNWithMeans(sim_options=sim_options, verbose=True)
model_knn.fit(trainset)

# 推荐函数：给定模拟用户推荐游戏
def recommend_knn(user_id, top_n=5):
    all_items = df_knn['name'].unique()
    predictions = [model_knn.predict(user_id, iid) for iid in all_items]
    predictions.sort(key=lambda x: x.est, reverse=True)
    return [pred.iid for pred in predictions[:top_n]]

print('done')

Computing the cosine similarity matrix...
Done computing similarity matrix.
done


混合以上三种

In [10]:
def hybrid_recommend(game_name, user_id=None, alpha=0.5, top_n=5):
    # 内容推荐得分
    content_scores = similarity_df[game_name].copy()
    
    # 协同推荐得分（模拟的 SVD 得分）
    all_items = df['name'].unique()
    collab_scores = {iid: model_svd.predict(user_id, iid).est for iid in all_items}
    
    # 加权混合
    hybrid_scores = {}
    for item in all_items:
        content_score = content_scores.get(item, 0)
        collab_score = collab_scores.get(item, 0)
        hybrid_scores[item] = alpha * content_score + (1 - alpha) * collab_score

    # 排序推荐
    ranked = sorted(hybrid_scores.items(), key=lambda x: x[1], reverse=True)
    return [item[0] for item in ranked[:top_n]]
print('done')

done


示例演示：

In [11]:
print("简单协同过滤推荐：", recommend_simple_cf("Wii Sports"))
print("SVD 推荐（用户 10）：", recommend_svd(10))
print("KNN 推荐（用户 10）：", recommend_knn(10))
print("混合推荐（用户 10）：", hybrid_recommend("Wii Sports", user_id=10))

简单协同过滤推荐： ['Rayman DS', 'Red Bull BC One', 'Record of Agarest War Zero', 'Record of Agarest War 2', 'Record of Agarest War']
SVD 推荐（用户 10）： ['The Witcher 3: Wild Hunt', 'Need for Speed: Most Wanted', 'Mega Man Anniversary Collection', 'LEGO Batman: The Videogame', 'Harry Potter and the Chamber of Secrets']
KNN 推荐（用户 10）： ['Breath of Fire III', 'Harvest Moon: Friends of Mineral Town', 'Boktai: The Sun is in Your Hand', 'Golden Sun: The Lost Age', 'MLB SlugFest Loaded']
混合推荐（用户 10）： ['The Witcher 3: Wild Hunt', 'Need for Speed: Most Wanted', 'Mega Man Anniversary Collection', 'LEGO Batman: The Videogame', 'Harry Potter and the Chamber of Secrets']
