# 候选集生成

## 1. 协同过滤

### 1.1 基于项目的协同过滤

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Build the toy user-item rating matrix: one row per user, one column per movie.
# None marks a movie the user has not rated; it shows up as NaN in the frame.
user_ids = ['U1', 'U2', 'U3', 'U4', 'U5']
movie_titles = ['Movie A', 'Movie B', 'Movie C', 'Movie D']
rating_columns = [
    [5, 3, 4, 2, 5],
    [3, 1, 3, 1, 5],
    [4, 2, 4, 2, 5],
    [None, 3, 5, None, 4],
]
ratings = dict(zip(movie_titles, rating_columns))
df = pd.DataFrame(data=ratings, index=user_ids)
print("用户-电影评分矩阵：", df, sep="\n")

用户-电影评分矩阵：
    Movie A  Movie B  Movie C  Movie D
U1        5        3        4      NaN
U2        3        1        2      3.0
U3        4        3        4      5.0
U4        2        1        2      NaN
U5        5        5        5      4.0


In [3]:
# Item-item cosine similarity.
# Transposing puts one movie per row; missing ratings are filled with 0 so the
# movie vectors contain no NaN before they are handed to cosine_similarity.
movie_vectors = df.T.fillna(0)
item_similarity = cosine_similarity(movie_vectors)

# Label both axes with the movie names so similarities can be looked up by title.
movie_labels = df.columns
item_similarity_df = pd.DataFrame(
    item_similarity,
    index=movie_labels,
    columns=movie_labels,
)
print("\n电影之间的相似度:", item_similarity_df, sep="\n")


电影之间的相似度:
          Movie A   Movie B   Movie C   Movie D
Movie A  1.000000  0.955994  0.990805  0.779646
Movie B  0.955994  1.000000  0.979970  0.801110
Movie C  0.990805  0.979970  1.000000  0.806893
Movie D  0.779646  0.801110  0.806893  1.000000


In [7]:
# Predict U1's rating for Movie D as a similarity-weighted average of the
# ratings U1 has already given:
#     prediction = sum(sim(D, j) * r(U1, j)) / sum(|sim(D, j)|)
# where j ranges over the movies U1 has rated.
user_ratings = df.loc['U1'].dropna()  # movies U1 has actually rated
similar_items = item_similarity_df['Movie D'].drop('Movie D')  # similarity of every other movie to Movie D

# Align both series on movie labels. A positional np.dot on the raw series only
# works when U1 has rated every other movie in the same order; with any further
# unrated movie the lengths differ (or the pairs silently mismatch). The same
# label set is used for the numerator and the denominator so that unrated
# movies do not dilute the weighted average.
rated_items = user_ratings.index.intersection(similar_items.index)
rated_ratings = user_ratings.loc[rated_items]
rated_similarities = similar_items.loc[rated_items]

weighted_sum = np.dot(rated_ratings.to_numpy(), rated_similarities.to_numpy())
# Absolute values keep the normaliser positive even if a similarity is negative.
similarity_sum = np.sum(np.abs(rated_similarities))

print(user_ratings)
print(similar_items)
print(weighted_sum)
print(similarity_sum)

# Without any rated neighbour (or with all-zero similarities) the weighted
# average is undefined; report NaN instead of dividing by zero.
predicted_rating = weighted_sum / similarity_sum if similarity_sum > 0 else np.nan
print(f"\n预测 U1 对电影 D 的评分为: {predicted_rating:.2f}")

Movie A    5.0
Movie B    3.0
Movie C    4.0
Name: U1, dtype: float64
Movie A    0.779646
Movie B    0.801110
Movie C    0.806893
Name: Movie D, dtype: float64
9.529135225238912
2.3876498568306714

预测 U1 对电影 D 的评分为: 3.99
