In [1]:
from surprise import Dataset, Reader, KNNBasic, accuracy
from surprise.model_selection import train_test_split
import pandas as pd

# 1. 数据准备

In [4]:
# Load the raw ratings; header=None tells pandas the file has no header row,
# so the column names are supplied explicitly.
RATING_COLUMNS = ['user_id', 'item_id', 'rating']
df = pd.read_csv('data/userRatings.csv', header=None, names=RATING_COLUMNS)

# Declare the rating scale (1 to 5) for Surprise.
reader = Reader(rating_scale=(1, 5))

# Convert the frame into Surprise's dataset format; the columns are passed
# in user, item, rating order.
data = Dataset.load_from_df(df[RATING_COLUMNS], reader)

In [5]:
# Split into training and test sets: 20% of the ratings are held out for
# testing, and the fixed random_state keeps the split repeatable.
train_set, test_set = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

# 2. 训练UserCF模型

In [6]:
# UserCF configuration (user-based collaborative filtering):
#   name='cosine'    -> similarity measure: cosine similarity
#   user_based=True  -> True means UserCF, False means ItemCF
sim_options = dict(name='cosine', user_based=True)

# Initialise the KNN model with that similarity configuration.
model = KNNBasic(sim_options=sim_options)

# Train the model on the training split.
model.fit(train_set)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x2a026a599f0>

# 3. 预测和评估

In [8]:
# Run the trained model over the held-out test set.
predictions = model.test(test_set)
# Backward-compatible alias: this cell originally used the misspelled name
# `preditions`; keep it bound in case other cells still read it.
preditions = predictions

# Compute RMSE and MAE. verbose=False stops each accuracy helper from printing
# its own line, so every metric is reported exactly once by the print() below
# instead of twice.
rmse = accuracy.rmse(predictions, verbose=False)
mae = accuracy.mae(predictions, verbose=False)

# Compute FCP (fraction of concordant pairs), which measures ranking consistency.
fcp = accuracy.fcp(predictions, verbose=False)

# The leading spaces before MAE/FCP line their colons up with "RMSE:".
print(f'RMSE:{rmse}\n MAE:{mae}\n FCP:{fcp}')

RMSE: 0.9906
MAE:  0.7727
FCP:  0.6438
RMSE:0.9905753942755922
 MAE:0.7726829649410749
 FCP:0.6437823413421992
