# 推荐算法对比实验

本实验对比三种推荐算法在诗词推荐系统上的表现：

1. **Content-Based (CB)** - 基于 TF-IDF 内容相似度
2. **Item-Based CF** - 基于评分矩阵的物品协同过滤
3. **BERTopic Hybrid** - 本系统采用的混合推荐 (语义向量 + User-CF + Item-CF)

## 评估指标
- MAE (平均绝对误差)
- Precision@K (精确率)
- Recall@K (召回率)
- F1-Score

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import random
from collections import defaultdict

# Set up the working directory and import path.
# 'comparison.ipynb' is resolved against the current working directory, so this
# assumes the kernel was started in the notebook's own folder.
notebook_dir = os.path.dirname(os.path.abspath('comparison.ipynb'))
# The project root is taken to be two directory levels above the notebook folder.
project_root = os.path.abspath(os.path.join(notebook_dir, '..', '..'))
# Put the project root first on sys.path so the `backend` package below resolves.
sys.path.insert(0, project_root)
# NOTE(review): after this chdir, re-running the cell resolves
# 'comparison.ipynb' against the new working directory, so project_root would
# move two more levels up each time — restart the kernel before re-running.
os.chdir(project_root)

# Project-local recommenders; these imports depend on the sys.path entry above.
from backend.core.content_recommender import ContentBasedRecommender
from backend.core.collaborative_filter import ItemBasedCFRecommender
from backend.core.bertopic_recommender import BertopicRecommender

print("依赖加载完成")

## 1. 数据准备

In [None]:
def generate_simulation_data(n_users=50, n_poems=100, n_ratings_per_user=15, seed=None):
    """Generate simulated poem-rating data.

    Poems cycle through ten fixed verse templates. Users are assigned
    round-robin to one of three preference groups; a poem whose template index
    is in the user's group gets a base rating of 4.5, any other poem 3.0.
    Uniform noise in [-1, 1] is added, the result is clipped to [1.0, 5.0] and
    rounded to one decimal.

    Args:
        n_users: Number of users to create (ids ``0..n_users-1``).
        n_poems: Number of poems to create (ids ``0..n_poems-1``).
        n_ratings_per_user: Poems rated per user, capped at ``n_poems``.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the data is reproducible and the global random state is
            untouched. When ``None`` the module-level ``random`` functions are
            used, as before.

    Returns:
        A ``(users, poems, interactions)`` tuple of lists of dicts.
        Interactions carry ``user_id``, ``poem_id``, ``rating`` (plain float)
        and ``liked`` (plain bool, ``rating >= 4.0`` on the stored rating).
    """
    # The `random` module exposes the same sample()/uniform() API as an instance.
    rng = random.Random(seed) if seed is not None else random

    poem_contents = [
        "明月几时有把酒问青天",
        "床前明月光疑是地上霜",
        "春风又绿江南岸明月何时照我还",
        "大漠孤烟直长河落日圆",
        "会当凌绝顶一览众山小",
        "海内存知己天涯若比邻",
        "落红不是无情物化作春泥更护花",
        "春蚕到死丝方尽蜡炬成灰泪始干",
        "山重水复疑无路柳暗花明又一村",
        "欲穷千里目更上一层楼",
    ]
    n_templates = len(poem_contents)

    poems = [
        {
            'id': i,
            'title': f'诗词{i}',
            'content': poem_contents[i % n_templates] + f' 诗云作品第{i}首',
        }
        for i in range(n_poems)
    ]

    users = [{'id': i, 'username': f'user_{i}'} for i in range(n_users)]

    # NOTE(review): membership is tested against `item_idx % n_templates`,
    # which is always in 0..9, so the entries >= 10 below can never match.
    # Left unchanged because the intended meaning (template index vs. poem id)
    # cannot be determined from this code.
    user_preferences = [
        [0, 1, 2, 10, 11, 20, 21],
        [3, 4, 9, 12, 30, 31],
        [5, 6, 7, 8, 13, 14, 40, 41],
    ]

    n_rated = min(n_ratings_per_user, n_poems)

    interactions = []
    for user_idx in range(n_users):
        pref_group = user_preferences[user_idx % len(user_preferences)]

        for item_idx in rng.sample(range(n_poems), n_rated):
            base_rating = 4.5 if item_idx % n_templates in pref_group else 3.0
            noisy = base_rating + rng.uniform(-1, 1)
            # Convert to a plain float before rounding so the records hold
            # built-in types rather than NumPy scalars.
            rating = round(float(np.clip(noisy, 1.0, 5.0)), 1)
            interactions.append({
                'user_id': user_idx,
                'poem_id': item_idx,
                'rating': rating,
                # Derived from the stored (rounded) rating so the two fields
                # can never disagree at the 4.0 boundary.
                'liked': rating >= 4.0,
            })

    print("生成数据:")
    print(f"  用户数: {n_users}")
    print(f"  诗歌数: {n_poems}")
    print(f"  评分数: {len(interactions)}")

    return users, poems, interactions

# Build the simulated dataset with the default sizes; these module-level names
# are used by the later cells.
users, poems, all_interactions = generate_simulation_data()

## 2. 划分训练集和测试集

In [None]:
def split_train_test(interactions, test_ratio=0.2, seed=None):
    """Split interactions into train and test sets, per user.

    Each user's interactions are shuffled and the first ``test_size`` go to
    the test set. ``test_size`` is ``int(n * test_ratio)`` with a minimum of
    one, but is capped at ``n - 1`` so every user keeps at least one training
    interaction. A user with a single interaction therefore stays entirely in
    the training set instead of having no training data at all.

    Args:
        interactions: Iterable of dicts, each with a ``user_id`` key.
        test_ratio: Fraction of each user's interactions to hold out.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            makes the split reproducible; when ``None`` the module-level
            ``random`` state is used, as before.

    Returns:
        A ``(train_interactions, test_interactions)`` tuple of lists.
    """
    # The `random` module exposes the same shuffle() API as an instance.
    rng = random.Random(seed) if seed is not None else random

    user_interactions = defaultdict(list)
    for inter in interactions:
        user_interactions[inter['user_id']].append(inter)

    train_interactions = []
    test_interactions = []

    for user_inters in user_interactions.values():
        n = len(user_inters)
        test_size = min(max(1, int(n * test_ratio)), n - 1)

        # Shuffle a copy so the caller's records are not reordered.
        shuffled = list(user_inters)
        rng.shuffle(shuffled)

        test_interactions.extend(shuffled[:test_size])
        train_interactions.extend(shuffled[test_size:])

    return train_interactions, test_interactions

# Hold out part of every user's ratings (default test_ratio) for evaluation.
train_interactions, test_interactions = split_train_test(all_interactions)

# Report the resulting set sizes.
print(f"训练集大小: {len(train_interactions)}")
print(f"测试集大小: {len(test_interactions)}")

## 3. 训练模型

In [None]:
# Fit the three recommenders. Only the training split is passed to the models
# that take interactions; the content-based model is fitted on poems alone.
print("=" * 60)
print("训练 Content-Based 模型...")
cb_recommender = ContentBasedRecommender()
cb_recommender.fit(poems)

print("\n训练 Item-CF 模型...")
item_cf_recommender = ItemBasedCFRecommender()
# The Item-CF model is given the full list of poem ids alongside the ratings.
poem_ids = [p['id'] for p in poems]
item_cf_recommender.fit(train_interactions, poem_ids)

print("\n训练 BERTopic 模型...")
bertopic_recommender = BertopicRecommender()
bertopic_recommender.fit(poems, train_interactions)

print("\n所有模型训练完成!")

## 4. 评估函数

In [None]:
def _recommender_kind(recommender):
    """Classify a recommender so the matching call convention can be used."""
    # The attribute check comes first: any object exposing `bertopic_model`
    # is treated as the BERTopic hybrid regardless of its class name.
    if hasattr(recommender, 'bertopic_model'):
        return 'bertopic'
    class_name = type(recommender).__name__
    if class_name == 'ContentBasedRecommender':
        return 'content'
    if class_name == 'ItemBasedCFRecommender':
        return 'item_cf'
    return None


def _rated_poems_and_ratings(train_items, poems, poem_id_to_idx):
    """Return the known poems a user rated, with the ratings in the same order."""
    rated_poems = []
    ratings = []
    for item in train_items:
        poem_idx = poem_id_to_idx.get(item['poem_id'])
        if poem_idx is not None:
            rated_poems.append(poems[poem_idx])
            ratings.append(item['rating'])
    return rated_poems, ratings


def _predict_rating(recommender, kind, train_items, user_profile, poem_id,
                    poem_id_to_idx, default=3.0):
    """Predict one rating, falling back to `default` when no prediction applies."""
    if kind in ('bertopic', 'item_cf'):
        return recommender.predict_rating(train_items, poem_id)
    if kind == 'content' and user_profile is not None:
        poem_idx = poem_id_to_idx.get(poem_id)
        if poem_idx is not None:
            return recommender.predict_rating(user_profile, poem_idx)
    return default


def calculate_metrics(recommender, train_interactions, test_interactions, poems, top_k=10, threshold=4.0):
    """Compute MAE, Precision, Recall and F1 for one recommender.

    For every user that has training data and at least one relevant test item
    (test rating >= ``threshold``), a recommendation list is requested and
    compared against the relevant items, and a rating is predicted for each of
    the user's test items. Users failing either condition are skipped for all
    four metrics.

    Only the user's *training* poems are excluded from the recommendations.
    Relevant items are drawn from the test set, so excluding test poems too
    would make a hit impossible for any recommender that honours
    ``exclude_ids``.

    Args:
        recommender: A fitted recommender. It is dispatched on the presence of
            a ``bertopic_model`` attribute, then on its class name
            (``ContentBasedRecommender`` / ``ItemBasedCFRecommender``).
        train_interactions: Training interaction dicts (``user_id``,
            ``poem_id``, ``rating``).
        test_interactions: Held-out interaction dicts with the same keys.
        poems: List of poem dicts with an ``id`` key.
        top_k: Number of recommendations requested per user.
        threshold: Minimum test rating for an item to count as relevant.

    Returns:
        Dict with keys ``mae``, ``precision``, ``recall`` and ``f1``. Each is
        the mean over the evaluated users / predictions, or 0 when nothing
        could be evaluated.
    """
    user_test_data = defaultdict(list)
    for inter in test_interactions:
        user_test_data[inter['user_id']].append(inter)

    user_train_data = defaultdict(list)
    for inter in train_interactions:
        user_train_data[inter['user_id']].append(inter)

    total_mae = []
    total_precision = []
    total_recall = []
    total_f1 = []

    poem_id_to_idx = {p['id']: i for i, p in enumerate(poems)}
    kind = _recommender_kind(recommender)

    failed_predictions = 0
    last_prediction_error = None

    for user_id, test_items in user_test_data.items():
        train_items = user_train_data.get(user_id, [])
        if not train_items:
            continue

        relevant_items = {i['poem_id'] for i in test_items if i['rating'] >= threshold}
        if not relevant_items:
            continue

        # Exclude only what the user has already seen in training; held-out
        # poems must stay recommendable or no hit can ever be scored.
        exclude_ids = {i['poem_id'] for i in train_items}

        user_profile = None
        recs = []
        try:
            if kind == 'bertopic':
                # NOTE(review): exclude_ids is not passed on this path; whether
                # the BERTopic recommender filters already-rated poems itself
                # cannot be seen from here — confirm against its implementation.
                recs = recommender.recommend(train_items, train_interactions, top_k)
            elif kind == 'content':
                # The content-based recommender works from a user profile
                # built out of the poems the user rated in training.
                rated_poems, ratings = _rated_poems_and_ratings(
                    train_items, poems, poem_id_to_idx
                )
                if rated_poems:
                    user_profile = recommender.get_user_profile(rated_poems, ratings)
                    recs = recommender.recommend(user_profile, exclude_ids, top_k)
            elif kind == 'item_cf':
                recs = recommender.recommend(train_items, exclude_ids, top_k)
        except Exception as e:
            # Best effort: a failing recommender scores as an empty list.
            print(f"  推荐异常: {e}")
            recs = []

        recommended_items = {r['poem_id'] for r in recs} if recs else set()

        tp = len(recommended_items & relevant_items)
        fp = len(recommended_items - relevant_items)
        fn = len(relevant_items - recommended_items)

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

        total_precision.append(precision)
        total_recall.append(recall)
        total_f1.append(f1)

        # Rating prediction for MAE. Failures are skipped but counted, so a
        # low MAE built on very few successful predictions is visible.
        for test_item in test_items:
            try:
                pred = _predict_rating(
                    recommender, kind, train_items, user_profile,
                    test_item['poem_id'], poem_id_to_idx,
                )
                total_mae.append(abs(pred - test_item['rating']))
            except Exception as e:
                failed_predictions += 1
                last_prediction_error = e

    if failed_predictions:
        print(
            f"  评分预测异常: {failed_predictions} 次未计入 MAE "
            f"(最近一次: {last_prediction_error})"
        )

    return {
        'mae': np.mean(total_mae) if total_mae else 0,
        'precision': np.mean(total_precision) if total_precision else 0,
        'recall': np.mean(total_recall) if total_recall else 0,
        'f1': np.mean(total_f1) if total_f1 else 0
    }

## 5. 运行实验

In [None]:
# Evaluate the three fitted recommenders on the same held-out split.
print("=" * 60)
print("开始评估...")
print("=" * 60)

# Shared evaluation settings: recommendation list length and the minimum
# rating for a test item to count as relevant.
top_k = 10
threshold = 4.0

print("\n[1] Content-Based 评估...")
cb_metrics = calculate_metrics(cb_recommender, train_interactions, test_interactions, poems, top_k, threshold)

print("[2] Item-CF 评估...")
item_cf_metrics = calculate_metrics(item_cf_recommender, train_interactions, test_interactions, poems, top_k, threshold)

print("[3] BERTopic Hybrid 评估...")
bertopic_metrics = calculate_metrics(bertopic_recommender, train_interactions, test_interactions, poems, top_k, threshold)

print("\n评估完成!")

## 6. 结果展示

In [None]:
import matplotlib.pyplot as plt

# Collect the per-algorithm metric dicts under their display names.
results = {
    'Content-Based': cb_metrics,
    'Item-CF': item_cf_metrics,
    'BERTopic Hybrid': bertopic_metrics
}

banner = "=" * 60

print(banner)
print("实验结果对比")
print(banner)

# Table header, then one left-aligned row per algorithm.
print(f"\n{'算法':<20} {'MAE':<10} {'Precision':<12} {'Recall':<10} {'F1':<10}")
print("-" * 60)

for name, metrics in results.items():
    row = (
        f"{name:<20} "
        f"{metrics['mae']:<10.4f} "
        f"{metrics['precision']:<12.4f} "
        f"{metrics['recall']:<10.4f} "
        f"{metrics['f1']:<10.4f}"
    )
    print(row)

print("\n" + banner)
print("结论: BERTopic Hybrid 在各指标上的表现")
print(banner)

cb_mae = cb_metrics['mae']
item_cf_mae = item_cf_metrics['mae']
bertopic_mae = bertopic_metrics['mae']

# MAE is lower-is-better and must be strictly lower than both baselines.
bertopic_has_lowest_mae = bertopic_mae < cb_mae and bertopic_mae < item_cf_mae
if bertopic_has_lowest_mae:
    print(f"✓ MAE: BERTopic Hybrid 最优 ({bertopic_mae:.4f})")
else:
    print(f"✗ MAE: BERTopic Hybrid 为 {bertopic_mae:.4f}")

# The ranking metrics are higher-is-better; a tie with a baseline counts as best.
for metric_key, metric_label in (('precision', 'Precision'), ('recall', 'Recall'), ('f1', 'F1')):
    bertopic_score = bertopic_metrics[metric_key]
    if bertopic_score >= cb_metrics[metric_key] and bertopic_score >= item_cf_metrics[metric_key]:
        print(f"✓ {metric_label}: BERTopic Hybrid 最优 ({bertopic_score:.4f})")
    else:
        print(f"✗ {metric_label}: BERTopic Hybrid 为 {bertopic_score:.4f}")

In [None]:
# Visual comparison: one panel per metric, one bar per algorithm.
fig, axes = plt.subplots(1, 4, figsize=(16, 4))

metrics_names = ['mae', 'precision', 'recall', 'f1']
titles = ['MAE (越低越好)', 'Precision@K', 'Recall@K', 'F1-Score']

# Identical for every panel, so defined once outside the loop.
algorithm_keys = ('Content-Based', 'Item-CF', 'BERTopic Hybrid')
bar_labels = ['CB', 'Item-CF', 'BERTopic']
colors = ['#3498db', '#e74c3c', '#2ecc71']

for ax, metric, title in zip(axes, metrics_names, titles):
    values = [results[key][metric] for key in algorithm_keys]

    bars = ax.bar(bar_labels, values, color=colors)
    ax.set_title(title)
    ax.set_ylabel('Score')

    # Print each value just above its bar, centred horizontally.
    for bar, val in zip(bars, values):
        x_center = bar.get_x() + bar.get_width()/2
        y_label = bar.get_height() + 0.01
        ax.text(x_center, y_label, f'{val:.3f}',
                ha='center', va='bottom', fontsize=10)

plt.tight_layout()
plt.savefig('experiment_results.png', dpi=150, bbox_inches='tight')
plt.show()

print("\n图表已保存至 experiment_results.png")

## 7. 总结

本实验对比了三种推荐算法：

1. **Content-Based (CB)**: 基于 TF-IDF 特征的内容推荐
   - 优点: 冷启动问题小，不需要其他用户数据
   - 缺点: 只能推荐与历史相似的物品，难以发现新兴趣

2. **Item-Based CF**: 基于评分矩阵的物品协同过滤
   - 优点: 可以发现用户的潜在兴趣
   - 缺点: 冷启动问题严重，新物品难以被推荐

3. **BERTopic Hybrid**: 混合推荐系统
   - 结合语义向量 (BERTopic)
   - 用户协同过滤 (User-CF)
   - 物品协同过滤 (Item-CF)
   - 根据用户活跃度动态调整权重
   - **优势**: 兼顾内容相似性和用户行为相似性