# Wide & Deep 电影推荐系统 (Kaggle 版本)

本系统使用 PyTorch 实现 Wide & Deep 推荐算法，并基于 MovieLens 10M 数据集（ml-10M100K）训练模型、生成电影推荐。

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 导入推荐系统
from wide_deep_recommender import MovieRecommender

# Configure pandas display: no cap on the number of columns shown, and at
# most 100 rows when a DataFrame is rendered.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', 100),
):
    pd.set_option(option_name, option_value)

# Confirmation that the import cell ran to completion.
print("导入成功！")

In [None]:
# Location of the dataset inside the Kaggle input mount.
DATA_PATH = '/kaggle/input/ml-10m/ml-10M100K'

# Build the recommender facade on top of that directory.
recommender = MovieRecommender(DATA_PATH)

# prepare_data() hands back six objects, unpacked here in order; they are
# consumed by the later cells of this notebook.
(
    ratings,
    movies,
    train_data,
    user_stats,
    movie_features,
    all_genres,
) = recommender.prepare_data()

In [None]:
# Print a short summary of the prepared data between two separator rules.
separator = "=" * 60
print(separator)
print("数据概览")
print(separator)

# Each figure is computed right before it is printed, one line per metric.
user_count = ratings['user_id'].nunique()
print(f"用户数量: {user_count:,}")

movie_count = movies['movie_id'].nunique()
print(f"电影数量: {movie_count:,}")

rating_count = len(ratings)
print(f"评分数量: {rating_count:,}")

genre_count = len(all_genres)
print(f"电影类型: {genre_count}")

## 构建和训练 Wide & Deep 模型

In [None]:
# Train the model. The original note says this is meant to be GPU-accelerated
# on Kaggle; device selection is presumably handled inside build_and_train
# (not visible here) -- confirm.
training_options = {
    'epochs': 5,
    'batch_size': 2048,
}
history = recommender.build_and_train(
    train_data, user_stats, movie_features, all_genres, **training_options
)

In [None]:
# Visualise the training run: one panel each for loss, accuracy and AUC,
# with the training curve and the validation curve drawn on every panel.
# NOTE(review): the legend/title strings are Chinese; matplotlib needs a
# CJK-capable font to render them -- confirm the runtime provides one.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# (history key, training label, validation label, panel title, y-axis label)
panels = (
    ('loss', '训练损失', '验证损失', '模型损失', 'Loss'),
    ('accuracy', '训练准确率', '验证准确率', '模型准确率', 'Accuracy'),
    ('auc', '训练 AUC', '验证 AUC', '模型 AUC', 'AUC'),
)

for ax, (metric, train_label, val_label, title, y_label) in zip(axes, panels):
    # Validation series live under the same key prefixed with "val_".
    ax.plot(history.history[metric], label=train_label, marker='o')
    ax.plot(history.history[f'val_{metric}'], label=val_label, marker='s')
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Report the validation AUC recorded for the last epoch.
final_val_auc = history.history['val_auc'][-1]
print(f"\n验证集 AUC: {final_val_auc:.4f}")

## 生成推荐

In [None]:
# Set up the recommendation engines from the prepared data.
recommender.setup_engines(ratings, movies, movie_features, user_stats, all_genres)

# Ask for the top-10 recommendations for a sample user; recommend() returns
# the recommended movie ids and their scores as two parallel sequences.
test_user_id = 1
recommended_movies, scores = recommender.recommend(test_user_id, top_k=10)

# Build the movie_id -> title lookup once instead of boolean-masking the whole
# `movies` frame for every recommended id. Keeping the first row per id
# matches the previous `.iloc[0]` choice when an id appears more than once.
title_by_id = movies.drop_duplicates(subset='movie_id').set_index('movie_id')['title']

print(f"为用户 {test_user_id} 推荐的电影:")
for idx, (movie_id, score) in enumerate(zip(recommended_movies, scores), 1):
    # An id missing from `movies` used to raise IndexError and abort the whole
    # listing; show a placeholder title and keep printing the remaining rows.
    title = title_by_id.get(movie_id, f"<unknown movie_id={movie_id}>")
    print(f"{idx}. {title} - 预测评分: {score:.4f}")

In [None]:
# Persist the trained model and the processor to the notebook's working
# directory, then confirm.
save_targets = {
    'model_path': 'wide_deep_model.pth',
    'processor_path': 'processor.pkl',
}
recommender.save(**save_targets)
print("模型保存成功！")