In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [None]:
import os
import sys

# Switch the working directory to the project root and make it importable.
# The guard makes this cell safe to re-run: without it, every re-execution would
# resolve "../../" against the already-changed working directory and climb two
# more levels up the tree.
if "project_root" not in globals():
    project_root = os.path.abspath("../../")  # adjust to this notebook's depth in the repo
    os.chdir(project_root)

if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [None]:
import pandas as pd
import numpy as np

# Load the raw tables.
anime_data = pd.read_csv("anime_data.csv")
user_ratings = pd.read_csv("anime_info.csv")

# Clean anime_data: strip the formatting characters from the string columns and
# convert them to integers. regex=False because "#" and "," are literal
# characters here, not patterns.
anime_data["popularity"] = anime_data["popularity"].str.replace("#", "", regex=False).astype(int)  # drop "#"
anime_data["members"] = anime_data["members"].str.replace(",", "", regex=False).astype(int)  # drop thousands separator
anime_data["favorites"] = anime_data["favorites"].str.replace(",", "", regex=False).astype(int)  # drop thousands separator

# Give every anime a unique ID (its row index).
anime_data["anime_id"] = anime_data.index

# Build the genre vocabulary and map each genre name to an integer ID.
# The vocabulary is sorted before enumeration: iterating a set of strings has
# no stable order across interpreter sessions, which would silently reshuffle
# the IDs (and invalidate any trained genre embedding) on every kernel restart.
all_genres = {g for genre_list in anime_data["genres"].dropna() for g in genre_list.split(", ")}
genre_to_id = {genre: idx for idx, genre in enumerate(sorted(all_genres))}
anime_data["genre_ids"] = anime_data["genres"].apply(
    lambda x: [genre_to_id[g] for g in x.split(", ")] if pd.notna(x) else []  # missing genres -> empty list
)

# Clean user_ratings: drop rows whose rating is the "-" placeholder.
# .copy() yields an independent frame so the column assignments below do not
# write into a filtered slice (which triggers SettingWithCopyWarning).
user_ratings = user_ratings[user_ratings["rating"] != "-"].copy()
user_ratings["rating"] = user_ratings["rating"].astype(float)  # ratings as floats

# Assign each distinct username an integer user ID.
user_to_id = {user: idx for idx, user in enumerate(user_ratings["username"].unique())}
user_ratings["user_id"] = user_ratings["username"].map(user_to_id)

# Map each rated anime's name to the anime_id assigned in anime_data.
anime_name_to_id = dict(zip(anime_data["title"], anime_data["anime_id"]))
user_ratings["anime_id"] = user_ratings["anime"].map(anime_name_to_id)

# Drop ratings whose anime name has no match in anime_data, then restore the
# integer dtype (the NaNs introduced by the failed lookups forced it to float).
user_ratings = user_ratings.dropna(subset=["anime_id"]).copy()
user_ratings["anime_id"] = user_ratings["anime_id"].astype(int)

# Build the user feature table. Ages are synthetic placeholders drawn uniformly
# from [18, 40); a seeded generator keeps them identical across runs so that
# results are reproducible after a kernel restart.
unique_user_ids = user_ratings["user_id"].unique()
age_rng = np.random.default_rng(42)
user_features = pd.DataFrame({
    "user_id": unique_user_ids,
    "age": age_rng.integers(18, 40, size=len(unique_user_ids)),
})

# Attach the anime columns to every rating row.
model_input = user_ratings.merge(anime_data, on="anime_id", how="left")

# Per-row age lookup keyed by user_id.
age_by_user = user_features.set_index("user_id")["age"]

# Collect the user-side and anime-side model inputs.
# NOTE(review): "genre_id" is an object array of Python lists whose lengths
# vary per title; it has to be padded to a rectangular integer array before it
# can be converted to a tensor and fed to an embedding layer.
X = {
    "user_id": model_input["user_id"].values,
    "user_age": model_input["user_id"].map(age_by_user).values.reshape(-1, 1),
    "anime_id": model_input["anime_id"].values,
    "anime_meta": model_input[["score", "members", "favorites"]].values,
    "genre_id": model_input["genre_ids"].values
}

# Target variable: the user's rating.
y = model_input["rating"].values

In [2]:
class AnimeRecommendationModel(nn.Module):
    """Rating predictor that fuses ID embeddings with dense side features.

    User and anime IDs are embedded, the user's age and the anime's numeric
    metadata are projected through small linear layers, genre IDs are embedded
    and averaged, and the concatenation is passed through a three-layer MLP
    that outputs one predicted rating per sample.

    Args:
        num_users: Number of distinct user IDs (rows of the user embedding).
        num_animes: Number of distinct anime IDs (rows of the anime embedding).
        genres_dim: Number of distinct genre IDs (rows of the genre embedding).
        embed_dim: Width of the user and anime ID embeddings.
    """

    def __init__(self, num_users, num_animes, genres_dim=10, embed_dim=32):
        super().__init__()

        side_dim = 16        # width of each projected dense feature (age, meta)
        genre_embed_dim = 8  # width of one genre embedding

        # ID embeddings for users and animes.
        self.user_embedding = nn.Embedding(num_users, embed_dim)
        self.anime_embedding = nn.Embedding(num_animes, embed_dim)

        # Projection of the single scalar user feature (age).
        self.user_age_fc = nn.Linear(1, side_dim)

        # Projection of the three numeric anime features.
        self.anime_meta_fc = nn.Linear(3, side_dim)

        # Embedding for the categorical genre IDs.
        self.genre_embedding = nn.Embedding(genres_dim, genre_embed_dim)

        # MLP head; its input is the concatenation of both ID embeddings,
        # both projected dense features and the pooled genre embedding.
        self.fc1 = nn.Linear(embed_dim * 2 + side_dim * 2 + genre_embed_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)  # output layer: predicted rating

    def forward(self, user_id, user_age, anime_id, anime_meta, genre_id):
        """Predict ratings for a batch of (user, anime) pairs.

        Args:
            user_id: Integer tensor of user IDs, shape (batch_size,).
            user_age: Tensor of ages, shape (batch_size, 1). Any numeric dtype.
            anime_id: Integer tensor of anime IDs, shape (batch_size,).
            anime_meta: Tensor of three numeric anime features, shape
                (batch_size, 3). Any numeric dtype.
            genre_id: Integer tensor of genre IDs, shape
                (batch_size, genres_per_sample); embeddings are averaged over
                the second dimension.

        Returns:
            Tensor of shape (batch_size, 1) with the predicted ratings.
        """
        # Dense features often arrive as int64 / float64 (e.g. straight from
        # NumPy); nn.Linear requires the input dtype to match its weights, so
        # cast instead of raising a dtype-mismatch error.
        user_age = user_age.to(self.user_age_fc.weight.dtype)
        anime_meta = anime_meta.to(self.anime_meta_fc.weight.dtype)

        # User-side features.
        user_embed = self.user_embedding(user_id)            # (batch_size, embed_dim)
        user_age_embed = F.relu(self.user_age_fc(user_age))  # (batch_size, 16)

        # Anime-side features.
        anime_embed = self.anime_embedding(anime_id)               # (batch_size, embed_dim)
        anime_meta_embed = F.relu(self.anime_meta_fc(anime_meta))  # (batch_size, 16)

        # Mean-pool the embeddings of all genres attached to each sample.
        genre_embed = self.genre_embedding(genre_id).mean(dim=1)  # (batch_size, 8)

        # Concatenate every feature group along the feature dimension.
        concat_features = torch.cat(
            [user_embed, user_age_embed, anime_embed, anime_meta_embed, genre_embed], dim=1
        )

        # MLP head.
        x = F.relu(self.fc1(concat_features))
        x = F.relu(self.fc2(x))
        output = self.fc3(x)  # predicted rating

        return output