# Setup

In [2]:
## Import libraries

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
import numpy as np

## Read CSV

# Why using ISO-8859-1?
# Having invalid start byte (not utf-8 caharacters) 
reviews = pd.read_csv("data/reviews.csv", encoding="ISO-8859-1")

animes = pd.read_csv("data/animes.csv", encoding="utf-8")

## Keep only needed columns

reviews = reviews[['profile', 'anime_uid', 'rating']].dropna()
animes = animes[['uid', 'title']]

## Make connect map from review to animes

# To get anime title by using UID
anime_map = dict(zip(animes['uid'].astype(str), animes['title']))

## Replace Anime_UID with title

reviews['title'] = reviews['anime_uid'].astype(str).map(anime_map)

# Drop missing titles
reviews = reviews.dropna(subset=['title'])

reviews['rating'] = pd.to_numeric(reviews['rating'], errors='coerce')

anime_user_matrix = reviews.pivot_table(
    index='title', 
    columns='profile', 
    values='rating'
).fillna(0)

anime_similarity = cosine_similarity(anime_user_matrix)
anime_similarity_df = pd.DataFrame(
    anime_similarity, 
    index=anime_user_matrix.index, 
    columns=anime_user_matrix.index
)

print("Setup - Done")

Done


# Precision/Recall/F1

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
import random

random.seed(42)
np.random.seed(42)

def leave_one_out_split(reviews):
    train, test = [], []
    for user, group in reviews.groupby("profile"):
        if len(group) < 2:  # 只有一条评分的用户直接全放训练集
            train.extend(group.to_dict("records"))
            continue
        # 随机抽 1 条作为测试，其余作为训练
        test_idx = random.choice(range(len(group)))
        for i, row in enumerate(group.to_dict("records")):
            if i == test_idx:
                test.append(row)
            else:
                train.append(row)
    return pd.DataFrame(train), pd.DataFrame(test)

def get_recommendations(anime_title, similarity_df, top_n=10):
    if anime_title not in similarity_df.index:
        return []
    sim_scores = similarity_df[anime_title].sort_values(ascending=False)
    return sim_scores.index[1:top_n+1].tolist()


def evaluate(reviews, anime_similarity_df, threshold=7, top_n=10):
    train, test = leave_one_out_split(reviews)

    y_true, y_pred = [], []

    for user, group in test.groupby("profile"):
        liked_animes = set(group[group["rating"] >= threshold]["title"])
        if not liked_animes:
            continue

        # 训练集里用户喜欢的作品作为推荐起点
        user_train_likes = train[(train["profile"] == user) & (train["rating"] >= threshold)]["title"].tolist()
        if not user_train_likes:
            continue

        recommended = set()
        for anime in user_train_likes:
            recommended.update(get_recommendations(anime, anime_similarity_df, top_n=top_n))

        # ground truth
        for anime in liked_animes:
            y_true.append(1)
            y_pred.append(1 if anime in recommended else 0)

    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    return precision, recall, f1

precision, recall, f1 = evaluate(reviews, anime_similarity_df, threshold=7, top_n=10)
print("Precision:", precision)
print("Recall:", recall)
print("F1:", f1)

Precision: 1.0
Recall: 0.16486005500728038
F1: 0.28305555555555556


# Get RMSE

In [16]:
from sklearn.metrics import mean_squared_error
import numpy as np

def evaluate_rmse(reviews, similarity_df, test_size=0.2):
    # 这里还是你之前的 train/test split 和预测逻辑...
    train, test = [], []
    for user, group in reviews.groupby("profile"):
        if len(group) < 2:
            train.extend(group.to_dict("records"))
            continue
        test_idx = np.random.choice(range(len(group)), size=max(1, int(len(group) * test_size)), replace=False)
        for i, row in enumerate(group.to_dict("records")):
            if i in test_idx:
                test.append(row)
            else:
                train.append(row)
    
    train = pd.DataFrame(train)
    test = pd.DataFrame(test)

    y_true, y_pred = [], []
    for _, row in test.iterrows():
        pred = predict_rating(row["profile"], row["title"], train, similarity_df)
        if not np.isnan(pred):
            y_true.append(row["rating"])
            y_pred.append(pred)

    # 手动算 RMSE
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    return mse, rmse

# 调用
mse, rmse = evaluate_rmse(reviews, anime_similarity_df, test_size=0.2)
print("MSE:", mse)
print("RMSE:", rmse)

MSE: 4.997104233661449
RMSE: 2.235420370682313


# Overall

1. Precision = 1.0
- Almost all recommended anime were truly liked by users.
- The system did not include unrelated anime.

2. Recall ≈ 0.165
- Only around 16.5% of the anime that users actually liked were recommended.
- System’s coverage is relatively low.

3. F1 ≈ 0.283
- The F1 score reflects a balance between Precision and Recall.
- Although recommendations are highly accurate
- System cannot capture all of the user’s preferences.

4. MSE ≈ 5.00 & RMSE ≈ 2.24
- The difference between predicted and actual ratings is about 2.2 points on a 10-point scale.
- Rating prediction performance is only moderate
- The error is relatively large.