# Collaborative Filtering - On

# <u> Item-Based Collaborative Filtering </u>

## Import libraries

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
import numpy as np

## Read CSV

In [None]:
# Why ISO-8859-1 for reviews.csv?
# Per the original author's note, reading it as UTF-8 fails with an
# "invalid start byte" error (the file contains non-UTF-8 characters).
reviews = pd.read_csv("data/reviews.csv", encoding="ISO-8859-1")
print(reviews)

print("-------------------------------------------------------------------------")

# animes.csv is read as UTF-8.
animes = pd.read_csv("data/animes.csv", encoding="utf-8")
print(animes)

## Keep only needed columns

In [None]:
# Reviews: keep only reviewer, reviewed anime and rating, and drop rows where
# any of the three is missing.
reviews = reviews[['profile', 'anime_uid', 'rating']].dropna()
print(reviews)

print("---------------------------------------------------------")
# Animes: 'uid' and 'title' are required (a KeyError is raised if either is
# absent). The descriptive columns are kept whenever the CSV provides them,
# because recommend_similar_anime() looks them up in `animes`; trimming the
# frame to uid/title only would leave its result without any extra details.
animes = animes[
    ['uid', 'title']
    + [c for c in ('genre', 'score', 'synopsis', 'link') if c in animes.columns]
]
print(animes)

## Build a lookup map from anime UID to anime title

In [None]:
# Lookup table: anime UID (as a string) -> anime title.
# When a UID appears more than once, the last title seen is the one kept.
anime_map = pd.Series(
    animes['title'].to_numpy(),
    index=animes['uid'].astype(str),
).to_dict()

## Replace Anime_UID with title

In [None]:
# Look up each review's title through its stringified anime UID, then discard
# the reviews whose UID has no entry in anime_map (their title is NaN).
reviews = reviews.assign(
    title=reviews['anime_uid'].astype(str).map(anime_map)
).dropna(subset=['title'])

print(reviews)

## Create Anime x User Rating Matrix

In [None]:
# Ratings that cannot be parsed as numbers become NaN.
reviews = reviews.assign(rating=pd.to_numeric(reviews['rating'], errors='coerce'))

# One row per anime title, one column per reviewer profile.
# Title/profile pairs without a rating are filled with 0.
anime_user_matrix = pd.pivot_table(
    reviews,
    values='rating',
    index='title',
    columns='profile',
).fillna(0)

print("Matrix shape:", anime_user_matrix.shape)

## Calculate Similarity

In [None]:
# Cosine similarity between the rows of the matrix, i.e. between anime titles.
anime_similarity = cosine_similarity(anime_user_matrix)

# Label both axes with the titles so similarities can be looked up by name.
title_labels = anime_user_matrix.index
anime_similarity_df = pd.DataFrame(anime_similarity, index=title_labels, columns=title_labels)

print(anime_similarity_df)

## Recommender Function

In [None]:
# Show which columns `animes` currently holds.
print(animes.columns)

In [None]:
def recommend_similar_anime(anime_title, n=10):
    """Return the ``n`` titles most similar to ``anime_title``.

    Similarities are read from the module-level ``anime_similarity_df`` and
    descriptive columns from the module-level ``animes`` frame.

    Args:
        anime_title: Title to find neighbours for.
        n: Number of neighbours to return.

    Returns:
        A DataFrame with ``title`` and ``similarity`` columns, in descending
        similarity order, plus whichever of genre/score/synopsis/link exist
        in ``animes``. An unknown title yields an empty DataFrame carrying
        all six column names.
    """
    detail_fields = ['title', 'genre', 'score', 'synopsis', 'link']

    # Unknown title: nothing to rank.
    if anime_title not in anime_similarity_df.index:
        return pd.DataFrame(columns=["title", "similarity", "genre", "score", "synopsis", "link"])

    # Rank every title against the requested one, leave the requested title
    # itself out, and keep the first n.
    ranked = (
        anime_similarity_df[anime_title]
        .sort_values(ascending=False)
        .drop(anime_title)
        .head(n)
    )
    neighbours = ranked.reset_index()
    neighbours.columns = ["title", "similarity"]

    # One row of details per title, limited to the columns animes really has.
    present_fields = [field for field in detail_fields if field in animes.columns]
    details = animes[present_fields].drop_duplicates(subset="title")

    # Left join keeps every neighbour even when no details are found.
    return neighbours.merge(details, on="title", how="left")

# Example: print the 10 titles most similar to the chosen anime.
Anime_Selected = "Toradora!"
print("Recommendations, if you choose", Anime_Selected)
print(recommend_similar_anime(Anime_Selected, 10))

# Full Version
- Cosine Similarity
- Item-Based Collaborative Filtering

In [None]:
## Import libraries

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
import numpy as np

## Read CSV

# Why ISO-8859-1 for reviews.csv?
# Per the original author's note, reading it as UTF-8 fails with an
# "invalid start byte" error (the file contains non-UTF-8 characters).
reviews = pd.read_csv("data/reviews.csv", encoding="ISO-8859-1")

animes = pd.read_csv("data/animes.csv", encoding="utf-8")

## Keep only needed columns

# Reviews: reviewer, reviewed anime and rating; rows missing any are dropped.
reviews = reviews[['profile', 'anime_uid', 'rating']].dropna()

# Animes: 'uid' and 'title' are required (a KeyError is raised if either is
# absent). The descriptive columns are kept whenever the CSV provides them,
# because recommend_similar_anime() looks them up in `animes`; trimming the
# frame to uid/title only would leave its result without any extra details.
animes = animes[
    ['uid', 'title']
    + [c for c in ('genre', 'score', 'synopsis', 'link') if c in animes.columns]
]

## Make connect map from review to animes

# To get anime title by using UID (UIDs are compared as strings)
anime_map = dict(zip(animes['uid'].astype(str), animes['title']))

## Replace Anime_UID with title

reviews['title'] = reviews['anime_uid'].astype(str).map(anime_map)

# Drop reviews whose UID has no entry in anime_map
reviews = reviews.dropna(subset=['title'])

# Ratings that cannot be parsed as numbers become NaN
reviews['rating'] = pd.to_numeric(reviews['rating'], errors='coerce')

## Create Anime x User Rating Matrix

# One row per title, one column per reviewer profile; title/profile pairs
# without a rating are filled with 0.
anime_user_matrix = reviews.pivot_table(
    index='title',
    columns='profile',
    values='rating'
).fillna(0)

## Calculate Similarity

# Cosine similarity between matrix rows (i.e. between titles), labelled with
# the titles on both axes so it can be indexed by name.
anime_similarity = cosine_similarity(anime_user_matrix)
anime_similarity_df = pd.DataFrame(
    anime_similarity,
    index=anime_user_matrix.index,
    columns=anime_user_matrix.index
)

## Recommender Function

def recommend_similar_anime(anime_title, n=10):
    """Return the ``n`` titles most similar to ``anime_title``.

    Similarities are read from the module-level ``anime_similarity_df`` and
    descriptive columns from the module-level ``animes`` frame.

    Args:
        anime_title: Title to find neighbours for.
        n: Number of neighbours to return.

    Returns:
        A DataFrame with ``title`` and ``similarity`` columns, in descending
        similarity order, plus whichever of genre/score/synopsis/link exist
        in ``animes``. An unknown title yields an empty DataFrame carrying
        all six column names.
    """
    detail_fields = ['title', 'genre', 'score', 'synopsis', 'link']

    # Unknown title: nothing to rank.
    if anime_title not in anime_similarity_df.index:
        return pd.DataFrame(columns=["title", "similarity", "genre", "score", "synopsis", "link"])

    # Rank every title against the requested one, leave the requested title
    # itself out, and keep the first n.
    ranked = (
        anime_similarity_df[anime_title]
        .sort_values(ascending=False)
        .drop(anime_title)
        .head(n)
    )
    neighbours = ranked.reset_index()
    neighbours.columns = ["title", "similarity"]

    # One row of details per title, limited to the columns animes really has.
    present_fields = [field for field in detail_fields if field in animes.columns]
    details = animes[present_fields].drop_duplicates(subset="title")

    # Left join keeps every neighbour even when no details are found.
    return neighbours.merge(details, on="title", how="left")

# Example: print the 10 titles most similar to the chosen anime.
Anime_Selected = "Toradora!"
print("Recommendations, if you choose", Anime_Selected)
print(recommend_similar_anime(Anime_Selected, 10))

# <u> User-Based Collaborative Filtering </u>

In [208]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# 1. Load the raw review and anime tables
df = pd.read_csv("data/reviews.csv", encoding="ISO-8859-1")
anime_df = pd.read_csv("data/animes.csv", encoding="utf-8")

# 2. Clean the user rating data: keep three columns under clearer names,
#    coerce ratings to numbers and drop rows whose rating is not numeric
df = (
    df[['profile', 'anime_uid', 'rating']]
    .rename(columns={'profile': 'user_id', 'anime_uid': 'anime_id'})
)
df = df.assign(rating=pd.to_numeric(df['rating'], errors='coerce')).dropna(subset=['rating'])

# 3. Build the user x anime rating matrix (missing ratings filled with 0)
user_anime_matrix = pd.pivot_table(
    df,
    values='rating',
    index='user_id',
    columns='anime_id',
).fillna(0)

# 4. Compute user-to-user similarity (cosine similarity between matrix rows)
user_similarity = cosine_similarity(user_anime_matrix)
user_labels = user_anime_matrix.index
user_similarity_df = pd.DataFrame(user_similarity, index=user_labels, columns=user_labels)

# 5. Clean the anime table so that every UID is a unique integer;
#    de-duplicating here avoids recommending the same entry twice
anime_df = (
    anime_df.assign(uid=pd.to_numeric(anime_df['uid'], errors='coerce'))
    .dropna(subset=['uid'])
    .astype({'uid': int})
    .drop_duplicates(subset='uid')
)

# 6. 推荐函数
def recommend_anime_for_user(target_user_id, top_k_similar_users: int = 5, top_n_recommendations: int = 5):
    """Recommend anime to a user from the ratings of their most similar users.

    Reads the module-level ``user_anime_matrix`` (users x anime ratings, 0 for
    "not rated"), ``user_similarity_df`` (user x user similarity) and
    ``anime_df`` (anime metadata with ``uid`` and ``title`` columns).

    Each anime the target user has not rated is scored as the sum, over the
    ``top_k_similar_users`` most similar users, of
    ``neighbour_rating * neighbour_similarity``.

    Args:
        target_user_id: Row label of the user in ``user_anime_matrix``.
        top_k_similar_users: Number of most similar users whose ratings are used.
        top_n_recommendations: Maximum number of recommendations returned.

    Returns:
        A DataFrame with ``title`` and ``score`` (rounded to 2 decimals)
        columns ordered by descending score, or a message string when the
        user is unknown or nothing can be recommended.
    """
    if target_user_id not in user_anime_matrix.index:
        return f"❌ 用户 {target_user_id} 不存在。"

    # The K users most similar to the target (the target itself excluded).
    neighbours = (
        user_similarity_df[target_user_id]
        .drop(index=target_user_id)
        .sort_values(ascending=False)
        .head(top_k_similar_users)
    )

    # Anime the target has not rated yet (a stored rating of 0 means unrated).
    own_ratings = user_anime_matrix.loc[target_user_id]
    unseen_ids = own_ratings.index[~(own_ratings > 0)]

    # Similarity-weighted sum of the neighbours' ratings, computed in one
    # vectorised step instead of growing a Series label by label.
    neighbour_ratings = user_anime_matrix.loc[neighbours.index, unseen_ids]
    scores = neighbour_ratings.mul(neighbours, axis=0).sum(axis=0)

    # A score of 0 means no selected neighbour rated the anime, so there is
    # no evidence for recommending it.
    scores = scores[scores > 0]

    if scores.empty:
        return "⚠️ 没有找到任何可以推荐的动漫。"

    ranked = (
        scores.sort_values(ascending=False)
        .rename_axis('uid')
        .reset_index(name='score')
    )
    # Align the key dtype with anime_df['uid']; unparsable ids are discarded
    # rather than crashing the integer cast.
    ranked['uid'] = pd.to_numeric(ranked['uid'], errors='coerce')
    ranked = ranked.dropna(subset=['uid'])
    ranked['uid'] = ranked['uid'].astype(int)

    # Attach titles. Only uid/title are needed for the returned columns.
    merged = ranked.merge(anime_df[['uid', 'title']], on='uid', how='left')
    merged = merged.dropna(subset=['title'])         # uid unknown to anime_df
    merged = merged.drop_duplicates(subset='title')  # same title under several uids

    # Truncate only after filtering, so dropped rows do not shrink the result
    # below top_n_recommendations when more candidates are available.
    top = merged.head(top_n_recommendations)
    top = top.assign(score=top['score'].round(2))

    return top[['title', 'score']].reset_index(drop=True)

# Example call: recommend anime for the user "DesolatePsyche"
print(recommend_anime_for_user("DesolatePsyche"))


                                               title  score
0  Shuumatsu Nani Shitemasu ka? Isogashii Desu ka...   2.12
1                                 Shokugeki no Souma   1.68
2                                            Another   1.03
3                        Shingeki! Kyojin Chuugakkou   0.71
4                     Fairy Tail Movie 2: Dragon Cry   0.71
