# Collaborative Filtering - On

# <u> Item-Based Collaborative Filtering </u>

## Import libraries

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
import numpy as np

## Read CSV

In [None]:
# Why ISO-8859-1 for reviews.csv?
# Decoding it as UTF-8 fails with an "invalid start byte" error because the
# file contains bytes that are not valid UTF-8.
reviews = pd.read_csv("data/reviews.csv", encoding="ISO-8859-1")
print(reviews)

print("-------------------------------------------------------------------------")

# animes.csv is read as UTF-8.
animes = pd.read_csv("data/animes.csv", encoding="utf-8")
print(animes)

## Keep only needed columns

In [None]:
# Reviews: keep the user, the anime reference and the rating, and drop rows
# where any of the three is missing.
reviews = reviews[['profile', 'anime_uid', 'rating']].dropna()
print(reviews)

print("---------------------------------------------------------")
# Animes: 'uid' and 'title' are required. The descriptive columns are kept
# whenever the CSV provides them, because recommend_similar_anime attaches
# genre/score/synopsis/link to its result only if they are still present in
# `animes`; selecting just ['uid', 'title'] silently removed them.
animes = animes[['uid', 'title'] + [
    col for col in ('genre', 'score', 'synopsis', 'link') if col in animes.columns
]]
print(animes)

## Build a lookup map from anime UID to anime title

In [None]:
# Lookup table: string form of each anime UID -> anime title.
# When a UID occurs more than once, the last title seen wins.
anime_map = {
    uid_text: anime_title
    for uid_text, anime_title in zip(animes['uid'].astype(str), animes['title'])
}

## Replace Anime_UID with title

In [None]:
# Look up each review's title through the string form of its anime_uid,
# matching the string keys used in anime_map.
reviews['title'] = reviews['anime_uid'].astype(str).map(anime_map)

# Drop missing titles (reviews whose anime_uid has no entry in anime_map)
reviews = reviews.dropna(subset=['title'])

print(reviews)

## Create Anime x User Rating Matrix

In [None]:
# Coerce ratings to numbers; values that cannot be parsed become NaN.
reviews['rating'] = pd.to_numeric(reviews['rating'], errors='coerce')

# One row per anime title, one column per user profile. pivot_table uses its
# default aggregation for repeated (title, profile) pairs, and pairs with no
# rating are filled with 0.
anime_user_matrix = pd.pivot_table(
    reviews,
    values='rating',
    index='title',
    columns='profile',
).fillna(0)

print("Matrix shape:", anime_user_matrix.shape)

## Calculate Similarity

In [None]:
# Pairwise cosine similarity between the rows (anime titles) of the
# anime x user rating matrix.
anime_similarity = cosine_similarity(anime_user_matrix)
# Label both axes with the anime titles so a similarity can be looked up
# by title.
anime_similarity_df = pd.DataFrame(
    anime_similarity, 
    index=anime_user_matrix.index, 
    columns=anime_user_matrix.index
)

print(anime_similarity_df)

## Recommender Function

In [None]:
# Show which columns `animes` currently holds (the recommender below only
# attaches extra columns that are present here).
print(animes.columns)

In [None]:
def recommend_similar_anime(anime_title, n=10):
    """Return the ``n`` anime most similar to ``anime_title``.

    Reads the module-level ``anime_similarity_df`` (title x title similarity)
    and ``animes`` (anime metadata) frames.

    Args:
        anime_title: Title to look up in ``anime_similarity_df``.
        n: Number of neighbours to keep (passed to ``Series.head``).

    Returns:
        A DataFrame with ``title`` and ``similarity`` columns, plus any of
        ``genre``, ``score``, ``synopsis`` and ``link`` that exist in
        ``animes``. When the title is unknown, an empty DataFrame with all
        six column names is returned instead.
    """
    if anime_title not in anime_similarity_df.index:
        return pd.DataFrame(columns=["title", "similarity", "genre", "score", "synopsis", "link"])

    # Rank every title by similarity to the requested one, then remove the
    # requested title itself from the ranking.
    ranked = (
        anime_similarity_df[anime_title]
        .sort_values(ascending=False)
        .drop(anime_title)
    )

    # reset_index turns the title index into a regular column so the result
    # can be merged on it.
    nearest = ranked.head(n).reset_index()
    nearest.columns = ["title", "similarity"]

    # Only request metadata columns that are actually present in `animes`.
    wanted_fields = ['title', 'genre', 'score', 'synopsis', 'link']
    present_fields = [field for field in wanted_fields if field in animes.columns]
    details = animes[present_fields].drop_duplicates(subset="title")

    # Left merge keeps every neighbour even when it has no metadata row.
    return nearest.merge(details, on="title", how="left")

# Example: print the 10 most similar anime for one sample title.
Anime_Selected = "Toradora!"
print("Item-Based Collaborative Filtering, if you choose", Anime_Selected)
print(recommend_similar_anime(Anime_Selected, 10))

# Setup
- Cosine Similarity

In [None]:
## Import libraries

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
import numpy as np

## Read CSV

# Why ISO-8859-1 for reviews.csv?
# Decoding it as UTF-8 fails with an "invalid start byte" error because the
# file contains bytes that are not valid UTF-8.
reviews = pd.read_csv("data/reviews.csv", encoding="ISO-8859-1")

animes = pd.read_csv("data/animes.csv", encoding="utf-8")

## Keep only needed columns

# Reviews: user, anime reference and rating; rows missing any of them are
# dropped.
reviews = reviews[['profile', 'anime_uid', 'rating']].dropna()
# Animes: 'uid' and 'title' are required. The descriptive columns are kept
# whenever the CSV provides them, because recommend_based_on_anime_genre
# attaches genre/score/synopsis/link to its result only if they are still
# present in `animes`; selecting just ['uid', 'title'] silently removed them.
animes = animes[['uid', 'title'] + [
    col for col in ('genre', 'score', 'synopsis', 'link') if col in animes.columns
]]

## Build a lookup map from anime UID to anime title

# Keys are the string form of the UID so they match the string-cast
# anime_uid used for the lookup below.
anime_map = dict(zip(animes['uid'].astype(str), animes['title']))

## Attach the anime title to every review

reviews['title'] = reviews['anime_uid'].astype(str).map(anime_map)

# Drop missing titles (reviews whose anime_uid has no entry in anime_map)
reviews = reviews.dropna(subset=['title'])

## Anime x user rating matrix

# Coerce ratings to numbers; values that cannot be parsed become NaN.
reviews['rating'] = pd.to_numeric(reviews['rating'], errors='coerce')

# One row per anime title, one column per user profile; pairs with no
# rating are filled with 0.
anime_user_matrix = reviews.pivot_table(
    index='title',
    columns='profile',
    values='rating'
).fillna(0)

## Pairwise cosine similarity between anime (rows of the matrix)

anime_similarity = cosine_similarity(anime_user_matrix)
# Label both axes with the anime titles so similarities can be looked up
# by title.
anime_similarity_df = pd.DataFrame(
    anime_similarity,
    index=anime_user_matrix.index,
    columns=anime_user_matrix.index
)

### Function
- Item-Based Collaborative Filtering

In [51]:
def recommend_based_on_anime_genre(anime_title, n=10):
    """Return the ``n`` anime whose rating profile is closest to ``anime_title``.

    Despite the name, the ranking does not use genre: it reads the
    module-level ``anime_similarity_df`` (cosine similarity between anime
    rating vectors). ``genre`` only appears as an optional metadata column
    copied from ``animes``.

    Args:
        anime_title: Title to look up in ``anime_similarity_df``.
        n: Number of neighbours to keep (passed to ``Series.head``).

    Returns:
        A DataFrame with ``title`` and ``similarity`` columns, plus any of
        ``genre``, ``score``, ``synopsis`` and ``link`` that exist in
        ``animes``. When the title is unknown, an empty DataFrame with all
        six column names is returned instead.
    """
    known_titles = anime_similarity_df.index
    if anime_title not in known_titles:
        return pd.DataFrame(columns=["title", "similarity", "genre", "score", "synopsis", "link"])

    # Most similar first; the requested title is removed so it is never
    # recommended to itself.
    neighbours = anime_similarity_df[anime_title].sort_values(ascending=False)
    neighbours = neighbours.drop(anime_title).head(n)

    # Move the title index into a column so it can serve as the merge key.
    top = neighbours.reset_index()
    top.columns = ["title", "similarity"]

    # Restrict the metadata to columns that `animes` actually has.
    wanted = ('title', 'genre', 'score', 'synopsis', 'link')
    kept = [name for name in wanted if name in animes.columns]
    info = animes[kept].drop_duplicates(subset="title")

    # Left merge: neighbours without a metadata row are still returned.
    return top.merge(info, on="title", how="left")

### Try Run Item-Based Collaborative Filtering

In [None]:
# Run the item-based recommender defined in this section for one sample
# title. The call targets recommend_based_on_anime_genre because that is the
# function this section defines; recommend_similar_anime only exists if the
# earlier part of the notebook has been executed.
Anime_Selected = "Another"
print("Item-Based Collaborative Filtering, if you choose", Anime_Selected)
print(recommend_based_on_anime_genre(Anime_Selected, 10))

# <u> User-Based Collaborative Filtering </u>

### Function
- User-Based Collaborative Filtering

In [None]:
def recommend_based_on_anime_viewers(anime_titles, top_n_recommendations=10):
    """Recommend anime via the users who rated every selected title.

    Relies on names defined outside this function: the ``anime_df`` and
    ``df`` DataFrames and the ``recommend_anime_for_user`` helper.

    Args:
        anime_titles: Titles to start from, matched case-insensitively
            against ``anime_df['title']``. A single string is treated as a
            one-title selection.
        top_n_recommendations: Maximum number of rows returned; also
            forwarded to ``recommend_anime_for_user`` for every user.

    Returns:
        A DataFrame with ``title`` and mean ``score`` columns, sorted by
        score in descending order, or a message string when nothing was
        selected, a title is unknown, no user rated all selected titles, or
        no recommendation was produced.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(anime_titles, str):
        anime_titles = [anime_titles]
    anime_titles = list(anime_titles)

    # set.intersection(*[]) raises TypeError, so an empty selection is
    # reported explicitly instead.
    if not anime_titles:
        return "⚠️ No anime selected"

    # Find related anime_id (lower-case the title column once, not per title)
    lowered_titles = anime_df['title'].str.lower()
    anime_ids = []
    for title in anime_titles:
        anime_row = anime_df[lowered_titles == title.lower()]
        if anime_row.empty:
            return f"❌ {title} not found."
        anime_ids.append(anime_row['uid'].values[0])

    # Set all anime id to Int64.
    # Note: this rewrites the column on the module-level `df` in place.
    df['anime_id'] = pd.to_numeric(df['anime_id'], errors='coerce').astype('Int64')

    # Find all users that have rated (rating > 0) each selected anime
    has_rating = df['rating'] > 0
    viewers_sets = [
        set(df[(df['anime_id'] == anime_id) & has_rating]['user_id'].unique())
        for anime_id in anime_ids
    ]
    common_viewers = set.intersection(*viewers_sets)

    # If no user viewed all of the selected anime
    if not common_viewers:
        return "⚠️ No user viewed the selected anime"

    # Collect per-user recommendations and concatenate once at the end;
    # results that are not DataFrames are skipped.
    per_user_results = []
    for user in common_viewers:
        result = recommend_anime_for_user(
            user,
            top_k_similar_users=5,
            top_n_recommendations=top_n_recommendations,
        )
        if isinstance(result, pd.DataFrame):
            per_user_results.append(result)

    # pd.concat rejects an empty list, so check before concatenating.
    if not per_user_results:
        return "⚠️ No recommend result"

    all_recommendations = pd.concat(per_user_results, ignore_index=True)
    if all_recommendations.empty:
        return "⚠️ No recommend result"

    # Merge repeated anime by averaging their scores, then keep the best ones
    grouped = all_recommendations.groupby('title').agg({'score': 'mean'}).reset_index()
    final = grouped.sort_values(by='score', ascending=False).head(top_n_recommendations)

    return final.reset_index(drop=True)

### Try Run User-Based Collaborative Filtering

In [None]:
# Run the user-based recommender for a sample selection of two titles and
# print up to 10 recommendations (or the message string it returns).
print("User-Based Collaborative Filtering")
Selected_anime = ['Toradora!','Another']
print(Selected_anime)
print(recommend_based_on_anime_viewers(Selected_anime, 10))