# Content Filtering Testing Notebook

# 0. Imports and Data Loading

In [8]:
import sys
import os

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from database.db_helper import connect_to_db, close_connection

In [3]:
# Load the dataset from the database, postgre
def load_dataset():
    conn = connect_to_db()
    query = "SELECT * FROM anime_data.anime;"
    df = pd.read_sql(query, conn)
    close_connection(conn)
    return df

df = load_dataset()

Database connection established successfully.
Database connection closed.


  df = pd.read_sql(query, conn)


In [5]:
df.head()

Unnamed: 0,mal_id,title,synopsis,score,popularity,genres,themes,demographics,studios,producers,content_string
0,48824,Blue Archive PVs,Animated promotions for the mobile game Blue A...,7.46,7773,[Fantasy],[],[],[Yostar Pictures],[],Fantasy Animated promotions mobile game Blue...
1,2768,CLAMP in Wonderland 2,A short animation which features clips from al...,7.42,5155,"[Action, Comedy, Drama, Fantasy, Romance]",[Music],[],[Madhouse],[Victor Entertainment],Action Comedy Drama Fantasy Romance Music shor...
2,8514,Break Blade Movie 2: Ketsubetsu no Michi,Second Break Blade movie.,7.7,3017,"[Action, Fantasy]","[Adult Cast, Mecha, Military]",[Shounen],"[Production I.G, Xebec]","[KlockWorx, Flex Comix, Fields]",Action Fantasy Adult Cast Mecha Military Secon...
3,27633,Gekkan Shoujo Nozaki-kun Specials,Specials bundled with the Blu-ray/DVDs.,7.7,1753,"[Comedy, Romance]",[School],[],[Doga Kobo],[Media Factory],Comedy Romance School Specials bundled Blu ray...
4,57995,Bai Yao Pu 4th Season,Fourth season of Bai Yao Pu.,7.58,11702,"[Adventure, Mystery, Supernatural]",[Historical],[],[CMC Media],[bilibili],Adventure Mystery Supernatural Historical Four...


# 1. Calculate Similarity Between Anime

First convert text of content string into vectors.

In [6]:
tfidf = TfidfVectorizer(min_df=5, max_df=0.8, stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['content_string'])
print("TF-IDF matrix shape:", tfidf_matrix.shape)

TF-IDF matrix shape: (2500, 5247)


Next calculate cosine similarity matrix.

In [7]:
cosine_sim = cosine_similarity(tfidf_matrix)
print("Cosine similarity matrix shape:", cosine_sim.shape)

Cosine similarity matrix shape: (2500, 2500)


# 2. Create a Recomendation Function

In [10]:
indices = pd.Series(df.index, index=df['mal_id'])
def get_recommendations(liked_ids, cosine_sim, data=df):
    liked_indices = [indices[mal_id] for mal_id in liked_ids if mal_id in indices]
    
    combined_scores = np.zeros(data.shape[0])
    for idx in liked_indices:
        combined_scores += cosine_sim[idx]

    sim_scores_with_indices = list(enumerate(combined_scores))
    sim_scores_with_indices = sorted(sim_scores_with_indices, key=lambda x: x[1], reverse=True)
    top_anime_indices = [i[0] for i in sim_scores_with_indices][:15] 
    recommended_indices = [idx for idx in top_anime_indices if idx not in liked_indices][:10]
    return data.iloc[recommended_indices][['mal_id', 'title', 'content_string']]

# 3. Testing

In [14]:
user_likes = [1, 15, 80, 52, 123, 20, 123, 45, 67, 89]  # Example user likes (mal_id)
user_recommendations = get_recommendations(user_likes, cosine_sim)
print(user_recommendations)

      mal_id                                              title  \
1645   50613     Rurouni Kenshin: Meiji Kenkaku Romantan (2023)   
805       82  Kidou Senshi Gundam 0080: Pocket no Naka no Se...   
1485      87             Kidou Senshi Gundam: Gyakushuu no Char   
986       85                           Kidou Senshi Zeta Gundam   
600    39592  Kidou Senshi Gundam: The Origin - Zenya Akai S...   
880    31973                    Kidou Senshi Gundam Thunderbolt   
1550      90                       Shin Kidou Senki Gundam Wing   
1334      93                           Kidou Senshi Gundam SEED   

                                         content_string  
1645  Action Historical Samurai late 19th century cr...  
805   Action Drama Sci Fi Mecha Military Space Alfre...  
1485  Action Drama Sci Fi Mecha Military Space year ...  
986   Action Drama Romance Sci Fi Mecha Military Spa...  
600   Action Sci Fi Mecha Military Space tragedy dec...  
880   Action Drama Sci Fi Mecha Military Space U