## Project - Animes Recomendation System

In [2]:
# Imports
import os 
import openai
import pandas as pd
import numpy as np
from dotenv import load_dotenv, find_dotenv

In [3]:
# Auth
load_dotenv(find_dotenv(), override=True)
openai.api_key = os.getenv('OPENAI_API_KEY')
from openai import OpenAI
client = OpenAI()

In [4]:
# Loading the Dataset into Pandas DataFrame

df = pd.read_csv('./anime.csv')
df.dropna(inplace=True)
df = df.sort_values('synopsis', ascending=False).head(2000)
df

Unnamed: 0,anime_id,title,score,rank,popularity,members,synopsis,start_date,end_date,type,episodes,image_url
7991,23499,Gakumon!: Ookami Shoujo wa Kujikenai,6.39,7994,11386,2686,"”Wereman"" Shushu's life is turned upside down ...",2014-01-01,2014-01-01,ONA,3.0,https://cdn.myanimelist.net/images/anime/10/79...
725,14807,Kara no Kyoukai Movie: Mirai Fukuin,7.98,730,2004,124165,"​Shiki Ryougi, Mikiya Kokutou, and Touko Aozak...",2013-01-01,2013-01-01,Movie,1.0,https://cdn.myanimelist.net/images/anime/6/566...
8774,12967,Arcana Famiglia,6.26,8779,1247,218656,"​On the island of Regalo, a group of supernatu...",2012-01-01,2012-01-01,TV,12.0,https://cdn.myanimelist.net/images/anime/9/394...
8532,34565,Jikan no Shihaisha,6.30,8535,2874,71153,"​Like many in her class, Koyuki Honda looks fo...",2017-01-01,2017-01-01,TV,13.0,https://cdn.myanimelist.net/images/anime/3/867...
1379,34914,New Game!!,7.69,1380,1017,269162,​It has been a year since Aoba Suzukaze starte...,2017-01-01,2017-01-01,TV,12.0,https://cdn.myanimelist.net/images/anime/4/867...
...,...,...,...,...,...,...,...,...,...,...,...,...
3862,34642,Shingeki no Bahamut: Genesis - Short Story,7.13,3869,4525,30071,The first short story focuses on Favaro and Ka...,2016-01-01,2017-01-01,ONA,2.0,https://cdn.myanimelist.net/images/anime/1581/...
6482,6069,Gregory Horror Show,6.64,6489,9316,4999,The first series is a set of 25 stories about ...,1999-01-01,2000-01-01,TV,25.0,https://cdn.myanimelist.net/images/anime/1811/...
7782,336,Ginyuu Mokushiroku Meine Liebe,6.43,7786,6136,14056,The first season begins with introducing the c...,2004-01-01,2005-01-01,TV,13.0,https://cdn.myanimelist.net/images/anime/11/75...
7332,58015,"One Piece: Dai Tannou Kikaku! ""Shi no Gekai"" T...",6.50,7339,10241,3734,The first recap special of Egghead arc that co...,2024-01-01,2024-01-01,TV Special,1.0,https://cdn.myanimelist.net/images/anime/1868/...


In [5]:
# Embedding Cost Calculator
import tiktoken
enc = tiktoken.encoding_for_model('text-embedding-3-small')
synopsis = list(df['synopsis'])
total_tokens = sum([len(enc.encode(item)) for item in synopsis])
print(f'Total Tokens:  {total_tokens}')
cost = total_tokens * (0.0004/1000)
print(f'Estimated cost in USD:  {cost:.10f}')

Total Tokens:  245813
Estimated cost in USD:  0.0983252000


In [6]:
# Calcute the embedding and cache locally

def get_embeddings_batch(texts, model="text-embedding-3-small"):
    response = client.embeddings.create(
        input=texts,
        model=model
    )
    return [item.embedding for item in response.data]


def get_embeddings_and_save(embedding_cache_file):
    texts = df["synopsis"].astype(str).tolist()
    embeddings = get_embeddings_batch(texts)

    df["embedding"] = embeddings
    df.to_csv(embedding_cache_file, index=False)

In [7]:
embedding_cache_file = "anime_embeddings.csv"
get_embeddings_and_save(embedding_cache_file)

In [8]:
# Load the Embeddings

embedding_cache_file = 'anime_embeddings.csv'
df_embeddings = pd.read_csv(embedding_cache_file)
df_embeddings['embedding'] = df_embeddings['embedding'].apply(eval).apply(np.array)

df_embeddings

Unnamed: 0,anime_id,title,score,rank,popularity,members,synopsis,start_date,end_date,type,episodes,image_url,embedding
0,23499,Gakumon!: Ookami Shoujo wa Kujikenai,6.39,7994,11386,2686,"”Wereman"" Shushu's life is turned upside down ...",2014-01-01,2014-01-01,ONA,3.0,https://cdn.myanimelist.net/images/anime/10/79...,"[0.026876891031861305, 0.04433649405837059, -0..."
1,14807,Kara no Kyoukai Movie: Mirai Fukuin,7.98,730,2004,124165,"​Shiki Ryougi, Mikiya Kokutou, and Touko Aozak...",2013-01-01,2013-01-01,Movie,1.0,https://cdn.myanimelist.net/images/anime/6/566...,"[0.011536391451954842, -0.002720263786613941, ..."
2,12967,Arcana Famiglia,6.26,8779,1247,218656,"​On the island of Regalo, a group of supernatu...",2012-01-01,2012-01-01,TV,12.0,https://cdn.myanimelist.net/images/anime/9/394...,"[0.022150903940200806, 0.004030340351164341, -..."
3,34565,Jikan no Shihaisha,6.30,8535,2874,71153,"​Like many in her class, Koyuki Honda looks fo...",2017-01-01,2017-01-01,TV,13.0,https://cdn.myanimelist.net/images/anime/3/867...,"[0.006677896715700626, 0.030547602102160454, -..."
4,34914,New Game!!,7.69,1380,1017,269162,​It has been a year since Aoba Suzukaze starte...,2017-01-01,2017-01-01,TV,12.0,https://cdn.myanimelist.net/images/anime/4/867...,"[0.0857013463973999, -0.011099456809461117, 0...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,34642,Shingeki no Bahamut: Genesis - Short Story,7.13,3869,4525,30071,The first short story focuses on Favaro and Ka...,2016-01-01,2017-01-01,ONA,2.0,https://cdn.myanimelist.net/images/anime/1581/...,"[0.0036711657885462046, -0.002555224345996976,..."
1996,6069,Gregory Horror Show,6.64,6489,9316,4999,The first series is a set of 25 stories about ...,1999-01-01,2000-01-01,TV,25.0,https://cdn.myanimelist.net/images/anime/1811/...,"[-0.02772725746035576, 0.015176418237388134, 0..."
1997,336,Ginyuu Mokushiroku Meine Liebe,6.43,7786,6136,14056,The first season begins with introducing the c...,2004-01-01,2005-01-01,TV,13.0,https://cdn.myanimelist.net/images/anime/11/75...,"[-0.013687578029930592, 0.0686635822057724, -0..."
1998,58015,"One Piece: Dai Tannou Kikaku! ""Shi no Gekai"" T...",6.50,7339,10241,3734,The first recap special of Egghead arc that co...,2024-01-01,2024-01-01,TV Special,1.0,https://cdn.myanimelist.net/images/anime/1868/...,"[0.05437568575143814, 0.029113752767443657, 0...."


In [9]:
# Get Recommendation from Title

def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


def get_recommendation_from_title(df_embeddings, title, k=5):

    # Verifica se o título existe
    if title not in df_embeddings["title"].values:
        print("Título não encontrado.")
        return None

    # Pega embedding do anime escolhido
    target_embedding = df_embeddings.loc[
        df_embeddings["title"] == title, "embedding"
    ].values[0]

    # Converter string para lista se estiver salvo como texto no CSV
    if isinstance(target_embedding, str):
        target_embedding = np.array(eval(target_embedding))
    else:
        target_embedding = np.array(target_embedding)

    similarities = []

    for idx, row in df_embeddings.iterrows():
        emb = row["embedding"]

        if isinstance(emb, str):
            emb = np.array(eval(emb))
        else:
            emb = np.array(emb)

        sim = cosine_similarity(target_embedding, emb)
        similarities.append((row["title"], sim))

    # Ordena por similaridade (maior primeiro)
    similarities = sorted(similarities, key=lambda x: x[1], reverse=True)

    # Remove o próprio anime
    similarities = [item for item in similarities if item[0] != title]

    return similarities[:k]


In [10]:
get_recommendation_from_title(df_embeddings, 'Boku', 10)

Título não encontrado.
