In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Render all DataFrame columns instead of truncating wide frames.
pd.set_option('display.max_columns', None)

  from .autonotebook import tqdm as notebook_tqdm





In [2]:
import os

import kagglehub

# Download the dataset through kagglehub; the call returns the local directory
# that holds the dataset files.
path = kagglehub.dataset_download("CooperUnion/anime-recommendations-database")
print("Path to dataset files:", path)

# Build the CSV path with os.path.join so the separator matches the platform
# (the download directory is a native path, e.g. backslashes on Windows).
file_path = os.path.join(path, "anime.csv")
df_anime = pd.read_csv(file_path)
df_anime.head()

Path to dataset files: C:\Users\ASUS\.cache\kagglehub\datasets\CooperUnion\anime-recommendations-database\versions\1


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


Mengisi nilai NaN pada kolom `genre` dengan string kosong

In [3]:
# Replace missing genre values with an empty string so every row holds a string
# before the column is converted to a list and passed to the encoder.
df_anime['genre'] = df_anime['genre'].fillna('')

Encode kolom `genre` menjadi embedding kalimat

In [4]:
# Load the 'all-MiniLM-L6-v2' sentence-transformer model.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Encode every genre string; convert_to_tensor=True makes encode return a tensor.
genre_embeddings = model.encode(df_anime['genre'].tolist(), convert_to_tensor=True)

In [11]:
# Display the tensor returned by model.encode for the genre strings.
genre_embeddings

tensor([[ 0.0279, -0.1049,  0.0457,  ..., -0.0214,  0.0039,  0.0108],
        [ 0.0158, -0.0546, -0.0091,  ..., -0.0429, -0.0500,  0.0712],
        [-0.0754, -0.1026, -0.0215,  ..., -0.0219,  0.0050,  0.0343],
        ...,
        [-0.0443, -0.0638,  0.0439,  ...,  0.0337,  0.0472, -0.0632],
        [-0.0443, -0.0638,  0.0439,  ...,  0.0337,  0.0472, -0.0632],
        [-0.0443, -0.0638,  0.0439,  ...,  0.0337,  0.0472, -0.0632]])

In [5]:
# Move the embeddings to CPU, convert them to a NumPy array, and compute the
# pairwise cosine similarity between all rows.
similarity_matrix = cosine_similarity(genre_embeddings.cpu().numpy())

In [6]:
def recommend_anime(anime_name, df_anime, similarity_matrix, top_n=5):
    """Return the titles most similar to ``anime_name`` by similarity score.

    Parameters
    ----------
    anime_name : str
        Exact title to look up in ``df_anime['name']``.
    df_anime : pandas.DataFrame
        Frame with at least the ``name`` and ``genre`` columns. Row *i* of the
        frame must correspond to row *i* of ``similarity_matrix``.
    similarity_matrix : 2-D array-like
        Pairwise similarity scores; ``similarity_matrix[i][j]`` is the score
        between rows *i* and *j* of ``df_anime``.
    top_n : int, default 5
        Maximum number of recommendations to return. Values below zero are
        treated as zero.

    Returns
    -------
    pandas.DataFrame or str
        The ``name`` and ``genre`` columns of the recommended rows, ordered
        from most to least similar, or a message string when the title is not
        present in ``df_anime``.
    """
    # Work with row *positions*, not index labels: the similarity matrix and
    # .iloc are positional, so a non-default index must not be used to address them.
    matches = np.flatnonzero((df_anime['name'] == anime_name).to_numpy())
    if len(matches) == 0:
        return f"Anime '{anime_name}' tidak ditemukan."

    # When the title occurs more than once, use its first occurrence.
    query_pos = int(matches[0])

    scores = np.asarray(similarity_matrix[query_pos], dtype=float)
    # Stable descending sort: ties keep ascending row order.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the query row explicitly. Slicing off the first element is wrong when
    # another row ties with the query (e.g. an identical genre string) and sorts
    # ahead of it, which would leave the query inside its own recommendations.
    ranked = ranked[ranked != query_pos][:max(top_n, 0)]

    return df_anime.iloc[ranked][['name', 'genre']]


In [15]:
# Example: recommendations for "Steins;Gate" using the default top_n of 5.
recommend_anime("Steins;Gate", df_anime, similarity_matrix)

Unnamed: 0,name,genre
59,Steins;Gate Movie: Fuka Ryouiki no Déjà vu,"Sci-Fi, Thriller"
126,Steins;Gate: Oukoubakko no Poriomania,"Sci-Fi, Thriller"
196,Steins;Gate: Kyoukaimenjou no Missing Link - D...,"Sci-Fi, Thriller"
10898,Steins;Gate 0,"Sci-Fi, Thriller"
5525,Loups=Garous,"Mystery, Sci-Fi, Thriller"


In [16]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

# Chat-model client that explain_recommendation calls via llm.invoke.
# NOTE(review): no API key is passed here — presumably credentials are read
# from the environment; confirm before running on a fresh machine.
llm = ChatOpenAI(temperature=0.3, model_name="gpt-4")

def explain_recommendation(anime_name, df_anime, similarity_matrix, top_n=5):
    """Have the chat model explain why the recommended titles suit a fan of ``anime_name``.

    The recommendations come from ``recommend_anime``; each one is rendered as
    ``"<name> (Genre: <genre>)"`` and embedded in an Indonesian prompt that is
    sent to the module-level ``llm``.

    Returns the ``content`` of the model response, or the message string from
    ``recommend_anime`` when the title is not found.
    """
    recommendations = recommend_anime(anime_name, df_anime, similarity_matrix, top_n)

    # recommend_anime reports an unknown title as a plain string; pass it through.
    if isinstance(recommendations, str):
        return recommendations

    # One line per recommended title, in ranking order.
    entries = []
    for _, entry in recommendations.iterrows():
        entries.append(f"{entry['name']} (Genre: {entry['genre']})")
    listing = "\n".join(entries)

    # Prompt text sent to the LLM.
    prompt = f"""
Kamu adalah ahli anime.

User menyukai anime berjudul: "{anime_name}".

Berikut adalah beberapa anime lain yang mirip secara genre dan gaya:

{listing}

🎯 Jelaskan secara singkat kenapa anime-anime ini cocok direkomendasikan untuk penggemar "{anime_name}".
"""
    # Query the LLM and hand back only the text of its reply.
    reply = llm.invoke(prompt)
    return reply.content


In [18]:
# Example: print the LLM explanation for the "Steins;Gate" recommendations.
print(explain_recommendation("Steins;Gate", df_anime, similarity_matrix))

Anime-anime ini cocok direkomendasikan untuk penggemar "Steins;Gate" karena mereka memiliki genre dan gaya yang sama. 

Steins;Gate Movie: Fuka Ryouiki no Déjà vu, Steins;Gate: Oukoubakko no Poriomania, dan Steins;Gate: Kyoukaimenjou no Missing Link - Divide By Zero adalah bagian dari seri Steins;Gate, jadi mereka pasti akan menarik bagi penggemar anime aslinya. Mereka semua memiliki elemen sci-fi dan thriller yang sama yang membuat Steins;Gate begitu menarik, termasuk teori konspirasi, perjalanan waktu, dan misteri ilmiah.

Steins;Gate 0 adalah prekuel dari Steins;Gate dan memberikan lebih banyak konteks dan latar belakang untuk cerita dan karakter-karakternya. Jadi, jika penggemar Steins;Gate ingin mendalami lebih jauh dunia dan karakter yang mereka sukai, ini adalah pilihan yang bagus.

Loups=Garous juga adalah anime dengan genre Mystery, Sci-Fi, dan Thriller. Meskipun bukan bagian dari seri Steins;Gate, gaya dan tema yang sama dapat menarik bagi penggemar Steins;Gate. Anime ini ber