In [2]:
import gc
import os

import pandas as pd
import requests
import torch
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Checkpoint identifier handed to SentenceTransformer; kept in a named constant
# so the model choice is visible and editable in one place.
MODEL_NAME = 'intfloat/multilingual-e5-large-instruct'

# Instantiate the multilingual sentence-embedding model used by the cells below.
model = SentenceTransformer(MODEL_NAME)

In [4]:
def get_anime_id(anime_name):
    """
    Query the AniList API to find the anime ID for the given anime name.

    Args:
        anime_name: Title text sent as the GraphQL ``search`` variable.

    Returns:
        The ``id`` of the matched Media entry, or None when the request fails,
        the response status is not 200, the body is not valid JSON, or the
        response contains no matching Media entry.
    """
    url = "https://graphql.anilist.co"
    query = '''
    query ($search: String) {
      Media(search: $search, type: ANIME) {
        id
      }
    }
    '''
    variables = {'search': anime_name}
    try:
        # Bounded wait (seconds) so an unresponsive endpoint cannot hang the cell forever.
        response = requests.post(
            url,
            json={'query': query, 'variables': variables},
            timeout=10,
        )
    except requests.RequestException as exc:
        print(f"Error querying AniList API for ID: {exc}")
        return None

    if response.status_code != 200:
        print("Error querying AniList API for ID")
        return None

    try:
        data = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        print("Error querying AniList API for ID")
        return None

    # A key that is present with a JSON null makes dict.get(key, {}) return None,
    # so fall back with `or {}` instead of relying on the default argument.
    media = (data.get('data') or {}).get('Media') or {}
    return media.get('id')

def get_anime_name(anime_id):
    """
    Query the AniList API to get the anime name given an anime id.

    Args:
        anime_id: Identifier of the anime. It is passed through ``int()`` so
            that non-native integer types (e.g. values read from a DataFrame)
            serialize cleanly to JSON.

    Returns:
        The English title if available, otherwise the romaji title, otherwise
        the native title. None when the request fails, the response status is
        not 200, the body is not valid JSON, or no title is present.

    Raises:
        TypeError, ValueError: If ``anime_id`` cannot be converted by ``int()``.
    """
    url = "https://graphql.anilist.co"
    query = '''
    query ($id: Int) {
      Media(id: $id, type: ANIME) {
        title {
          romaji
          english
          native
        }
      }
    }
    '''
    variables = {'id': int(anime_id)}  # Convert to Python int to ensure JSON serialization
    try:
        # Bounded wait (seconds) so an unresponsive endpoint cannot hang the cell forever.
        response = requests.post(
            url,
            json={'query': query, 'variables': variables},
            timeout=10,
        )
    except requests.RequestException as exc:
        print(f"Error querying AniList API for anime name: {exc}")
        return None

    if response.status_code != 200:
        print("Error querying AniList API for anime name")
        return None

    try:
        data = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        print("Error querying AniList API for anime name")
        return None

    # Any level may be an explicit JSON null, in which case dict.get(key, {})
    # returns None; `or {}` keeps the chain safe.
    media = (data.get('data') or {}).get('Media') or {}
    title_info = media.get('title') or {}

    # Prefer English title if available, else romaji, then native
    return title_info.get('english') or title_info.get('romaji') or title_info.get('native')

# Using the reviews to make recommendations

In [6]:
# Load the DataFrame which has columns: anime_id and top_review
df = pd.read_csv('./data/data.csv')

# Row-aligned plain-Python views of the two columns. Positions in these lists
# line up with rows of the embedding matrix, independent of the DataFrame index.
# Missing reviews are read as float NaN, which the encoder cannot take as text,
# so they are replaced with empty strings.
review_texts = df['top_review'].fillna('').astype(str).tolist()
anime_ids = df['anime_id'].tolist()

# Pre-compute or load review embeddings
embedding_file = './data/review_embeddings.pt'

review_embeddings = None
if os.path.exists(embedding_file):
    print("Loading pre-computed review embeddings...")
    # map_location moves the cached tensor onto the model's device, so a cache
    # written on a different device still loads and can be multiplied with the
    # freshly encoded query below.
    review_embeddings = torch.load(embedding_file, map_location=model.device)
    if review_embeddings.shape[0] != len(review_texts):
        # A cache built from a different version of the CSV would pair scores
        # with the wrong rows; discard it and rebuild.
        print("Cached embeddings do not match the dataframe; recomputing...")
        review_embeddings = None

if review_embeddings is None:
    print("Computing review embeddings...")
    review_embeddings = model.encode(review_texts, convert_to_tensor=True, normalize_embeddings=True)
    torch.save(review_embeddings, embedding_file)

# Prompt the user for an anime name
user_anime = input("Enter an anime name: ")

# Use AniList API to get the anime id for the given anime name
anime_id = get_anime_id(user_anime)
if anime_id is None:
    print(f"Anime '{user_anime}' not found on AniList.")
elif anime_id not in anime_ids:
    print(f"Anime '{user_anime}' with id {anime_id} not found in the dataframe.")
else:
    # Position (not index label) of the first row for this anime, and its review
    selected_index = anime_ids.index(anime_id)
    query_review = review_texts[selected_index]

    # Encode the review for the selected anime
    query_embedding = model.encode([query_review], convert_to_tensor=True, normalize_embeddings=True)

    # Both sides are normalized, so the dot product is the cosine similarity;
    # it is scaled by 100 for display.
    scores = (query_embedding @ review_embeddings.T) * 100
    scores_list = scores.tolist()[0]

    # Pair every row's anime id with its score, excluding the selected anime
    # itself (by id, so duplicate rows of the same anime are dropped as well).
    similarity_results = [
        (other_id, score)
        for other_id, score in zip(anime_ids, scores_list)
        if other_id != anime_id
    ]

    # Sort the results by similarity score in descending order
    similarity_results.sort(key=lambda pair: pair[1], reverse=True)

    # Get the top 20 most similar anime and use the API to get their names
    top_20 = similarity_results[:20]
    print(f"\nTop 20 similar anime to '{user_anime}' (Anime id {anime_id}):")
    for other_anime_id, score in top_20:
        anime_name_result = get_anime_name(other_anime_id)
        if anime_name_result:
            print(f"{anime_name_result} (ID: {other_anime_id}) - Similarity: {score:.2f}")
        else:
            print(f"Anime ID {other_anime_id} - Similarity: {score:.2f} (Name not found)")


Loading pre-computed review embeddings...

Top 20 similar anime to 'attack on titan' (Anime id 16498):
Attack on Titan Final Season (ID: 110277) - Similarity: 89.84
Attack on Titan Season 3 Part 2 (ID: 104578) - Similarity: 89.23
Attack on Titan Season 3 (ID: 99147) - Similarity: 88.16
JoJo's Bizarre Adventure (TV) (ID: 14719) - Similarity: 87.93
Vinland Saga (ID: 101348) - Similarity: 87.91
Fullmetal Alchemist: Brotherhood (ID: 5114) - Similarity: 87.82
Tokyo Ghoul (ID: 20605) - Similarity: 87.48
Hunter x Hunter (2011) (ID: 11061) - Similarity: 87.44
Attack on Titan Final Season THE FINAL CHAPTERS Special 2 (ID: 162314) - Similarity: 87.38
Attack on Titan Season 2 (ID: 20958) - Similarity: 87.04
Steins;Gate (ID: 9253) - Similarity: 86.92
Attack on Titan Final Season THE FINAL CHAPTERS Special 1 (ID: 146984) - Similarity: 86.20
The Promised Neverland (ID: 101759) - Similarity: 86.02
Neon Genesis Evangelion (ID: 30) - Similarity: 85.96
Black Clover (ID: 97940) - Similarity: 85.89
Assass

# Using the descriptions to make recommendations

In [5]:
# Load the DataFrame which has columns: anime_id and description
df = pd.read_csv('./data/desc_data.csv')

# Row-aligned plain-Python views of the two columns. Positions in these lists
# line up with rows of the embedding matrix, independent of the DataFrame index.
# Missing descriptions are read as float NaN, which the encoder cannot take as
# text, so they are replaced with empty strings.
# (The `review_*` variable names are kept from the review-based cell; here they
# hold description data.)
review_texts = df['description'].fillna('').astype(str).tolist()
anime_ids = df['anime_id'].tolist()

# Pre-compute or load description embeddings
embedding_file = './data/review_embeddings2.pt'

review_embeddings = None
if os.path.exists(embedding_file):
    print("Loading pre-computed description embeddings...")
    # map_location moves the cached tensor onto the model's device, so a cache
    # written on a different device still loads and can be multiplied with the
    # freshly encoded query below.
    review_embeddings = torch.load(embedding_file, map_location=model.device)
    if review_embeddings.shape[0] != len(review_texts):
        # A cache built from a different version of the CSV would pair scores
        # with the wrong rows; discard it and rebuild.
        print("Cached embeddings do not match the dataframe; recomputing...")
        review_embeddings = None

if review_embeddings is None:
    print("Computing description embeddings...")
    review_embeddings = model.encode(review_texts, convert_to_tensor=True, normalize_embeddings=True)
    torch.save(review_embeddings, embedding_file)

# Prompt the user for an anime name
user_anime = input("Enter an anime name: ")

# Use AniList API to get the anime id for the given anime name
anime_id = get_anime_id(user_anime)
if anime_id is None:
    print(f"Anime '{user_anime}' not found on AniList.")
elif anime_id not in anime_ids:
    print(f"Anime '{user_anime}' with id {anime_id} not found in the dataframe.")
else:
    # Position (not index label) of the first row for this anime, and its description
    selected_index = anime_ids.index(anime_id)
    query_review = review_texts[selected_index]

    # Encode the description for the selected anime
    query_embedding = model.encode([query_review], convert_to_tensor=True, normalize_embeddings=True)

    # Both sides are normalized, so the dot product is the cosine similarity;
    # it is scaled by 100 for display.
    scores = (query_embedding @ review_embeddings.T) * 100
    scores_list = scores.tolist()[0]

    # Pair every row's anime id with its score, excluding the selected anime
    # itself (by id, so duplicate rows of the same anime are dropped as well).
    similarity_results = [
        (other_id, score)
        for other_id, score in zip(anime_ids, scores_list)
        if other_id != anime_id
    ]

    # Sort the results by similarity score in descending order
    similarity_results.sort(key=lambda pair: pair[1], reverse=True)

    # Get the top 20 most similar anime and use the API to get their names
    top_20 = similarity_results[:20]
    print(f"\nTop 20 similar anime to '{user_anime}' (Anime id {anime_id}):")
    for other_anime_id, score in top_20:
        anime_name_result = get_anime_name(other_anime_id)
        if anime_name_result:
            print(f"{anime_name_result} (ID: {other_anime_id}) - Similarity: {score:.2f}")
        else:
            print(f"Anime ID {other_anime_id} - Similarity: {score:.2f} (Name not found)")


Computing description embeddings...

Top 20 similar anime to 'attack on titan' (Anime id 16498):
Attack on Titan Season 2 (ID: 20958) - Similarity: 91.52
Attack on Titan Final Season THE FINAL CHAPTERS Special 1 (ID: 146984) - Similarity: 89.61
Tokyo Revengers (ID: 120120) - Similarity: 89.53
Attack on Titan Final Season THE FINAL CHAPTERS Special 2 (ID: 162314) - Similarity: 88.81
Demon Slayer: Kimetsu no Yaiba (ID: 101922) - Similarity: 88.69
Attack on Titan: No Regrets (ID: 20811) - Similarity: 88.66
Attack on Titan Final Season (ID: 110277) - Similarity: 88.32
DARLING in the FRANXX (ID: 99423) - Similarity: 87.98
Attack on Titan Season 3 Part 2 (ID: 104578) - Similarity: 87.85
Dr. STONE (ID: 105333) - Similarity: 87.60
ALDNOAH.ZERO (ID: 20632) - Similarity: 87.43
A Lull in the Sea (Nagi-Asu: Nagi no Asukara) (ID: 16067) - Similarity: 87.31
God Eater (ID: 20849) - Similarity: 87.21
Record of Ragnarok (ID: 127399) - Similarity: 87.16
Haibane Renmei (ID: 387) - Similarity: 87.12
The W

In [7]:
# Release the embedding model so its memory can be reclaimed.
# pop() with a default instead of `del model`: re-running this cell after the
# model has already been removed would otherwise raise NameError.
globals().pop('model', None)

# Force garbage collection
gc.collect()

# If you're using GPU, you can free unused GPU memory
if torch.cuda.is_available():
    torch.cuda.empty_cache()