In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Checkpoint identifier of the multilingual embedding model used by this notebook.
MODEL_NAME = 'intfloat/multilingual-e5-large-instruct'

# Instantiate the encoder once; later cells reuse this `model` object.
model = SentenceTransformer(MODEL_NAME)

In [3]:
# Example DataFrame with anime titles and one short review per title.
data = {
    'anime_title': ['Naruto', 'One Piece', 'Attack on Titan', 'Bleach'],
    'review': [
        "Amazing action sequences and deep character development.",
        "A fun, adventurous journey with strong friendship themes.",
        "A dark and intense narrative with impressive visuals.",
        "Unique world-building and memorable character arcs."
    ]
}
df = pd.DataFrame(data)

# Pre-encode all anime reviews into embeddings. Because the vectors are
# L2-normalized, a plain dot product between two rows is their cosine similarity.
review_texts = df['review'].tolist()
review_embeddings = model.encode(review_texts, convert_to_tensor=True, normalize_embeddings=True)

# Prompt the user for an anime name; surrounding whitespace is not meaningful.
user_anime = input("Enter an anime name: ").strip()

# Match titles case-insensitively so that e.g. "naruto" still finds "Naruto".
# Working on plain lists keeps every lookup positional, so row i of
# `review_embeddings` always corresponds to titles[i] regardless of df's index labels.
titles = df['anime_title'].tolist()
normalized_titles = [title.strip().casefold() for title in titles]
query_key = user_anime.casefold()

if query_key not in normalized_titles:
    print(f"Anime '{user_anime}' not found in the dataframe.")
else:
    # Position of the first matching title (also the row of its embedding).
    selected_index = normalized_titles.index(query_key)
    matched_title = titles[selected_index]
    query_review = review_texts[selected_index]

    # Reuse the embedding computed above instead of encoding the same review
    # a second time; the slice keeps the 2-D (1, dim) shape for the matmul.
    query_embedding = review_embeddings[selected_index:selected_index + 1]

    # Cosine similarity between the selected review and all reviews, scaled by 100.
    scores = (query_embedding @ review_embeddings.T) * 100
    scores_list = scores.tolist()[0]

    # Pair each other title with its score (skipping the selected anime itself)
    # and order the pairs from most to least similar.
    similarity_results = sorted(
        (
            (title, score)
            for position, (title, score) in enumerate(zip(titles, scores_list))
            if position != selected_index
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Display the similarity scores
    print(f"\nSimilarity scores between '{matched_title}' review and other anime reviews:")
    for title, score in similarity_results:
        print(f"{title}: {score:.2f}")



Similarity scores between 'Naruto' review and other anime reviews:
Bleach: 92.60
Attack on Titan: 90.72
One Piece: 86.78
