In [2]:
# Required libraries
import pandas as pd
import spacy
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from tabulate import tabulate
import nltk

# Downloads (only run once)
# Fetch the NLTK corpora this notebook relies on: 'wordnet' for the
# WordNetLemmatizer and 'stopwords' for stopwords.words('english').
# Re-running is harmless: the captured output shows that packages which are
# already present are reported as up-to-date.
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
# Shared NLP resources used by preprocess_sentences: the spaCy pipeline,
# the NLTK WordNet lemmatizer and the English stop-word list (as a set).
nlp = spacy.load("en_core_web_sm")
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# Read the catalogue, keep only the three columns the recommender uses,
# discard rows with a missing value in any of them, and key the frame by name.
data = (
    pd.read_csv('anime-dataset-2023.csv')[["Name", "Synopsis", "Genres"]]
    .dropna()
    .set_index('Name')
)

# Turn the comma-separated genre string into a set of trimmed, lower-cased
# labels so genres can be compared with set operations.
data['Genre_Set'] = data['Genres'].apply(
    lambda genres: {label.strip().lower() for label in genres.split(',')}
)

# Preprocess plot with spaCy + NLTK lemmatization
def preprocess_sentences(text):
    """Normalise a piece of text into a space-separated string of lemmas.

    The text is lower-cased and run through the spaCy pipeline ``nlp``. A
    token is kept only when its spaCy lemma is not in ``stop_words`` and the
    token is purely alphabetic. Each kept token's text is lemmatized with the
    NLTK ``lemmatizer``: as a verb when spaCy tags it ``VERB``, otherwise with
    the lemmatizer's default part of speech.

    Args:
        text: The raw string to clean.

    Returns:
        The lemmatized tokens joined by single spaces (an empty string when
        no token survives the filter).
    """
    def _lemma(tok):
        # spaCy supplies the part of speech; NLTK does the actual lemmatizing.
        if tok.pos_ == "VERB":
            return lemmatizer.lemmatize(tok.text, 'v')
        return lemmatizer.lemmatize(tok.text)

    return ' '.join(
        _lemma(tok)
        for tok in nlp(text.lower())
        if tok.lemma_ not in stop_words and tok.is_alpha
    )

# Store a cleaned, lemmatized version of every synopsis next to the original.
data["Processed_Plot"] = data["Synopsis"].apply(preprocess_sentences)

# TF-IDF vectorization of processed plots (one matrix row per title)
tfidfvec = TfidfVectorizer()
tfidf_matrix = tfidfvec.fit_transform(data["Processed_Plot"])

# Cosine similarity (plot): with the second argument omitted, the matrix is
# compared against itself, giving every title-vs-title similarity.
plot_similarity = cosine_similarity(tfidf_matrix)

# Index lookup: anime name -> positional row number in `data`
indices = pd.Series(range(data.shape[0]), index=data.index)

# Genre similarity using Jaccard index
def jaccard_similarity(set1, set2):
    """Return the Jaccard index of two sets: |intersection| / |union|.

    Returns 0 when both sets are empty, which avoids dividing by zero.
    """
    union = set1 | set2
    if not union:
        return 0
    return len(set1 & set2) / len(union)

# Recommendation function
def recommend_anime(title, alpha=0.7, top_n=10):
    """Recommend titles similar to ``title`` by blending plot and genre similarity.

    The query is matched case-insensitively, ignoring surrounding whitespace,
    against the names in ``data.index``; the first matching row becomes the
    base title. Every other row is scored as
    ``alpha * plot_similarity + (1 - alpha) * jaccard(genre sets)`` and the
    highest-scoring names are returned.

    Rows whose name contains the query as a substring are skipped, so entries
    from the same franchise are not recommended back.

    Args:
        title: Anime name to look up.
        alpha: Weight of the plot similarity in the blended score; the genre
            similarity is weighted by ``1 - alpha``.
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        A list of up to ``top_n`` names ordered from most to least similar.
        When the title is not found, a message is printed and an empty list
        is returned.
    """
    title_lower = title.strip().lower()
    names = list(data.index)
    names_lower = [name.strip().lower() for name in names]

    # Resolve the base row by position instead of a label lookup: a label
    # lookup yields several rows when a name occurs more than once in the
    # index, which would break the scalar comparisons in the loop below.
    base_index = next(
        (pos for pos, name in enumerate(names_lower) if name == title_lower),
        None,
    )
    if base_index is None:
        print(f"\n\033[1;31mAnime title '{title}' not found in dataset.\033[0m")
        return []

    # Pull the per-row data out once so the loop does not build a row object
    # on every iteration.
    genre_sets = list(data['Genre_Set'])
    base_genres = genre_sets[base_index]
    plot_row = plot_similarity[base_index]

    combined_scores = []
    for i, candidate_lower in enumerate(names_lower):
        if i == base_index:
            continue

        # Filter out titles with the same base word (franchise)
        if title_lower in candidate_lower:
            continue

        genre_sim = jaccard_similarity(base_genres, genre_sets[i])
        total_sim = alpha * plot_row[i] + (1 - alpha) * genre_sim
        combined_scores.append((names[i], total_sim))

    # Highest blended score first; the sort is stable, so ties keep row order.
    combined_scores.sort(key=lambda pair: pair[1], reverse=True)
    return [name for name, _ in combined_scores[:top_n]]


In [4]:
# Ask for a title (bold blue prompt) and look up its recommendations.
anime_title = input("\033[1;34mEnter the anime title: \033[0m")
recommendations = recommend_anime(anime_title)

# Show a ranked table only when there is something to show; for an unknown
# title recommend_anime prints its own message and returns an empty list.
if recommendations:
    table_data = [[rank, name] for rank, name in enumerate(recommendations, start=1)]
    print(
        f"\n\033[1;32mRecommendations for \033[1;36m{anime_title}\033[1;32m:\033[0m",
        "\033[1;37m" + "=" * 50 + "\033[0m",
        tabulate(table_data, headers=["#", "Anime Title"], tablefmt="fancy_grid"),
        "\033[1;37m" + "=" * 50 + "\033[0m",
        sep="\n",
    )


[1;34mEnter the anime title: [0mnaruto

[1;32mRecommendations for [1;36mnaruto[1;32m:[0m
╒═════╤═══════════════════════════════════════════════════════════════════╕
│   # │ Anime Title                                                       │
╞═════╪═══════════════════════════════════════════════════════════════════╡
│   1 │ Jiang Ziya                                                        │
├─────┼───────────────────────────────────────────────────────────────────┤
│   2 │ Kogane no Hana                                                    │
├─────┼───────────────────────────────────────────────────────────────────┤
│   3 │ Nanatsu no Taizai Movie 1: Tenkuu no Torawarebito                 │
├─────┼───────────────────────────────────────────────────────────────────┤
│   4 │ Dahufa                                                            │
├─────┼───────────────────────────────────────────────────────────────────┤
│   5 │ Nanatsu no Taizai: Funnu no Shinpan                          