In [4]:
# AnimeTrends Version 1.0 (2025-07-14)

# Object-Oriented approach for cleaner structure + reuse
# AnimeFetcher uses the Jikan API to fetch anime data
# AnimeRecommender uses the CSV produced by AnimeFetcher to recommend anime with its model (similarity of genres and synopses)
# TODO: extend both classes via inheritance (sub-classes) when adding more features and optimizing data fetching
from tqdm import tqdm
import requests
import pandas as pd
import time

class AnimeFetcher:
    """Collect top-ranked anime from the Jikan v4 REST API into a pandas DataFrame.

    Typical use::

        fetcher = AnimeFetcher()
        df = fetcher.fetch_top_anime(limit=50)
        fetcher.save_dataset(df, "anime_dataset.csv")
    """

    # Pause (seconds) after each per-anime detail request in deep-fetch mode.
    DETAIL_DELAY = 0.5
    # Pause (seconds) between consecutive page requests in shallow mode, where the
    # page request is the only HTTP call being made.
    # NOTE(review): chosen to stay under Jikan's published per-minute limit — confirm.
    PAGE_DELAY = 1.0

    def __init__(self):
        self.BASE_URL = "https://api.jikan.moe/v4"

    def safe_request(self, url, retries=3, delay=1):
        """GET ``url`` with a simple retry loop.

        Args:
            url: Absolute URL to request.
            retries: Maximum number of attempts.
            delay: Seconds to wait between failed attempts.

        Returns:
            The ``requests.Response`` on success, or ``None`` once every
            attempt has failed (network error, timeout or HTTP error status).
        """
        for attempt in range(1, retries + 1):
            try:
                response = requests.get(url, timeout=10)
                response.raise_for_status()
                return response
            except requests.exceptions.RequestException as e:
                if attempt < retries:
                    print(f"Error: {e}. Retrying in {delay}s...")
                    time.sleep(delay)
                else:
                    # No retry follows the last attempt, so do not sleep or promise one.
                    print(f"Error: {e}.")
        print(f'Failed after {retries} attempts: {url}')
        return None

    def fetch_anime_details(self, anime_id):
        """Fetch the full record for one anime.

        Args:
            anime_id: MyAnimeList id used in the ``/anime/{id}`` endpoint.

        Returns:
            A flat dict of the selected fields, or ``None`` when the request
            fails or the payload is missing an expected key.
        """
        try:
            url = f"{self.BASE_URL}/anime/{anime_id}"
            response = self.safe_request(url)
            if response is None:
                return None

            data = response.json()['data']
            return {
                "mal_id": anime_id,
                "title": data['title'],
                "title_english": data.get('title_english', ''),
                "genres": ', '.join([genre['name'] for genre in data['genres']]),
                "score": data['score'],
                "popularity": data['popularity'],
                "episodes": data['episodes'],
                "status": data['status'],
                "synopsis": data['synopsis'],
                "year": data['year'],
                "studios": ', '.join([studio['name'] for studio in data['studios']]),
                "source": data['source'],
                "duration": data['duration'],
                "image_url": data['images']['jpg']['image_url']
            }
        except Exception as e:
            # Best effort: one bad record must not abort a whole crawl.
            print(f"Error fetching anime {anime_id}: {e}")
            return None

    @staticmethod
    def _shallow_record(anime):
        """Build the flat record from an entry of the ``/top/anime`` listing itself."""
        return {
            "mal_id": anime['mal_id'],
            "title": anime['title'],
            "title_english": anime.get('title_english', ''),
            "genres": ', '.join([genre['name'] for genre in anime['genres']]),
            "score": anime.get('score', 0),
            "popularity": anime.get('popularity', 0),
            "episodes": anime.get('episodes', 0),
            "status": anime.get('status', ''),
            "synopsis": anime.get('synopsis', ''),
            "year": anime.get('year', ''),
            "studios": ', '.join([studio['name'] for studio in anime.get('studios', [])]),
            "source": anime.get('source', ''),
            "duration": anime.get('duration', ''),
            "image_url": anime['images']['jpg']['image_url']
        }

    def fetch_top_anime(self, limit=100, deep_fetch=True):
        """Fetch up to ``limit`` entries from the top-anime ranking.

        Args:
            limit: Maximum number of anime to collect.
            deep_fetch: When True, issue one extra ``/anime/{id}`` request per
                entry; when False, use the fields already present in the
                listing response.

        Returns:
            A DataFrame with one row per collected anime and a leading
            1-based ``rank`` column. It may hold fewer than ``limit`` rows when
            the listing is exhausted or a request fails.
        """
        anime_list = []
        page = 1
        print(f"📥 Fetching top {limit} anime (deep_fetch={deep_fetch})...")
        while len(anime_list) < limit:
            try:
                response = self.safe_request(f"{self.BASE_URL}/top/anime?page={page}")
                if response is None:
                    break
                payload = response.json()
                top_anime = payload['data']
                if not top_anime:
                    # An empty page means the listing is exhausted; without this
                    # guard the loop would request further pages forever.
                    break

                for anime in tqdm(top_anime, desc=f"Page {page}", leave=True):
                    if deep_fetch:
                        # Fetch extra details per anime
                        details = self.fetch_anime_details(anime['mal_id'])
                        # Avoid rate limiting: pause after the request just made.
                        time.sleep(self.DETAIL_DELAY)
                    else:
                        # Use shallow data from top_anime (no request, so no pause)
                        details = self._shallow_record(anime)

                    if details:
                        anime_list.append(details)

                    if len(anime_list) >= limit:
                        break

                # Stop when the API reports there is no further page.
                pagination = payload.get('pagination') or {}
                if not pagination.get('has_next_page', True):
                    break
                page += 1
                if not deep_fetch and len(anime_list) < limit:
                    time.sleep(self.PAGE_DELAY)
            except Exception as e:
                print(f"⚠️ Error fetching top anime page {page}: {e}")
                break

        df = pd.DataFrame(anime_list)
        df.insert(0, 'rank', range(1, len(df) + 1))  # 1-based rank in fetch order
        return df

    def save_dataset(self, df, filename='anime_dataset.csv'):
        """Write ``df`` to ``filename`` as CSV (without the index column)."""
        df.to_csv(filename, index=False)
        print(f"Dataset saved as {filename}")

In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity
from difflib import get_close_matches

class AnimeRecommender:
    """Content-based anime recommender over a CSV dataset.

    Similarity between two titles is a weighted sum of the cosine similarity
    of their TF-IDF synopsis vectors and of their one-hot genre vectors.
    """

    def __init__(self, dataset_path="test_anime_dataset.csv"):
        """Load the dataset; the similarity matrices are built by ``build_model``."""
        self.df = pd.read_csv(dataset_path)
        self.tfidf_matrix = None
        self.synopsis_similarity = None
        self.genre_similarity = None
        self.hybrid_similarity = None

    def build_model(self, synopsis_weight=0.7, genre_weight=0.3):
        """Compute the synopsis, genre and hybrid similarity matrices.

        Args:
            synopsis_weight: Weight of the synopsis similarity in the hybrid score.
            genre_weight: Weight of the genre similarity in the hybrid score.

        Side effects:
            Adds a ``genre_list`` column to ``self.df`` and fills the
            ``*_similarity`` attributes.
        """
        print("Building recommendation model...")
        # synopsis similarity (TF-IDF)
        tfidf = TfidfVectorizer(stop_words='english')
        self.tfidf_matrix = tfidf.fit_transform(self.df['synopsis'].fillna(''))
        self.synopsis_similarity = cosine_similarity(self.tfidf_matrix, self.tfidf_matrix)

        # genre similarity (one-hot encoding of the comma-separated genre string)
        self.df['genre_list'] = self.df['genres'].fillna('').apply(
            lambda x: [g.strip() for g in x.split(',') if g.strip() != '']
        )
        mlb = MultiLabelBinarizer()
        genre_matrix = mlb.fit_transform(self.df['genre_list'])
        self.genre_similarity = cosine_similarity(genre_matrix, genre_matrix)

        # hybrid similarity (weighted sum)
        self.hybrid_similarity = (
            synopsis_weight * self.synopsis_similarity +
            genre_weight * self.genre_similarity
        )

        print("Model built successfully.")

    def fuzzy_search(self, query, cutoff=0.6):
        """Return the dataset title closest to ``query``, or ``None``.

        Both the ``title`` and ``title_english`` columns are searched. Matching
        is case-insensitive so that a lowercase query is not penalised for
        capitalisation; the title is returned exactly as stored in the dataset.

        Args:
            query: User-supplied title (may contain typos).
            cutoff: Minimum ``difflib`` similarity ratio in [0, 1].
        """
        candidates = (
            self.df['title'].dropna().tolist()
            + self.df['title_english'].dropna().tolist()
        )
        # Map case-folded text -> original title; first occurrence wins.
        by_folded = {}
        for title in candidates:
            by_folded.setdefault(str(title).casefold(), title)

        matches = get_close_matches(str(query).casefold(), list(by_folded), n=1, cutoff=cutoff)
        return by_folded[matches[0]] if matches else None

    def recommend(self, anime_title, top_n=5):
        """Recommend the ``top_n`` titles most similar to ``anime_title``.

        Args:
            anime_title: Title to look up (fuzzy-matched against the dataset).
            top_n: Number of recommendations; values below 1 yield an empty list.

        Returns:
            A list of ``title`` values ordered from most to least similar, or
            an explanatory string when the title cannot be found.
        """
        # run fuzzy search first
        matched_title = self.fuzzy_search(anime_title)
        if matched_title is None:
            return f"Anime '{anime_title}' not found in dataset (even with fuzzy search :( )"
        print(f"Found match! '{matched_title}' for input '{anime_title}'")

        # Locate the matched row by POSITION: the similarity matrix is positional,
        # so an index label would be wrong for any non-default DataFrame index.
        hits = (
            (self.df['title'] == matched_title) |
            (self.df['title_english'] == matched_title)
        ).to_numpy()
        if not hits.any():
            return f"Anime '{anime_title}' not found in dataset."
        pos = int(hits.argmax())  # first matching row

        # Build the model on demand instead of failing on a None matrix.
        if self.hybrid_similarity is None:
            self.build_model()

        # Exclude the query row explicitly. Dropping "the first sorted entry" is
        # not reliable: self-similarity is not guaranteed to be the unique row
        # maximum (e.g. a row with no synopsis/genre features, or a tie with a
        # duplicate entry).
        ranked = sorted(
            ((i, score) for i, score in enumerate(self.hybrid_similarity[pos]) if i != pos),
            key=lambda pair: pair[1],
            reverse=True,
        )
        best = ranked[:max(top_n, 0)]

        # recommend anime titles
        return [self.df.iloc[i]['title'] for i, _ in best]

In [26]:
# tests below ⬇️

In [None]:
# Smoke test for the AnimeFetcher class (makes live HTTP requests)

fetcher = AnimeFetcher()
# Fetch the top 50 anime (deep_fetch=True: one extra detail request per title) and save them to CSV
anime_df = fetcher.fetch_top_anime(limit=50, deep_fetch=True) 
fetcher.save_dataset(anime_df, 'test_anime_dataset.csv')
# Read the saved CSV back so the cell displays what was written
pd.read_csv('test_anime_dataset.csv')

📥 Fetching top 10 anime (deep_fetch=True)...


Page 1:  36%|███▌      | 9/25 [00:08<00:15,  1.01it/s]

Dataset saved as test_anime_dataset.csv





Unnamed: 0,rank,mal_id,title,title_english,genres,score,popularity,episodes,status,synopsis,year,studios,source,duration,image_url
0,1,52991,Sousou no Frieren,Frieren: Beyond Journey's End,"Adventure, Drama, Fantasy",9.3,137,28,Finished Airing,During their decade-long quest to defeat the D...,2023.0,Madhouse,Manga,24 min per ep,https://cdn.myanimelist.net/images/anime/1015/...
1,2,5114,Fullmetal Alchemist: Brotherhood,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy",9.1,3,64,Finished Airing,After a horrific alchemy experiment goes wrong...,2009.0,Bones,Manga,24 min per ep,https://cdn.myanimelist.net/images/anime/1208/...
2,3,9253,Steins;Gate,Steins;Gate,"Drama, Sci-Fi, Suspense",9.07,14,24,Finished Airing,Eccentric scientist Rintarou Okabe has a never...,2011.0,White Fox,Visual novel,24 min per ep,https://cdn.myanimelist.net/images/anime/1935/...
3,4,38524,Shingeki no Kyojin Season 3 Part 2,Attack on Titan Season 3 Part 2,"Action, Drama, Suspense",9.05,21,10,Finished Airing,Seeking to restore humanity's diminishing hope...,2019.0,Wit Studio,Manga,23 min per ep,https://cdn.myanimelist.net/images/anime/1517/...
4,5,28977,Gintama°,Gintama Season 4,"Action, Comedy, Sci-Fi",9.05,343,51,Finished Airing,"Gintoki, Shinpachi, and Kagura return as the f...",2015.0,Bandai Namco Pictures,Manga,24 min per ep,https://cdn.myanimelist.net/images/anime/3/720...
5,6,39486,Gintama: The Final,Gintama: The Very Final,"Action, Comedy, Drama, Sci-Fi",9.05,1524,1,Finished Airing,Two years have passed following the Tendoshuu'...,,Bandai Namco Pictures,Manga,1 hr 44 min,https://cdn.myanimelist.net/images/anime/1245/...
6,7,60022,One Piece Fan Letter,,"Action, Adventure, Fantasy",9.04,2031,1,Finished Airing,Although the golden age of piracy is about to ...,,Toei Animation,Light novel,24 min,https://cdn.myanimelist.net/images/anime/1455/...
7,8,11061,Hunter x Hunter (2011),Hunter x Hunter,"Action, Adventure, Fantasy",9.03,8,148,Finished Airing,Hunters devote themselves to accomplishing haz...,2011.0,Madhouse,Manga,23 min per ep,https://cdn.myanimelist.net/images/anime/1337/...
8,9,9969,Gintama',Gintama Season 2,"Action, Comedy, Sci-Fi",9.02,404,51,Finished Airing,"After a one-year hiatus, Shinpachi Shimura ret...",2011.0,Sunrise,Manga,24 min per ep,https://cdn.myanimelist.net/images/anime/4/503...
9,10,15417,Gintama': Enchousen,Gintama: Enchousen,"Action, Comedy, Sci-Fi",9.02,752,13,Finished Airing,"While Gintoki Sakata was away, the Yorozuya fo...",2012.0,Sunrise,Manga,24 min per ep,https://cdn.myanimelist.net/images/anime/1452/...


In [20]:
# Smoke test for the AnimeRecommender class: load test_anime_dataset.csv,
# build the similarity model, then print 5 recommendations for a lowercase query
recommender = AnimeRecommender("test_anime_dataset.csv")
recommender.build_model()
print(recommender.recommend("gintama", top_n=5))

Building recommendation model...
Model built successfully.
Found match! 'Gintama°' for input 'gintama'
["Gintama'", "Gintama': Enchousen", 'Gintama: The Final', 'Shingeki no Kyojin Season 3 Part 2', 'One Piece Fan Letter']
