In [12]:
!pip install wordcloud -q


[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [14]:
### Basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
import warnings
warnings.filterwarnings(action='ignore')

# Data Preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

## Import necessary modules for content-based filtering
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel


# Data Processing

#### Reading the whole dataset

In [15]:
# Load the cleaned anime catalogue from the local dataset folder.
df_anime = pd.read_csv(
    './dataset/anime_cleaned.csv',
)

#### Next, restrict the recommendations to anime that have at least a threshold number of members (the `members` column). This threshold helps ensure that the recommended titles have a sufficiently large audience, indicating a certain level of popularity or user engagement.

In [56]:
# Minimum value of the `members` column an anime needs in order to be kept.
popularity_threshold = 5000

# Keep only the rows whose member count reaches the threshold.
is_popular = df_anime['members'] >= popularity_threshold
df_anime = df_anime[is_popular]

# IMPORTANT: keep one row per anime_id and rebuild a 0..N-1 index so that
# row labels and row positions coincide for the cells below.
df_anime = df_anime.drop_duplicates(subset='anime_id')
df_anime = df_anime.reset_index(drop=True)

print(df_anime.shape)
df_anime.head(3)

(4174, 34)


Unnamed: 0,index,anime_id,title,title_english,title_japanese,title_synonyms,image_url,type,source,episodes,...,broadcast,related,producer,licensor,studio,genre,opening_theme,ending_theme,duration_min,aired_from_year
0,0,11013,Inu x Boku SS,Inu X Boku Secret Service,妖狐×僕SS,Youko x Boku SS,https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,12,...,Fridays at Unknown,"{'Adaptation': [{'mal_id': 17207, 'type': 'man...","Aniplex, Square Enix, Mainichi Broadcasting Sy...",Sentai Filmworks,David Production,"Comedy, Supernatural, Romance, Shounen","['""Nirvana"" by MUCC']","['#1: ""Nirvana"" by MUCC (eps 1, 11-12)', '#2: ...",24.0,2012.0
1,1,2104,Seto no Hanayome,My Bride is a Mermaid,瀬戸の花嫁,The Inland Sea Bride,https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,26,...,Unknown,"{'Adaptation': [{'mal_id': 759, 'type': 'manga...","TV Tokyo, AIC, Square Enix, Sotsu",Funimation,Gonzo,"Comedy, Parody, Romance, School, Shounen","['""Romantic summer"" by SUN&LUNAR']","['#1: ""Ashita e no Hikari (明日への光)"" by Asuka Hi...",24.0,2007.0
2,2,5262,Shugo Chara!! Doki,Shugo Chara!! Doki,しゅごキャラ！！どきっ,"Shugo Chara Ninenme, Shugo Chara! Second Year",https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,51,...,Unknown,"{'Adaptation': [{'mal_id': 101, 'type': 'manga...","TV Tokyo, Sotsu",,Satelight,"Comedy, Magic, School, Shoujo","['#1: ""Minna no Tamago (みんなのたまご)"" by Shugo Cha...","['#1: ""Rottara Rottara (ロッタラ ロッタラ)"" by Buono! ...",24.0,2008.0


# Model training (content-based filtering)

In [79]:
# Build a TF-IDF representation of every anime's comma-separated genre string.
tfidf = TfidfVectorizer(stop_words='english')

# Missing genres are turned into empty documents. Casting NaN to text instead
# would yield the literal token 'nan', which makes every anime without a genre
# look perfectly similar to every other anime without a genre.
# NOTE: despite its name this is the fitted (sparse) TF-IDF matrix, not a generator;
# the name is kept so that other cells referring to it keep working.
tfidf_matrix_generator = tfidf.fit_transform(df_anime['genre'].fillna('').astype(str))

# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel equals the cosine similarity.
# NOTE: despite its name, this is a dense (n_anime x n_anime) ndarray, because
# linear_kernel returns dense output by default.
cosine_sim_sparse = linear_kernel(tfidf_matrix_generator, tfidf_matrix_generator)

# 2: Content-Based Recommendation

In [89]:
def get_recommendations(title, cosine_sim, df, n=10, show_type=None):
    """Recommend the anime most similar to ``title``.

    Candidates are ranked by similarity to ``title`` (descending); ties are
    broken by the ``score`` column (descending), and remaining ties keep the
    row order of ``df``. The queried title itself is never returned.

    Parameters
    ----------
    title : str
        Value to look up in ``df['title']``. If several rows match, the first
        one is used.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix whose row/column ``i`` corresponds to the
        ``i``-th row (by position) of ``df``.
    df : pandas.DataFrame
        Must contain the columns ``title``, ``genre``, ``score`` and ``type``.
    n : int, default 10
        Maximum number of recommendations to return.
    show_type : str or None, default None
        When truthy, only rows whose ``type`` equals this value are considered
        (e.g. ``'Movie'``, ``'OVA'``, ``'Special'``, ``'TV'``).

    Returns
    -------
    pandas.DataFrame
        The ``title``, ``genre``, ``score`` and ``type`` columns of the
        recommended rows, in ranking order.

    Raises
    ------
    IndexError
        If ``title`` does not occur in ``df['title']``.
    ValueError
        If the similarity row does not have one entry per row of ``df``.
    """
    # Work with row *positions* throughout, so the function does not depend on
    # ``df`` having a default 0..N-1 index.
    matches = np.flatnonzero((df['title'] == title).to_numpy(dtype=bool))
    if matches.size == 0:
        raise IndexError(f"title {title!r} not found in df['title']")
    pos = int(matches[0])
    print("Genres of title", df.iloc[pos]['genre'])

    # Similarity of the queried anime to every row of ``df``.
    row = cosine_sim[pos]
    if hasattr(row, "toarray"):  # scipy sparse row -> dense
        row = row.toarray()
    sims = np.asarray(row, dtype=float).ravel()
    if sims.shape[0] != len(df):
        raise ValueError(
            f"cosine_sim row has {sims.shape[0]} entries but df has {len(df)} rows"
        )

    # Unknown scores (NaN, or non-numeric markers such as "UNKNOWN") cannot be
    # ranked, so those rows are excluded.
    scores = pd.to_numeric(df['score'], errors='coerce').to_numpy(dtype=float)
    keep = ~np.isnan(scores)
    if show_type:
        keep &= (df['type'] == show_type).to_numpy(dtype=bool)
    keep[pos] = False  # never recommend the queried anime itself

    candidates = np.flatnonzero(keep)
    # np.lexsort is stable and treats the LAST key as the primary one:
    # similarity descending first, then score descending.
    order = np.lexsort((-scores[candidates], -sims[candidates]))
    top_positions = candidates[order][:n]

    return df.iloc[top_positions][['title', 'genre', 'score', 'type']]

In [90]:
# Demo: the ten movies closest in genre to a given TV series.
anime_title = 'Kono Subarashii Sekai ni Shukufuku wo!'
recommendations = get_recommendations(
    title=anime_title,
    cosine_sim=cosine_sim_sparse,
    df=df_anime,
    n=10,
    show_type='Movie',
)
print(f'Recommendations for "{anime_title}":')
recommendations

Genres of title Adventure, Comedy, Parody, Supernatural, Magic, Fantasy
Recommendations for "Kono Subarashii Sekai ni Shukufuku wo!":


Unnamed: 0,title,genre,score,type
1394,Slayers Gorgeous,"Adventure, Comedy, Fantasy, Magic, Supernatural",7.4,Movie
3528,Mahou Sensei Negima! Anime Final,"Adventure, Magic, Supernatural",6.88,Movie
3343,Mary to Majo no Hana,"Adventure, Fantasy, Magic",7.45,Movie
3377,Gedo Senki,"Adventure, Fantasy, Magic",7.13,Movie
696,Slayers Return,"Adventure, Comedy, Fantasy, Magic, Shounen",7.4,Movie
964,Slayers Great,"Adventure, Comedy, Fantasy, Magic, Shounen",7.39,Movie
2438,Slayers: The Motion Picture,"Adventure, Comedy, Magic, Fantasy, Shounen",7.36,Movie
2385,Slayers Premium,"Adventure, Comedy, Fantasy, Magic, Shounen",7.29,Movie
3653,Fate/stay night Movie: Heaven&#039;s Feel - I....,"Action, Fantasy, Magic, Supernatural",8.47,Movie
98,Fate/stay night Movie: Unlimited Blade Works,"Action, Fantasy, Magic, Supernatural",7.53,Movie


In [92]:
# Demo: movie recommendations for a movie, using the default number of results.
anime_title = 'Kimi no Na wa.'
recommendations = get_recommendations(
    title=anime_title,
    cosine_sim=cosine_sim_sparse,
    df=df_anime,
    show_type='Movie',
)
print(f'Recommendations for "{anime_title}":')
recommendations

Genres of title Supernatural, Drama, Romance, School
Recommendations for "Kimi no Na wa.":


Unnamed: 0,title,genre,score,type
4165,Aura: Maryuuin Kouga Saigo no Tatakai,"Supernatural, Drama, Romance, School",7.6,Movie
2063,Air Movie,"Drama, Romance, Supernatural",7.35,Movie
2209,Kokoro ga Sakebitagatterunda.,"Drama, Romance, School",8.18,Movie
4048,Suki ni Naru Sono Shunkan wo.: Kokuhaku Jikkou...,"Comedy, Drama, Romance, School",7.21,Movie
2000,Da Yu Hai Tang,"Adventure, Drama, Romance, Supernatural",7.82,Movie
618,Clannad Movie,"Drama, Romance, Fantasy, School",7.32,Movie
3446,Momo e no Tegami,"Supernatural, Drama",7.73,Movie
3041,Zutto Mae kara Suki deshita.: Kokuhaku Jikkou ...,"Romance, School",7.42,Movie
1315,Taifuu no Noruda,"Drama, School, Sci-Fi, Supernatural",6.33,Movie
4075,Hotarubi no Mori e,"Drama, Romance, Shoujo, Supernatural",8.55,Movie
