In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
anime_data_cleaned = pd.read_csv('csv/2020/anime_2020_cleaned.csv')

In [3]:
anime_data_cleaned['Genres'] = anime_data_cleaned['Genres'].apply(lambda x: x.replace(' ', '').replace(',', ' '))

## Basic Content-based by Genre using CountVectorizer

In [4]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Create a CountVectorizer to one-hot encode the genres
vectorizer = CountVectorizer()
genre_matrix = vectorizer.fit_transform(anime_data_cleaned['Genres'])

In [6]:
# Compute the cosine similarity between anime based on their genre vectors
genre_similarity = cosine_similarity(genre_matrix, genre_matrix)

In [27]:
# Function to get the top-k most similar anime for a given anime
def get_recommendations(anime_title, similarity_matrix, anime_data, k=10):
    index = anime_data[anime_data['Name'] == anime_title].index[0]
    scores = list(enumerate(similarity_matrix[index]))
    scores = sorted(scores, key=lambda x: x[1], reverse=True)
    scores = scores[1:k+1]  # exclude the anime itself
    indices = [i[0] for i in scores]
    return anime_data['Name'].iloc[indices]

In [28]:
title = 'Naruto'
get_recommendations(title, genre_similarity, anime_data_cleaned)

[794, 448, 1735, 2937, 3324, 162, 518, 3827, 106, 149]

## Content-based by Genre and Studio

In [11]:
anime_data_cleaned['Genres_and_Studio'] = anime_data_cleaned['Genres'] + ', ' + anime_data_cleaned['Studios']

In [12]:
# Create a CountVectorizer to one-hot encode the genres
vectorizer = CountVectorizer()
genre_studio_matrix = vectorizer.fit_transform(anime_data_cleaned['Genres_and_Studio'])

In [13]:
# Compute the cosine similarity between anime based on their genre vectors
genre_studio_similarity = cosine_similarity(genre_studio_matrix, genre_studio_matrix)

In [14]:
title = 'Naruto'
get_recommendations(title, genre_studio_similarity, anime_data_cleaned)

794                  Naruto: Shippuuden
162                      Rekka no Honoo
3827    Boruto: Naruto Next Generations
184                              Bleach
1739                     Moeru! Oniisan
272                     Yuu☆Yuu☆Hakusho
1205           Chiisana Kyojin Microman
769                        Duel Masters
1496                Duel Masters Charge
3545                  Duel Masters VSRF
Name: Name, dtype: object

In [20]:
anime_titles = ['Naruto', 'Rekka no Honoo']
# for loop titles and print Studio and Genres column in anime_data_cleaned for each title
for title in anime_titles:
    print(title)
    print(anime_data_cleaned[anime_data_cleaned['Name'] == title][['Studios', 'Genres']])
    print()


Naruto
          Studios                                             Genres
9  Studio Pierrot  Action Adventure Comedy SuperPower MartialArts...

Rekka no Honoo
            Studios                                           Genres
162  Studio Pierrot  Action Adventure MartialArts Shounen SuperPower



In [25]:
get_recommendations('InuYasha', genre_studio_similarity, anime_data_cleaned)

1831                      InuYasha: Kanketsu-hen
4811    Hanyou no Yashahime: Sengoku Otogizoushi
737                               Kekkaishi (TV)
125                            Tsubasa Chronicle
31                                 Chrno Crusade
538                            Majutsushi Orphen
590                                  Slayers Try
1817                             Kuroshitsuji II
2879                Shingeki no Bahamut: Genesis
3328            Shingeki no Bahamut: Virgin Soul
Name: Name, dtype: object

## Content-based with multiple titles

In [21]:
import collections
def get_recommendations_for_multiple_titles(titles, cosine_sim, anime_df, top_k=10):
    # Find the indices of the input titles
    title_indices = [anime_df[anime_df['Name'] == title].index[0] for title in titles]

    # Get the cosine similarity scores for all titles in the input list
    sim_scores = [list(enumerate(cosine_sim[title_index])) for title_index in title_indices]

    # Combine the similarity scores
    combined_scores = collections.defaultdict(float)
    for score_list in sim_scores:
        for i, score in score_list:
            combined_scores[i] += score

    # Sort the combined scores and get the top k indices
    sorted_scores = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)
    top_k_indices = [i for i, _ in sorted_scores[:top_k]]

    # Get the top k recommendations based on the combined scores
    return anime_df.iloc[top_k_indices]

In [23]:
titles = ['Kill la Kill', 'Code Geass: Hangyaku no Lelouch', 'Shingeki no Kyojin', 'Tengen Toppa Gurren Lagann', 
          'Mahou Shoujo Madoka★Magica', 'Ga-Rei: Zero', 'JoJo no Kimyou na Bouken Part 5: Ougon no Kaze']
get_recommendations_for_multiple_titles(titles, genre_similarity, anime_data_cleaned, 5)

Unnamed: 0,MAL_ID,Name,Score,Genres,Episodes,Premiered,Studios,Members,Completed,Dropped rate,Genres_and_Studio
145,209,R.O.D: The TV,7.55,Action Sci-Fi Adventure Comedy SuperPower Drama,26,Fall 2003,J.C.Staff,61604,32941,0.06,Action Sci-Fi Adventure Comedy SuperPower Dram...
306,479,Ueki no Housoku,7.76,Action Adventure Comedy SuperPower Supernatura...,51,Spring 2005,Studio Deen,97521,51123,0.07,Action Adventure Comedy SuperPower Supernatura...
718,1575,Code Geass: Hangyaku no Lelouch,8.72,Action Military Sci-Fi SuperPower Drama Mecha ...,25,Fall 2006,Sunrise,1583882,1209288,0.02,Action Military Sci-Fi SuperPower Drama Mecha ...
10,21,One Piece,8.52,Action Adventure Comedy SuperPower Drama Fanta...,Unknown,Fall 1999,Toei Animation,1352724,33,0.11,Action Adventure Comedy SuperPower Drama Fanta...
118,168,s.CRY.ed,7.38,Action Sci-Fi Adventure SuperPower Drama,26,Summer 2001,Sunrise,72311,44099,0.05,"Action Sci-Fi Adventure SuperPower Drama, Sunrise"


In [24]:
get_recommendations_for_multiple_titles(titles, genre_studio_similarity, anime_data_cleaned, 5)

Unnamed: 0,MAL_ID,Name,Score,Genres,Episodes,Premiered,Studios,Members,Completed,Dropped rate,Genres_and_Studio
718,1575,Code Geass: Hangyaku no Lelouch,8.72,Action Military Sci-Fi SuperPower Drama Mecha ...,25,Fall 2006,Sunrise,1583882,1209288,0.02,Action Military Sci-Fi SuperPower Drama Mecha ...
1146,2904,Code Geass: Hangyaku no Lelouch R2,8.91,Action Military Sci-Fi SuperPower Drama Mecha,25,Spring 2008,Sunrise,1268320,1060730,0.01,"Action Military Sci-Fi SuperPower Drama Mecha,..."
118,168,s.CRY.ed,7.38,Action Sci-Fi Adventure SuperPower Drama,26,Summer 2001,Sunrise,72311,44099,0.05,"Action Sci-Fi Adventure SuperPower Drama, Sunrise"
145,209,R.O.D: The TV,7.55,Action Sci-Fi Adventure Comedy SuperPower Drama,26,Fall 2003,J.C.Staff,61604,32941,0.06,Action Sci-Fi Adventure Comedy SuperPower Dram...
306,479,Ueki no Housoku,7.76,Action Adventure Comedy SuperPower Supernatura...,51,Spring 2005,Studio Deen,97521,51123,0.07,Action Adventure Comedy SuperPower Supernatura...
