In [158]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder 
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [91]:
data = pd.read_csv('anime.csv')

In [92]:
data.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


# Data Preprocessing:

In [93]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [94]:
data.describe()

Unnamed: 0,anime_id,rating,members
count,12294.0,12064.0,12294.0
mean,14058.221653,6.473902,18071.34
std,11455.294701,1.026746,54820.68
min,1.0,1.67,5.0
25%,3484.25,5.88,225.0
50%,10260.5,6.57,1550.0
75%,24794.5,7.18,9437.0
max,34527.0,10.0,1013917.0


In [95]:
#Handle missing values
data.isnull().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [96]:
data['genre'] = data['genre'].fillna('Unknown')
data['type'] = data['type'].fillna('Unknown')

In [97]:
data['rating'] = data['rating'].fillna(data['rating'].median())

In [98]:
data.isnull().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

# Feature Extraction:

In [99]:
data['anime_id'].value_counts()

anime_id
32281    1
30404    1
26013    1
26017    1
15787    1
        ..
12455    1
28789    1
373      1
2089     1
26081    1
Name: count, Length: 12294, dtype: int64

In [100]:
data['genre'].value_counts()

genre
Hentai                                                  823
Comedy                                                  523
Music                                                   301
Kids                                                    199
Comedy, Slice of Life                                   179
                                                       ... 
Adventure, Drama, Fantasy, Game, Sci-Fi                   1
Adventure, Demons, Fantasy, Historical                    1
Action, Comedy, Drama, Mecha, Music, Sci-Fi, Shounen      1
Action, Comedy, Fantasy, Mecha, Sci-Fi, Shounen           1
Hentai, Slice of Life                                     1
Name: count, Length: 3265, dtype: int64

In [101]:
data['name'].value_counts()

name
Shi Wan Ge Leng Xiaohua                           2
Saru Kani Gassen                                  2
Bakabon Osomatsu no Karee wo Tazunete Sansenri    1
Backkom Meogeujan Yeohaeng                        1
Backkom Mission Impossible                        1
                                                 ..
Yoroiden Samurai Troopers Kikoutei Densetsu       1
Yuu☆Yuu☆Hakusho: Mu Mu Hakusho                    1
3-gatsu no Lion meets Bump of Chicken             1
Bannou Bunka Neko-Musume                          1
Yasuji no Pornorama: Yacchimae!!                  1
Name: count, Length: 12292, dtype: int64

In [102]:
data['rating'].value_counts()

rating
6.57    283
6.00    141
7.00     99
6.50     90
6.25     84
       ... 
3.47      1
3.71      1
3.87      1
3.91      1
3.14      1
Name: count, Length: 598, dtype: int64

In [136]:
# Normalization of rating and coverting categorical numric (genre)
gerne_enco = data['genre'].str.get_dummies(sep=', ')

scal = StandardScaler()
norm_rating = scal.fit_transform(data[['rating']])

# combine into a single DataFrame
feature = pd.concat([gerne_enco, pd.DataFrame(norm_rating, columns=['norm_rating'])])
feature.head()

Unnamed: 0,Action,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,...,Space,Sports,Super Power,Supernatural,Thriller,Unknown,Vampire,Yaoi,Yuri,norm_rating
0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,
1,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,
4,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,


In [None]:
genre_enco =data['genre'].str.get_dummies(sep=',')

In [159]:
# Initialize the CountVectorizer
vectorizer = CountVectorizer(tokenizer=lambda x: x.split(', '))
genre_matrix = vectorizer.fit_transform(data['genre'])



In [160]:
# computing cosine similarity
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(genre_matrix,genre_matrix)

In [161]:
# normalising of rating.
scal = StandardScaler()
norm_rating = scal.fit_transform(data[['rating']])

In [162]:
# combining freature
feature = pd.concat([gerne_enco, pd.DataFrame(norm_rating, columns=['norm_rating'])],axis=1)

# Recommendation System:

In [167]:
from sklearn.metrics.pairwise import cosine_similarity
similar = cosine_similarity(feature)

In [168]:
# Recommendation function
def recommend_anime(title, similar=similar, data=data):
    idx = data[data['name'] == title].index[0]
    similarity_scores = list(enumerate(similar[idx]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    similar_anime_indices = [i[0] for i in similarity_scores[1:11]]
    return data['name'].iloc[similar_anime_indices]

In [169]:
call = recommend_anime("Steins;Gate", similar, data)
print("recommedation for 'Steins;Gate': ")
print(call)

recommedation for 'Steins;Gate': 
59              Steins;Gate Movie: Fuka Ryouiki no Déjà vu
126                  Steins;Gate: Oukoubakko no Poriomania
196      Steins;Gate: Kyoukaimenjou no Missing Link - D...
9091                           Kaitei Toshi no Dekiru made
10414    Subarashii Sekai Ryokou: New York Tabi &quot;C...
1578              Sakasama no Patema: Beginning of the Day
1594                                    Mai-Otome 0: S.ifr
169                                                Shelter
225                                            Summer Wars
9595                                      Mogura no Motoro
Name: name, dtype: object


In [170]:
call = recommend_anime("Gintama°", similar, data)
print("recommedation for 'Gintama°': ")
print(call)

recommedation for 'Gintama°': 
4                                          Gintama&#039;
9                               Gintama&#039;: Enchousen
8      Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...
12                                               Gintama
63           Gintama: Yorinuki Gintama-san on Theater 2D
65                Gintama Movie: Shinyaku Benizakura-hen
216                     Gintama: Shinyaku Benizakura-hen
306                     Gintama: Jump Festa 2014 Special
361                     Gintama: Jump Festa 2015 Special
380    Gintama: Nanigoto mo Saiyo ga Kanjin nano de T...
Name: name, dtype: object
