# Content-Based Recommendation System

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Data Preprocessing

In [3]:
# Load the two source tables: anime metadata and per-user ratings.
df_anime, df_rating = (pd.read_csv(csv_path) for csv_path in ('anime.csv', 'rating.csv'))

In [4]:
# Drop the columns that are not used later in the notebook. Rebinding (instead of
# inplace=True) together with errors='ignore' keeps this cell idempotent: running
# it a second time no longer raises KeyError because the columns are already gone.
df_anime = df_anime.drop(columns=['rating', 'members'], errors='ignore')

In [5]:
# Preview the first rows of the anime table.
df_anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51


In [6]:
# Preview the first rows of the user ratings table.
df_rating.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [7]:
# Outer join on the shared anime_id key: rows present in only one of the two
# tables are kept and padded with NaN.
df = df_rating.merge(df_anime, on='anime_id', how='outer')

In [8]:
# Preview the merged ratings + anime frame.
df.head()

Unnamed: 0,user_id,anime_id,rating,name,genre,type,episodes
0,1.0,20,-1.0,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220
1,3.0,20,8.0,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220
2,5.0,20,6.0,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220
3,6.0,20,-1.0,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220
4,10.0,20,-1.0,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220


In [9]:
# Exploratory Data Analysis (EDA)

In [10]:
# Count missing values per column of the merged frame.
df.isnull().sum()

user_id     1097
anime_id       0
rating      1097
name          10
genre        150
type          38
episodes      10
dtype: int64

In [11]:
# Keep only the rows that have a value in every column.
df = df.dropna(axis=0, how='any')

In [12]:
# Preview the merged frame again now that incomplete rows have been dropped.
df.head()

Unnamed: 0,user_id,anime_id,rating,name,genre,type,episodes
0,1.0,20,-1.0,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220
1,3.0,20,8.0,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220
2,5.0,20,6.0,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220
3,6.0,20,-1.0,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220
4,10.0,20,-1.0,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220


In [13]:
# Top Anime Ratings

In [14]:
# Ten titles with the highest mean rating. The mean ignores how many ratings a
# title received, so titles with very few ratings can appear at the top.
(
    df.groupby('name')['rating']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

name
STAR BEAT!: Hoshi no Kodou                                          10.000000
Shiroi Zou                                                          10.000000
Warui no wo Taose!! Salaryman Man                                   10.000000
Choegang Top Plate                                                  10.000000
Tang Lang Bu Chan                                                    9.000000
Yakushiji Ryouko no Kaiki Jikenbo: Hamachou, Voice &amp; Fiction     9.000000
Shiranpuri                                                           9.000000
Doukyuusei                                                           9.000000
Kimi no Na wa.                                                       8.297863
Ginga Eiyuu Densetsu                                                 8.239203
Name: rating, dtype: float64

In [15]:
# Mean rating and number of ratings per anime

In [16]:
# Per-title summary: mean rating and how many ratings the title received.
ratings = (
    df.groupby('name')['rating']
    .agg(['mean', 'count'])
    .rename(columns={'mean': 'rating', 'count': 'num of ratings'})
)

In [17]:
# Show the first 20 titles with their mean rating and rating count.
ratings.head(20)

Unnamed: 0_level_0,rating,num of ratings
name,Unnamed: 1_level_1,Unnamed: 2_level_1
&quot;0&quot;,2.769231,26
"&quot;Aesop&quot; no Ohanashi yori: Ushi to Kaeru, Yokubatta Inu",0.0,2
&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hatsukoi,5.774936,782
&quot;Bungaku Shoujo&quot; Memoire,6.155748,809
&quot;Bungaku Shoujo&quot; Movie,6.45798,1535
&quot;Eiji&quot;,5.0,17
.hack//G.U. Returner,4.80411,730
.hack//G.U. Trilogy,5.347943,1118
.hack//G.U. Trilogy: Parody Mode,4.589235,353
.hack//Gift,4.675581,860


In [18]:
# Ten titles with the largest number of ratings.
(
    df.groupby('name')['rating']
    .count()
    .sort_values(ascending=False)
    .head(10)
)

name
Death Note                          39340
Sword Art Online                    30583
Shingeki no Kyojin                  29584
Code Geass: Hangyaku no Lelouch     27718
Elfen Lied                          27506
Angel Beats!                        27183
Naruto                              25925
Fullmetal Alchemist                 25032
Fullmetal Alchemist: Brotherhood    24574
Toradora!                           24283
Name: rating, dtype: int64

In [19]:
# Lookup table from anime title (index) to its comma-separated genre string.
genre = df_anime[['name', 'genre']].set_index('name')

In [20]:
# Preview the title -> genre lookup table.
genre.head(10)

Unnamed: 0_level_0,genre
name,Unnamed: 1_level_1
Kimi no Na wa.,"Drama, Romance, School, Supernatural"
Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili..."
Gintama°,"Action, Comedy, Historical, Parody, Samurai, S..."
Steins;Gate,"Sci-Fi, Thriller"
Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S..."
Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou,"Comedy, Drama, School, Shounen, Sports"
Hunter x Hunter (2011),"Action, Adventure, Shounen, Super Power"
Ginga Eiyuu Densetsu,"Drama, Military, Sci-Fi, Space"
Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare,"Action, Comedy, Historical, Parody, Samurai, S..."
Gintama&#039;: Enchousen,"Action, Comedy, Historical, Parody, Samurai, S..."


In [21]:
# Titles without a genre string cannot be compared by genre, so discard them.
genre = genre.dropna(axis=0, how='any')

In [22]:
# Building the recommendation system

In [23]:
# Every title from the anime table, in file order; searched by fullname().
anime_name = df_anime['name'].tolist()

def fullname(partial, names=None):
    """Resolve a partial, case-insensitive query to a full anime title.

    A title is a candidate when every whitespace-separated word of
    ``partial`` occurs somewhere in the title (case-insensitively).
    Preference order for the returned title:

    1. a title identical to the (stripped) query,
    2. a title equal to the query ignoring case,
    3. the first candidate in list order.

    Parameters
    ----------
    partial : str
        Full or partial title typed by the user.
    names : iterable of str, optional
        Titles to search. Defaults to the notebook-level ``anime_name`` list.

    Returns
    -------
    str
        The resolved title, exactly as it appears in ``names``.

    Raises
    ------
    IndexError
        If the query is empty or no title contains all query words. The
        exception type matches what the previous implementation raised
        implicitly via ``return_list[0]``.
    """
    candidates = anime_name if names is None else names

    wanted = partial.strip()
    query = wanted.lower()
    # Split once up front instead of once per title.
    tokens = query.split()
    if not tokens:
        # An empty query would otherwise "match" every title.
        raise IndexError("no anime title matches {!r}".format(partial))

    case_insensitive_hit = None
    matches = []
    for title in candidates:
        if not isinstance(title, str):
            # Skip missing values such as NaN instead of crashing on .lower().
            continue
        if title == wanted:
            return title
        lowered = title.lower()
        if lowered == query:
            if case_insensitive_hit is None:
                case_insensitive_hit = title
        elif all(token in lowered for token in tokens):
            matches.append(title)

    if case_insensitive_hit is not None:
        return case_insensitive_hit
    if not matches:
        raise IndexError("no anime title matches {!r}".format(partial))
    return matches[0]

In [24]:
def get_recommendation(partial_name, min_ratings=8000, top_n=11):
    """Recommend anime that share a genre with, and are rated similarly to, a title.

    The query is resolved to a full title with ``fullname``. Every title that
    shares at least one genre with it is collected, a user x title rating
    matrix is built from ``df`` for those titles, and each title's ratings are
    correlated with the ratings of the chosen title.

    Parameters
    ----------
    partial_name : str
        Full or partial title; resolved through ``fullname``.
    min_ratings : int, optional
        Only titles with more than this many ratings are kept (the chosen
        title itself is always kept). Defaults to 8000.
    top_n : int, optional
        Maximum number of rows returned, including the chosen title.
        Defaults to 11.

    Returns
    -------
    pandas.DataFrame
        Indexed by title, with columns ``'Correlation'`` and
        ``'num of ratings'``, sorted by descending correlation.

    Raises
    ------
    KeyError
        If the resolved title has no genre entry or no ratings in ``df``.
    """
    name = fullname(partial_name)

    if name not in genre.index:
        raise KeyError("{!r} has no genre information".format(name))
    # Selecting with a list always yields a Series, even when the same title
    # occurs more than once in the index (plain .loc[name] would then return a
    # DataFrame and break the string split below).
    genre_text = genre.loc[[name], 'genre'].iloc[0]
    if not isinstance(genre_text, str):
        raise KeyError("{!r} has no genre information".format(name))
    wanted = set(genre_text.split(', '))

    # Titles that share at least one genre with the chosen anime.
    genre_lists = genre['genre'].dropna().str.split(', ')
    shares_genre = genre_lists.map(lambda tags: not wanted.isdisjoint(tags))
    same_genre_name = genre_lists.index[shares_genre.to_numpy(dtype=bool)]

    # User x title rating matrix restricted to those titles.
    # NOTE(review): rating.csv contains -1 values (visible in df_rating.head()).
    # If -1 is a "not rated" sentinel it should be excluded before correlating
    # - confirm against the dataset documentation.
    animetable = df[df['name'].isin(same_genre_name)].pivot_table(
        index='user_id', columns='name', values='rating')

    if name not in animetable.columns:
        raise KeyError("no user ratings found for {!r}".format(name))

    # Correlate every title's rating column with the chosen title's column.
    anime_rating = animetable[name]
    recomm = animetable.corrwith(anime_rating)
    final = recomm.to_frame('Correlation').join(ratings['num of ratings'])
    final = final.dropna()

    # Always keep the queried title; otherwise require enough ratings for the
    # correlation to be meaningful.
    keep = (final.index == name) | (final['num of ratings'] > min_ratings)
    return final[keep].sort_values('Correlation', ascending=False).head(top_n)

In [26]:
# Example query: 'monster' is resolved to a full title by get_recommendation;
# the resulting table is kept in `a` because a later cell prints its index.
a = get_recommendation('monster')


  c = cov(x, y, rowvar)
  c *= np.true_divide(1, fact)


Unnamed: 0_level_0,Correlation,num of ratings
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Monster,1.0,4594
NHK ni Youkoso!,0.709339,10023
Cowboy Bebop,0.70084,15509
Clannad: After Story,0.69622,17854
Kokoro Connect,0.690214,9076
Black Lagoon: The Second Barrage,0.68672,9735
Suzumiya Haruhi no Shoushitsu,0.6865,9576
Baccano!,0.684462,11070
Chuunibyou demo Koi ga Shitai!,0.682976,13880
Fate/Zero 2nd Season,0.682536,12047


In [36]:
import dill as pickle

In [50]:
# Save the whole interpreter session to disk (`pickle` is dill here, imported
# as `import dill as pickle`). The with-block closes the file even if
# dump_session raises; the name pickle_out stays bound afterwards.
with open("recomm.pkl","wb") as pickle_out:
    pickle.dump_session(pickle_out)

In [51]:
# Make sure the session file handle is closed (calling close() on an
# already-closed file object is a no-op).
pickle_out.close()

In [47]:
# Print just the recommended titles as a plain Python list.
print(a.index.tolist())

['Monster', 'NHK ni Youkoso!', 'Cowboy Bebop', 'Clannad: After Story', 'Kokoro Connect', 'Black Lagoon: The Second Barrage', 'Suzumiya Haruhi no Shoushitsu', 'Baccano!', 'Chuunibyou demo Koi ga Shitai!', 'Fate/Zero 2nd Season', 'Great Teacher Onizuka']
