In [2]:
import numpy as np
import pandas as pd
import warnings
from sklearn.metrics.pairwise import cosine_similarity
warnings.filterwarnings('ignore')

In [3]:
# Load the anime metadata table from the CSV export used for the item-based recommender.
anime = pd.read_csv("animeItembased.csv")
# Preview the first rows to check which columns were read.
anime.head()

Unnamed: 0,anime_id,name,genre_x,type_x,episodes_x,members_x,image_url,type_y,episodes_y,score,members_y,background
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,200630,https://cdn.myanimelist.net/images/anime/5/870...,Movie,1,9.19,730076,
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,793665,https://cdn.myanimelist.net/images/anime/5/474...,TV,64,9.25,1199091,Fullmetal Alchemist: Brotherhood is an alterna...
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,114262,https://cdn.myanimelist.net/images/anime/3/720...,TV,51,9.15,198744,This is a fourth season of Gintama. In the ep...
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,673572,https://cdn.myanimelist.net/images/anime/5/731...,TV,24,9.14,1010330,Steins;Gate is based on 5pb. and Nitroplus&#03...
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,151266,https://cdn.myanimelist.net/images/anime/4/503...,TV,51,9.11,215527,This is the second season of Gintama. In the e...


In [4]:
# Keep only TV series and movies; the shape is printed before and after to show the effect.
print(anime.shape)
anime = anime[anime['type_x'].isin(['TV', 'Movie'])]
print(anime.shape)

(12277, 12)
(6127, 12)


In [5]:
# Keep only popular titles: member count at or above the 75th percentile.
m = anime['members_x'].quantile(q=0.75)
anime = anime.loc[anime['members_x'].ge(m)]
anime.shape

(1532, 12)

In [6]:
# Load the user ratings (columns: user_id, anime_id, rating).
rating = pd.read_csv("rating.csv")
# Preview the first rows; note the -1 values in the rating column.
rating.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [7]:
rating.shape

(7813737, 3)

In [8]:
# Keep only ratings from user ids up to 10000 so the later steps fit in memory.
rating = rating.loc[rating['user_id'].le(10000)]

In [9]:
rating.shape

(1042697, 3)

In [10]:
# Replace the -1 placeholder ratings with NaN so they are ignored by the
# NaN-aware averaging done later (presumably -1 means "no score given" --
# confirm against the dataset description).
# Series.mask() is used instead of assigning np.NaN through .loc because
# (a) the np.NaN alias no longer exists in NumPy 2.0, and (b) assign() builds
# a new frame rather than writing into the filtered slice of the original
# ratings, which makes this cell safe to re-run.
rating = rating.assign(rating=rating['rating'].mask(rating['rating'].eq(-1)))
rating.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,
1,1,24,
2,1,79,
3,1,226,
4,1,241,


In [11]:
# Lookup table from anime title to the row label of that title in `anime`.
anime_index = pd.Series(anime.index, index=anime['name'])
anime_index.head()

name
Kimi no Na wa.                      0
Fullmetal Alchemist: Brotherhood    1
Gintama°                            2
Steins;Gate                         3
Gintama&#039;                       4
dtype: int64

In [12]:
# Attach every user rating to its anime row; titles without ratings (and
# ratings for titles that were filtered out) are dropped by the inner join.
joined = pd.merge(anime, rating, on='anime_id', how='inner')
joined.head()

Unnamed: 0,anime_id,name,genre_x,type_x,episodes_x,members_x,image_url,type_y,episodes_y,score,members_y,background,user_id,rating
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,200630,https://cdn.myanimelist.net/images/anime/5/870...,Movie,1,9.19,730076,,99,5.0
1,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,200630,https://cdn.myanimelist.net/images/anime/5/870...,Movie,1,9.19,730076,,152,10.0
2,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,200630,https://cdn.myanimelist.net/images/anime/5/870...,Movie,1,9.19,730076,,244,10.0
3,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,200630,https://cdn.myanimelist.net/images/anime/5/870...,Movie,1,9.19,730076,,271,10.0
4,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,200630,https://cdn.myanimelist.net/images/anime/5/870...,Movie,1,9.19,730076,,278,


In [13]:
# Only these three columns are needed to build the title-by-user matrix.
joined = joined.loc[:, ['user_id', 'name', 'rating']]

# One row per title, one column per user; several ratings for the same
# (title, user) pair are averaged.
pivot = joined.pivot_table(index='name', columns='user_id', values='rating', aggfunc='mean')
pivot.head()

user_id,1,2,3,5,7,8,9,10,11,12,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
&quot;Bungaku Shoujo&quot; Movie,,,,,,,,,,,...,,,,,,,,,,
.hack//Roots,,,,,,,,,,,...,,,,,,,,,,
.hack//Sign,,,,,,,,,,,...,,,,,,,,,,
.hack//Tasogare no Udewa Densetsu,,,,,,,,,,,...,,,,,,,,,,
07-Ghost,,,,,,,,,,,...,,,,,,,6.0,,,


In [14]:
pivot.shape

(1488, 9412)

In [15]:
# Remove user columns that do not contain a single rating.
pivot = pivot.dropna(axis=1, how='all')
pivot.head()

user_id,1,2,3,5,7,8,9,10,11,12,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
&quot;Bungaku Shoujo&quot; Movie,,,,,,,,,,,...,,,,,,,,,,
.hack//Roots,,,,,,,,,,,...,,,,,,,,,,
.hack//Sign,,,,,,,,,,,...,,,,,,,,,,
.hack//Tasogare no Udewa Densetsu,,,,,,,,,,,...,,,,,,,,,,
07-Ghost,,,,,,,,,,,...,,,,,,,6.0,,,


In [16]:
pivot.shape

(1488, 9412)

In [17]:
# Mean-centre every title: subtract the title's average rating (missing
# ratings are skipped) from each of its ratings.
# This is the vectorised form of a row-wise apply(lambda x: x - np.nanmean(x));
# it avoids one Python-level call per title and yields the same values up to
# floating-point rounding. A title with no ratings at all stays all-NaN.
pivot_norm = pivot.sub(pivot.mean(axis=1, skipna=True), axis=0)
pivot_norm.head()

user_id,1,2,3,5,7,8,9,10,11,12,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
&quot;Bungaku Shoujo&quot; Movie,,,,,,,,,,,...,,,,,,,,,,
.hack//Roots,,,,,,,,,,,...,,,,,,,,,,
.hack//Sign,,,,,,,,,,,...,,,,,,,,,,
.hack//Tasogare no Udewa Densetsu,,,,,,,,,,,...,,,,,,,,,,
07-Ghost,,,,,,,,,,,...,,,,,,,-1.598527,,,


In [18]:
# Missing entries become 0, i.e. "no deviation from the title's mean rating".
pivot_norm = pivot_norm.fillna(0)
pivot_norm.head()

user_id,1,2,3,5,7,8,9,10,11,12,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
&quot;Bungaku Shoujo&quot; Movie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.hack//Roots,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.hack//Sign,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.hack//Tasogare no Udewa Densetsu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
07-Ghost,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,-1.598527,0.0,0.0,0.0


In [19]:
# Pairwise cosine similarity between all title rows (with a single argument
# the matrix is compared against itself).
cosine_sim = cosine_similarity(pivot_norm)

In [20]:
# Labelled view of the similarity matrix: rows and columns are anime titles.
# Reuses the already computed `cosine_sim` instead of running the full
# pairwise computation a second time; copy=True keeps the frame independent
# of the raw array.
item_dif = pd.DataFrame(cosine_sim, index=pivot_norm.index, columns=pivot_norm.index, copy=True)

In [21]:
cosine_sim

array([[1.        , 0.02926919, 0.02146522, ..., 0.02890355, 0.06238533,
        0.03041928],
       [0.02926919, 1.        , 0.2787704 , ..., 0.03887541, 0.06653732,
        0.05823338],
       [0.02146522, 0.2787704 , 1.        , ..., 0.06488693, 0.03790816,
        0.06408499],
       ...,
       [0.02890355, 0.03887541, 0.06488693, ..., 1.        , 0.53990812,
        0.2635016 ],
       [0.06238533, 0.06653732, 0.03790816, ..., 0.53990812, 1.        ,
        0.30515154],
       [0.03041928, 0.05823338, 0.06408499, ..., 0.2635016 , 0.30515154,
        1.        ]])

In [22]:
# Lookup table from anime title to the row label of that title in `anime`.
# Series.drop_duplicates() compares the *values* (the row labels, which are
# already unique), so it never removed a repeated title. De-duplicate on the
# index instead, keeping the first occurrence, so that a lookup by title
# always yields a single label.
anime_index = pd.Series(anime.index, index=anime.name)
anime_index = anime_index[~anime_index.index.duplicated(keep='first')]

In [23]:
anime_index

name
Kimi no Na wa.                                   0
Fullmetal Alchemist: Brotherhood                 1
Gintama°                                         2
Steins;Gate                                      3
Gintama&#039;                                    4
                                             ...  
Fate/stay night Movie: Heaven&#039;s Feel    10901
No Game No Life Movie                        10904
Little Witch Academia (TV)                   10905
Evangelion: 3.0+1.0                          10961
Kizumonogatari III: Reiketsu-hen             11007
Length: 1532, dtype: int64

In [24]:
def get_recommendation(anime_name, similarity=cosine_sim):
    """Return the 10 titles most similar to ``anime_name`` with genre and score.

    NOTE(review): a later cell defines another ``get_recommendation`` with a
    different return type; once that cell has run it shadows this function.

    Parameters
    ----------
    anime_name : str
        Title to look up. It must be one of the row labels of ``pivot_norm``.
    similarity : array-like of shape (n_titles, n_titles)
        Pairwise similarity matrix whose rows and columns follow the order of
        ``pivot_norm.index``. Defaults to the ``cosine_sim`` array as it
        exists when this cell is executed.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``genre_x`` and ``score``, ordered from the most to
        the least similar title. The query title itself is not included.

    Raises
    ------
    KeyError
        If ``anime_name`` is not a row label of ``pivot_norm``.
    """
    # The similarity matrix was built from pivot_norm, so a title's position
    # must be taken from pivot_norm.index -- not from the row labels of
    # `anime`, which have a different order and a larger range.
    titles = pivot_norm.index
    idx = titles.get_loc(anime_name)

    # Use the matrix that was passed in rather than the global one.
    scores = np.asarray(similarity)[idx]

    # Positions sorted by descending similarity; the stable sort keeps ties in
    # their original order.
    ranked = np.argsort(-scores, kind='stable')

    # Drop the query title by position (not by assuming it ranks first) and
    # keep the 10 best remaining titles.
    top_positions = [pos for pos in ranked if pos != idx][:10]
    top_titles = titles[top_positions]

    # Map the titles back to their metadata by name; the left merge keeps the
    # similarity ordering, and duplicate titles contribute a single row.
    catalogue = anime[['name', 'genre_x', 'score']].drop_duplicates(subset='name')
    result = pd.DataFrame({'name': top_titles}).merge(catalogue, on='name', how='left')
    return result

In [25]:
def get_recommendation(anime_name):
    """Rank every other title by its similarity to ``anime_name``.

    Parameters
    ----------
    anime_name : str
        Title to look up in the ``item_dif`` similarity frame.

    Returns
    -------
    tuple
        ``(titles, scores)`` where ``titles`` is a pandas Index of all other
        titles ordered from most to least similar and ``scores`` is the list
        of matching similarity values. ``(None, None)`` is returned when the
        title is not present in ``item_dif``.
    """
    if anime_name not in item_dif.columns:
        return None, None

    # Remove the query title by label rather than by slicing off the first
    # sorted entry: the title is not guaranteed to rank first (its
    # self-similarity is 0 when all of its centred ratings are 0, and ties are
    # possible). Sorting a single column once also avoids sorting the whole
    # frame twice.
    ranked = (
        item_dif[anime_name]
        .drop(labels=anime_name)
        .sort_values(ascending=False, kind='stable')
    )
    return ranked.index, ranked.tolist()

In [26]:
# Show the 10 titles most similar to "Naruto" together with their row label
# in `anime` and their image URL.
animes, score = get_recommendation("Naruto")
if animes is None:
    # get_recommendation signals an unknown title with (None, None).
    print("Naruto is not in the similarity matrix")
else:
    for x, y in zip(animes[:10], score[:10]):
        # Filter `anime` once per title and reuse the result for both values.
        urls = anime.loc[anime['name'] == x, 'image_url']
        index = urls.index.values
        # Print the URL itself rather than the repr of a one-element Series.
        image = urls.iloc[0] if len(urls) else None
        print("{} with similarity of {} {} {}".format(x, y, index, image))


Naruto: Shippuuden Movie 1 with similarity of 0.24503246421251143 [1827] 1827    https://cdn.myanimelist.net/images/anime/6/797...
Name: image_url, dtype: object
Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shinobu Houjou Dattebayo! with similarity of 0.23762559103749778 [2756] 2756    https://cdn.myanimelist.net/images/anime/6/760...
Name: image_url, dtype: object
Bleach with similarity of 0.22835181794336248 [582] 582    https://cdn.myanimelist.net/images/anime/3/404...
Name: image_url, dtype: object
Naruto Movie 3: Dai Koufun! Mikazuki Jima no Animaru Panikku Dattebayo! with similarity of 0.21523904158857035 [3559] 3559    https://cdn.myanimelist.net/images/anime/6/760...
Name: image_url, dtype: object
Naruto Movie 2: Dai Gekitotsu! Maboroshi no Chiteiiseki Dattebayo! with similarity of 0.2129183900457425 [3449] 3449    https://cdn.myanimelist.net/images/anime/11/76...
Name: image_url, dtype: object
Naruto: Shippuuden Movie 2 - Kizuna with similarity of 0.19459638626494785 [1828] 1828 

In [27]:
print(anime[anime['name']=='Bleach'].index.values)

[582]


In [28]:
get_recommendation("Naruto")

(Index(['Naruto: Shippuuden Movie 1',
        'Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shinobu Houjou Dattebayo!',
        'Bleach',
        'Naruto Movie 3: Dai Koufun! Mikazuki Jima no Animaru Panikku Dattebayo!',
        'Naruto Movie 2: Dai Gekitotsu! Maboroshi no Chiteiiseki Dattebayo!',
        'Naruto: Shippuuden Movie 2 - Kizuna', 'Dragon Ball Z',
        'Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsugu Mono', 'Death Note',
        'Sword Art Online',
        ...
        'Qualidea Code', 'Seikai no Monshou', 'Macross Δ', 'Memories',
        'Rakuen Tsuihou: Expelled from Paradise',
        'Mahou Shoujo Lyrical Nanoha StrikerS', 'Kyousou Giga (TV)',
        'Girls und Panzer der Film', 'Sekkou Boys', 'Show By Rock!!'],
       dtype='object', name='name', length=1487),
 [0.24503246421251143,
  0.23762559103749778,
  0.22835181794336248,
  0.21523904158857035,
  0.2129183900457425,
  0.19459638626494785,
  0.19329323816174693,
  0.18714814120742027,
  0.18009544100899344,
  0.

In [29]:
anime.head()

Unnamed: 0,anime_id,name,genre_x,type_x,episodes_x,members_x,image_url,type_y,episodes_y,score,members_y,background
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,200630,https://cdn.myanimelist.net/images/anime/5/870...,Movie,1,9.19,730076,
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,793665,https://cdn.myanimelist.net/images/anime/5/474...,TV,64,9.25,1199091,Fullmetal Alchemist: Brotherhood is an alterna...
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,114262,https://cdn.myanimelist.net/images/anime/3/720...,TV,51,9.15,198744,This is a fourth season of Gintama. In the ep...
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,673572,https://cdn.myanimelist.net/images/anime/5/731...,TV,24,9.14,1010330,Steins;Gate is based on 5pb. and Nitroplus&#03...
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,151266,https://cdn.myanimelist.net/images/anime/4/503...,TV,51,9.11,215527,This is the second season of Gintama. In the e...


In [30]:
import pickle

# Persist the similarity frame, the filtered anime table and the centred
# rating matrix. Each file is opened in a `with` block so the handle is
# flushed and closed even if pickling fails.
for obj, path in (
    (item_dif, 'anime_itembased.pkl'),
    (anime, 'anime.pkl'),
    (pivot_norm, 'pivot_norm.pkl'),
):
    with open(path, 'wb') as fh:
        pickle.dump(obj, fh)
