In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
# Read both raw tables and normalise the key column name as they are loaded.
# pandas labels a CSV column that has no header "Unnamed: 0"; rename() skips
# labels a frame does not have, so the call is a no-op where it is absent.
# NOTE(review): presumably "Unnamed: 0" holds the anime key in these files — confirm.
rating_data = pd.read_csv("animelist.csv").rename(columns={"Unnamed: 0": "anime_id"})
anima_data = pd.read_csv("anime.csv").rename(columns={"Unnamed: 0": "anime_id"})

# Slim (anime_id, Name) lookup, so that the merge onto the large ratings
# table only has to carry the two columns that are actually needed.
anima_contact_data = anima_data.loc[:, ["anime_id", "Name"]]

In [4]:
# Attach each anime's Name to every ratings row. The left join keeps rows whose
# anime_id has no match in the lookup; their Name is NaN.
rating_data = pd.merge(rating_data, anima_contact_data, on="anime_id", how="left")

# Keep only the columns used downstream, in the order they should display.
wanted_columns = ["user_id", "Name", "anime_id", "rating", "watching_status", "watched_episodes"]
rating_data = rating_data.loc[:, wanted_columns]
rating_data.head()

Unnamed: 0,user_id,Name,anime_id,rating,watching_status,watched_episodes
0,0,Basilisk: Kouga Ninpou Chou,1318,9,1,1
1,0,Fairy Tail,649,7,1,4
2,0,Kuroshitsuji,1797,0,1,1
3,0,One Piece,2710,10,1,0
4,0,School Rumble,585,9,1,5


In [5]:
# (rows, columns) of the ratings table after names were attached.
rating_data.shape

(66671514, 6)

In [6]:
# Activity thresholds: users need at least 500 rows, anime at least 200.
# Both tallies are taken on the unfiltered table, so once the user filter is
# applied an anime can be left with fewer than 200 rows.
count = rating_data['user_id'].value_counts()
count1 = rating_data['anime_id'].value_counts()

frequent_users = count[count >= 500].index
frequent_animes = count1[count1 >= 200].index

# A row survives only when both its user and its anime pass their threshold.
keep_row = rating_data['user_id'].isin(frequent_users) & rating_data['anime_id'].isin(frequent_animes)
rating_data = rating_data.loc[keep_row].copy()

In [7]:
# (rows, columns) of the ratings table after the activity filters.
rating_data.shape

(18818098, 6)

In [8]:
# Rows whose anime_id found no Name in the merge cannot be grouped by title.
combine_movie_rating = rating_data.dropna(subset=['Name'])

# Number of non-null "rating" entries per title (whatever their value),
# returned as a two-column frame: Name, rating.
ratings_by_title = combine_movie_rating.groupby('Name')['rating']
movie_ratingCount = ratings_by_title.count().reset_index()
movie_ratingCount.head()

Unnamed: 0,Name,rating
0,"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",3520
1,"""Bungaku Shoujo"" Memoire",4501
2,.hack//G.U. Returner,2218
3,.hack//G.U. Trilogy,2835
4,.hack//Gift,2037


In [9]:
# Bring the per-title entry count onto every row. Both frames carry a "rating"
# column, so the merge suffixes them: rating_x is the row's own rating and
# rating_y is the per-title count.
rating_data = pd.merge(combine_movie_rating, movie_ratingCount, on='Name', how='left')

# NOTE(review): from here on "rating" is the per-title count, not the user's
# own score — the individual ratings are discarded. Confirm this is intended.
rating_data = (
    rating_data
    .drop(columns=["rating_x"])
    .rename(columns={"rating_y": "rating"})
)
rating_data

Unnamed: 0,user_id,Name,anime_id,watching_status,watched_episodes,rating
0,17,Black Clover,2515,1,167,15947
1,17,Dr. Stone: Stone Wars,2489,1,9,9983
2,17,Jaku-Chara Tomozaki-kun,2505,1,10,5918
3,17,Kaifuku Jutsushi no Yarinaoshi,2528,1,8,8060
4,17,King's Raid: Ishi wo Tsugumono-tachi,2503,1,24,3024
...,...,...,...,...,...,...
18818093,353398,"Suisei no Gargantia: Meguru Kouro, Haruka",489,6,0,5130
18818094,353398,Tales of the Abyss,765,6,0,8966
18818095,353398,Uchouten Kazoku,110,6,0,13352
18818096,353398,Urara Meirochou,1437,6,0,9842


In [10]:
# Encoding categorical data: map raw ids to dense 0..n-1 integer codes
# (numbered in order of first appearance) and keep the reverse lookups.
user_ids = rating_data["user_id"].unique().tolist()
user2user_encoded = {raw_id: code for code, raw_id in enumerate(user_ids)}
user_encoded2user = dict(enumerate(user_ids))
rating_data["user"] = rating_data["user_id"].map(user2user_encoded)
n_users = len(user2user_encoded)

anime_ids = rating_data["anime_id"].unique().tolist()
anime2anime_encoded = {raw_id: code for code, raw_id in enumerate(anime_ids)}
anime_encoded2anime = dict(enumerate(anime_ids))
rating_data["anime"] = rating_data["anime_id"].map(anime2anime_encoded)
n_animes = len(anime2anime_encoded)

print("Num of users: {}, Num of animes: {}".format(n_users, n_animes))
# Series.min()/.max() are vectorised; the builtin min()/max() would iterate
# over every row of the column in Python.
print("Min total rating: {}, Max total rating: {}".format(rating_data['rating'].min(), rating_data['rating'].max()))

Num of users: 26706, Num of animes: 2262
Min total rating: 94, Max total rating: 25705


In [11]:
# The 20 users and the 20 anime with the most rows in rating_data.
# (count() never yields NaN, so no dropna() is needed before sorting.)
g = rating_data.groupby('user_id')['rating'].count()
top_users = g.sort_values(ascending=False)[:20]

g = rating_data.groupby('anime_id')['rating'].count()
top_animes = g.sort_values(ascending=False)[:20]

# Keep only the rows that fall in both top-20 sets. Filtering with isin()
# selects the same rows as the two inner joins it replaces, without appending
# two helper columns that both end up named "rating_r".
in_both_top_sets = rating_data['user_id'].isin(top_users.index) & rating_data['anime_id'].isin(top_animes.index)
top_r = rating_data[in_both_top_sets]

# User x anime table of summed "rating" values. The string alias "sum" is
# used because newer pandas releases emit a FutureWarning for np.sum here.
pivot = pd.crosstab(top_r.user_id, top_r.anime_id, top_r.rating, aggfunc="sum")

In [12]:
# User/anime pairs with no row show 0 instead of NaN. Rebinding the name
# (rather than inplace=True) keeps the step chainable and leaves the frame
# returned by crosstab untouched.
pivot = pivot.fillna(0)
pivot

anime_id,51,280,648,685,724,786,798,1088,1090,1132,1203,1373,1445,1492,1700,1805,1824,1829,1957,2032
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
20807,24569,24005,25054,24436,25381,24261,24156,24274,24455,24080,25175,25266,24027,25563,25494,24629,25705,23963,24280,24559
22022,24569,24005,25054,24436,25381,24261,24156,24274,24455,24080,25175,25266,24027,25563,25494,24629,25705,23963,24280,24559
50485,24569,24005,25054,24436,25381,24261,24156,24274,24455,24080,25175,25266,24027,25563,25494,24629,25705,23963,24280,24559
63900,24569,24005,25054,24436,25381,24261,24156,24274,24455,24080,25175,25266,24027,25563,25494,24629,25705,23963,24280,24559
68042,24569,24005,25054,24436,25381,24261,24156,24274,24455,24080,25175,25266,24027,25563,25494,24629,25705,23963,24280,24559
85472,24569,24005,25054,24436,25381,24261,24156,24274,24455,24080,25175,25266,24027,25563,25494,24629,25705,23963,24280,24559
122341,24569,24005,25054,24436,25381,24261,24156,24274,24455,24080,25175,25266,24027,25563,25494,24629,25705,23963,24280,24559
131988,24569,24005,25054,24436,25381,24261,24156,24274,24455,24080,25175,25266,24027,25563,25494,24629,25705,23963,24280,24559
140590,24569,24005,25054,24436,25381,24261,24156,24274,24455,24080,25175,25266,24027,25563,25494,24629,25705,23963,24280,24559
147331,24569,24005,25054,24436,25381,24261,24156,24274,24455,24080,25175,25266,24027,25563,25494,24629,25705,23963,24280,24559


In [13]:
# Title-by-user matrix: one row per Name, one column per user_id. Each cell is
# the mean of "rating" for that pair (pivot_table's default aggregation);
# pairs with no row are filled with 0.
piviot_table = (
    pd.pivot_table(rating_data, values="rating", index="Name", columns="user_id")
    .fillna(0)
)
piviot_table

user_id,17,19,47,60,111,145,146,147,153,172,...,353302,353304,353311,353318,353326,353331,353365,353390,353395,353398
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",0.0,0.0,0.0,0.0,0.0,0.0,3520.0,3520.0,3520.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Bungaku Shoujo"" Memoire",0.0,0.0,0.0,0.0,0.0,0.0,4501.0,4501.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.hack//G.U. Returner,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.hack//G.U. Trilogy,0.0,0.0,0.0,0.0,0.0,2835.0,0.0,0.0,2835.0,0.0,...,0.0,2835.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.hack//Gift,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
number24,0.0,0.0,0.0,0.0,0.0,0.0,1791.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
xxxHOLiC,14067.0,0.0,0.0,14067.0,0.0,0.0,14067.0,0.0,14067.0,0.0,...,0.0,14067.0,0.0,0.0,0.0,0.0,0.0,14067.0,14067.0,0.0
xxxHOLiC Kei,8103.0,0.0,0.0,0.0,0.0,0.0,8103.0,0.0,8103.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8103.0,8103.0,0.0
xxxHOLiC Rou,5575.0,0.0,0.0,0.0,0.0,0.0,5575.0,0.0,5575.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5575.0,0.0,0.0


In [14]:
from scipy.sparse import csr_matrix
# Compressed-sparse-row copy of the title-by-user matrix for the neighbour model.
piviot_table_matrix = csr_matrix(piviot_table.to_numpy())

In [15]:
from sklearn.neighbors import NearestNeighbors
# Brute-force nearest-neighbour index over the title rows, compared by cosine
# distance. fit() returns the estimator itself, so rebinding is safe.
model = NearestNeighbors(algorithm="brute", metric="cosine")
model = model.fit(piviot_table_matrix)
model

NearestNeighbors(algorithm='brute', metric='cosine')

In [16]:
# Title -> anime_id lookup built from the anime table; if a title occurs more
# than once, the last occurrence wins.
uncodingID = dict(zip(anima_data["Name"].values, anima_data["anime_id"].values))

In [17]:
# Number of title rows in the pivot table.
piviot_table.shape[0]

2262

In [18]:
# Build one (anime_id, recommended_anime_id, rank) row per recommendation, for
# every title in the pivot table.
#
# Changes from the previous loop:
#   * the first field is now the source title's anime_id. Before, that id was
#     looked up into an unused variable and the pivot row position was stored
#     instead, mixing two different id spaces in one row;
#   * the row count comes from the pivot table rather than a literal 2262;
#   * neighbours are fetched in a single call. With no query argument,
#     kneighbors() returns the neighbours of every fitted row and does not
#     count a row as its own neighbour, so the self-match no longer has to be
#     assumed to sit in position 0.
N_RECOMMENDATIONS = 20

# Cannot ask for more neighbours than there are other rows.
n_recommendations = min(N_RECOMMENDATIONS, piviot_table.shape[0] - 1)
_, neighbour_rows = model.kneighbors(n_neighbors=n_recommendations)

# anime_id of each pivot row, as plain Python ints so sqlite3 can bind them.
row_anime_ids = [int(uncodingID[title]) for title in piviot_table.index]

result = [
    (row_anime_ids[row], row_anime_ids[neighbour], rank)
    for row, neighbours in enumerate(neighbour_rows)
    for rank, neighbour in enumerate(neighbours, start=1)  # rank 1 = closest
]

In [19]:
import sqlite3


In [27]:
# Coerce the second field of every row to a plain Python int (values that are
# already int pass through unchanged); the first and third fields are kept as-is.
result = [(first, int(second), third) for (first, second, third) in result]

In [29]:
# Append the rows to the `recommendation` table in anime.db. The statement
# supplies three positional values per row and no CREATE TABLE is issued here,
# so a matching three-column table must already exist.
# NOTE(review): unless the table enforces uniqueness, re-running this cell
# inserts the same rows again.
db = sqlite3.connect('anime.db')
try:
    # The connection's context manager commits if the insert succeeds and
    # rolls back if it raises; it does not close the connection.
    with db:
        db.executemany('INSERT INTO recommendation values(?, ?, ?)', result)
finally:
    # Always release the database file, even when the insert failed.
    db.close()

In [28]:
# Show the type of the second field of the first result row.
type(result[0][1])

int

In [70]:
def predict(random_anime=2261, n_neighbors=21):
    """Print the nearest neighbours of one title row of ``piviot_table``.

    Parameters
    ----------
    random_anime : int, default 2261
        Row position in ``piviot_table`` of the title to query. Pass
        ``np.random.choice(piviot_table.shape[0])`` to query a random title.
    n_neighbors : int, default 21
        Number of neighbours requested from ``model``. The first neighbour
        returned is skipped (it is presumed to be the queried row itself), so
        ``n_neighbors - 1`` recommendation lines are printed.

    Returns
    -------
    None
        Output is printed: a header with the queried title, then one line per
        neighbour giving its rank, the value ``uncodingID`` maps its title to,
        and its distance from the query.
    """
    query = piviot_table.iloc[random_anime, :].values.reshape(1, -1)
    distance, suggestions = model.kneighbors(query, n_neighbors=n_neighbors)

    # Flatten once instead of on every loop iteration.
    distances = distance.flatten()
    neighbour_rows = suggestions.flatten()

    if len(distances) > 0:
        print('Recommendations for {0}:\n'.format(piviot_table.index[random_anime]))

    # Position 0 is skipped; ranks therefore start at 1.
    for i in range(1, len(distances)):
        print('{0}: {1}, with distance of {2}:'.format(i, uncodingID[piviot_table.index[neighbour_rows[i]]], distances[i]))

In [71]:
# Print recommendations for the pivot row that predict() queries.
predict()

Recommendations for xxxHOLiC Shunmuki:

1: 288, with distance of 0.08727244807510381:
2: 270, with distance of 0.17203662621064686:
3: 1028, with distance of 0.36576118124334045:
4: 350, with distance of 0.3883057520442791:
5: 351, with distance of 0.4264763125592457:
6: 352, with distance of 0.43878224960471424:
7: 353, with distance of 0.4692186725574542:
8: 354, with distance of 0.48433566192360455:
9: 878, with distance of 0.4915215787057374:
10: 877, with distance of 0.5035942055683253:
11: 876, with distance of 0.5106108804996692:
12: 1867, with distance of 0.5150678765562047:
13: 1561, with distance of 0.5213835397367725:
14: 860, with distance of 0.5243805372336702:
15: 1866, with distance of 0.5245643204297412:
16: 465, with distance of 0.5255706357711083:
17: 466, with distance of 0.5298737117784784:
18: 471, with distance of 0.5304304609276581:
19: 1445, with distance of 0.5312407918969912:
20: 690, with distance of 0.5313272594534193:


In [52]:
# (title rows, user columns) of the pivot table.
piviot_table.shape

(2262, 26706)

In [50]:
# Draw one random row position from 0 up to (but excluding) the number of pivot
# rows. No seed is set in the visible cells, so the value differs between runs.
np.random.choice(piviot_table.shape[0])

1432

In [27]:
# NOTE(review): the saved output of this cell lists titles rather than ids and
# has a lower execution count, so it appears to come from an earlier version of
# predict() — re-run to refresh it.
predict()

Recommendations for Dungeon ni Deai wo Motomeru no wa Machigatteiru Darou ka II:

1: Dungeon ni Deai wo Motomeru no wa Machigatteiru Darou ka III, with distance of 0.17225582946679763:
2: Tate no Yuusha no Nariagari, with distance of 0.18374967215995974:
3: Tensei shitara Slime Datta Ken, with distance of 0.18938755582963296:
4: Goblin Slayer, with distance of 0.19669191572490685:
5: Dungeon ni Deai wo Motomeru no wa Machigatteiru Darou ka, with distance of 0.20246344903712632:


In [41]:
import mysql.connector

Unnamed: 0,anime_id,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
584,421,Ookami-san to Shichinin no Nakama-tachi,7.22,"Comedy, Parody, Romance",Okami-San and Her Seven Companions,オオカミさんと七人の仲間たち,TV,12,"Jul 1, 2010 to Sep 16, 2010",Summer 2010,...,6575.0,12733.0,28299.0,37290.0,17540.0,8041.0,2693.0,994.0,458.0,310.0
