In [1]:
import numpy as np
import pandas as pd

from sklearn.metrics.pairwise import cosine_similarity

from scipy.sparse import csr_matrix

## Load the data from the CSV files

In [2]:
# Locations of the two input CSV files, given relative to the working directory.
ANIME_CSV_PATH = "./datasets/anime.csv"  # anime table
RATING_CSV_PATH = "./datasets/rating.csv"  # rating table

In [3]:
# Read both CSV files into DataFrames using pandas' default parsing options.
rating_df = pd.read_csv(filepath_or_buffer=RATING_CSV_PATH)
anime_df = pd.read_csv(filepath_or_buffer=ANIME_CSV_PATH)

## Preprocess the data

In [4]:
# Replace the rating value -1 with NaN so pandas treats those entries as
# missing rather than as real scores.
# The result is assigned back to the column: calling an inplace method on the
# `rating_df.rating` accessor is chained assignment, which pandas warns about
# and which leaves `rating_df` unchanged when Copy-on-Write is enabled.
rating_df["rating"] = rating_df["rating"].replace({-1: np.nan})

In [5]:
# Keep only the rows whose `type` column equals 'TV'.
is_tv = anime_df['type'].eq('TV')
anime_tv_df = anime_df.loc[is_tv]

In [6]:
# Inner-join the TV anime rows with the ratings on `anime_id`.
# Column names present in both frames get `_anime` / `_user` suffixes.
merged_df = anime_tv_df.merge(
    rating_df,
    how='inner',
    on='anime_id',
    suffixes=('_anime', '_user'),
)

In [7]:
# Narrow the merged frame to the three columns used below:
# the user id, the anime name and the user's rating.
kept_columns = ['user_id', 'name', 'rating_user']
merged_df = merged_df.loc[:, kept_columns]

In [8]:
# Upper bound (inclusive) on the user ids kept for the analysis.
# NOTE(review): presumably chosen to keep the pivot table built from this
# frame at a manageable size -- adjust here rather than editing the filter.
MAX_USER_ID = 20000

# Keep only the ratings given by users with an id up to MAX_USER_ID.
merged_limit_df = merged_df[merged_df["user_id"] <= MAX_USER_ID]

Create a pivot table from the merged data, with one row per user and one column per anime name.

In [9]:
# Build the user x anime rating matrix: one row per user_id, one column per
# anime name, cells holding the mean of `rating_user` for that pair
# (NaN where no rating exists).
user_anime_matrix = pd.pivot_table(
    merged_limit_df,
    values='rating_user',
    index=['user_id'],
    columns=['name'],
    aggfunc='mean',
)

In [10]:
# Mean-center each user's ratings and scale them by that user's rating range:
#     (rating - user_mean) / (user_max - user_min)
# Computed with vectorized row-wise reductions instead of a Python-level
# `apply` over every row; the reductions skip NaN, so missing ratings stay NaN.
# A user whose ratings are all identical has a zero range, so every entry in
# that row becomes NaN (0 / 0).
user_mean = user_anime_matrix.mean(axis=1)
user_range = user_anime_matrix.max(axis=1) - user_anime_matrix.min(axis=1)
norm_user_anime_matrix = user_anime_matrix.sub(user_mean, axis=0).div(user_range, axis=0)

In [11]:
# Treat missing normalized ratings as 0, then flip the matrix so rows are
# anime names and columns are users.
# NOTE: this cell rebinds `norm_user_anime_matrix` to its own transpose, so
# re-running it on its own flips the orientation again.
norm_user_anime_matrix = norm_user_anime_matrix.fillna(0).T

# Drop every column (user) that contains nothing but zeros.
has_nonzero_rating = norm_user_anime_matrix.ne(0).any(axis=0)
norm_user_anime_matrix = norm_user_anime_matrix.loc[:, has_nonzero_rating]

Create a sparse matrix for more efficient computation.

In [12]:
# Compressed-sparse-row copy of the normalized matrix values
# (row/column labels are not carried over into the sparse matrix).
dense_values = norm_user_anime_matrix.to_numpy()
anime_user_csr = csr_matrix(dense_values)

## Pearson Correlation

In [13]:
# DataFrame.corr correlates columns pairwise, so:
#   - the matrix as-is yields the correlation between its columns,
#   - its transpose yields the correlation between its rows.
user_similarity = norm_user_anime_matrix.corr(method='pearson')
item_similarity = norm_user_anime_matrix.transpose().corr(method='pearson')

In [14]:
# Wrap each similarity matrix in a DataFrame labelled on both axes by the
# matching axis of the normalized matrix (rows for items, columns for users).
item_labels = norm_user_anime_matrix.index
user_labels = norm_user_anime_matrix.columns

item_sim_df = pd.DataFrame(data=item_similarity, index=item_labels, columns=item_labels)
user_sim_df = pd.DataFrame(data=user_similarity, index=user_labels, columns=user_labels)

In [15]:
# Show the item-item similarity matrix (pandas abbreviates large frames with "...").
item_sim_df

name,.hack//Roots,.hack//Sign,.hack//Tasogare no Udewa Densetsu,009-1,07-Ghost,11eyes,12-sai.: Chicchana Mune no Tokimeki,3 Choume no Tama: Uchi no Tama Shirimasenka?,30-sai no Hoken Taiiku,91 Days,...,"Zone of the Enders: Dolores, I",Zukkoke Knight: Don De La Mancha,ef: A Tale of Melodies.,ef: A Tale of Memories.,gdgd Fairies,gdgd Fairies 2,iDOLM@STER Xenoglossia,s.CRY.ed,xxxHOLiC,xxxHOLiC Kei
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
.hack//Roots,1.000000,0.208654,0.292160,0.010615,0.035165,0.042690,0.017219,0.009871,0.023029,-0.010295,...,-0.004435,0.0,-0.023743,-0.016155,0.000000,0.000000,0.013154,0.019365,-0.026734,-0.026476
.hack//Sign,0.208654,1.000000,0.171018,0.024322,0.041544,0.038648,0.002105,0.000986,0.035500,-0.014581,...,0.015337,0.0,-0.021272,-0.004311,-0.015866,-0.010922,-0.014005,0.004488,-0.018745,-0.023892
.hack//Tasogare no Udewa Densetsu,0.292160,0.171018,1.000000,0.035243,0.020486,0.036884,-0.000492,0.006363,0.003612,0.000884,...,0.019789,0.0,-0.016518,-0.021403,-0.013637,0.000000,0.000472,0.011293,-0.043897,-0.032401
009-1,0.010615,0.024322,0.035243,1.000000,0.005419,0.028593,0.029127,0.000000,0.051093,-0.035763,...,0.003867,0.0,-0.003745,-0.003192,0.000000,0.000000,-0.002042,-0.011801,0.013786,0.008747
07-Ghost,0.035165,0.041544,0.020486,0.005419,1.000000,0.085772,0.009688,0.002562,0.045095,-0.011170,...,0.001849,0.0,-0.025648,-0.025221,0.000030,0.000029,-0.002626,0.021899,-0.015388,-0.011613
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
gdgd Fairies 2,0.000000,-0.010922,0.000000,0.000000,0.000029,0.000413,0.000000,0.000000,0.000000,-0.006458,...,0.000000,0.0,0.000380,-0.002512,0.470379,1.000000,-0.001134,-0.006767,-0.004298,0.000000
iDOLM@STER Xenoglossia,0.013154,-0.014005,0.000472,-0.002042,-0.002626,0.012366,0.000000,0.010641,0.026475,0.003081,...,0.020443,0.0,-0.006085,-0.005432,-0.001202,-0.001134,1.000000,-0.022674,-0.040151,-0.028124
s.CRY.ed,0.019365,0.004488,0.011293,-0.011801,0.021899,0.011968,0.000000,-0.002741,-0.007845,-0.009019,...,0.003345,0.0,0.000787,-0.002283,-0.007174,-0.006767,-0.022674,1.000000,-0.004967,-0.018335
xxxHOLiC,-0.026734,-0.018745,-0.043897,0.013786,-0.015388,-0.041023,-0.011261,0.006558,-0.015582,-0.010608,...,0.004333,0.0,0.041459,0.039645,0.011552,-0.004298,-0.040151,-0.004967,1.000000,0.553464


In [16]:
# Show the user-user similarity matrix (pandas abbreviates large frames with "...").
user_sim_df

user_id,3,5,7,8,10,11,12,14,16,17,...,19989,19990,19992,19993,19994,19995,19996,19997,19998,20000
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3,1.000000,0.063993,0.167627,0.020365,0.125977,4.523309e-02,0.108629,0.005399,0.077388,0.248132,...,0.101602,0.155584,0.000000,0.074104,0.032734,0.085043,-0.052886,-0.016283,0.003794,-0.020254
5,0.063993,1.000000,0.072457,0.016326,0.031464,5.346814e-03,0.094016,0.106924,0.007067,0.130260,...,0.057186,-0.024680,0.011694,0.082730,0.114210,0.053155,-0.026337,-0.029083,0.040447,-0.005546
7,0.167627,0.072457,1.000000,-0.013548,0.020401,-1.515745e-02,0.061591,0.122852,0.048526,0.165473,...,-0.035113,0.052594,-0.064844,0.051428,0.065741,0.032318,0.000000,0.000000,0.046984,0.018660
8,0.020365,0.016326,-0.013548,1.000000,-0.052705,1.892828e-02,0.002786,-0.055035,0.000000,0.001459,...,0.019713,0.000000,0.000000,0.034493,0.007149,-0.010015,0.000000,0.000000,0.046962,-0.069722
10,0.125977,0.031464,0.020401,-0.052705,1.000000,-1.665335e-16,0.008811,0.044415,0.000000,0.071511,...,0.190080,0.213980,0.000000,0.000000,0.108901,0.138879,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,0.085043,0.053155,0.032318,-0.010015,0.138879,3.134752e-02,0.179424,0.137927,0.129857,0.208684,...,0.022598,0.160449,-0.031884,0.040449,0.078178,1.000000,-0.000719,0.000000,0.180156,0.026246
19996,-0.052886,-0.026337,0.000000,0.000000,0.000000,8.952602e-03,-0.171130,-0.031370,0.000000,-0.002204,...,-0.155184,0.000000,0.109382,-0.006593,0.085547,-0.000719,1.000000,0.000000,0.000000,0.044492
19997,-0.016283,-0.029083,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.074125,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000
19998,0.003794,0.040447,0.046984,0.046962,0.000000,-4.831490e-03,0.208504,0.096730,-0.088719,0.083251,...,-0.083078,0.013459,0.000000,-0.001466,0.029458,0.180156,0.000000,0.000000,1.000000,-0.039621
