In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine, correlation
from sklearn.model_selection import train_test_split    

### 1. Data preprocessing

In [3]:
anime = pd.read_csv('C:\\Users\\hp\\Desktop\\EXCELR\\EXCELR\\Recommendation System\\anime.csv')
anime

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [5]:
anime.drop(['anime_id', 'type', 'episodes', 'members'],axis=1,inplace=True)
anime.head()

Unnamed: 0,name,genre,rating
0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",9.37
1,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",9.26
2,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",9.25
3,Steins;Gate,"Sci-Fi, Thriller",9.17
4,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",9.16


In [7]:
anime.shape

(12294, 3)

In [9]:
anime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   name    12294 non-null  object 
 1   genre   12232 non-null  object 
 2   rating  12064 non-null  float64
dtypes: float64(1), object(2)
memory usage: 288.3+ KB


#### Handle missing values

In [11]:
anime.isnull().sum()

name        0
genre      62
rating    230
dtype: int64

In [13]:
anime[anime.isnull().any(axis=1)].shape

(277, 3)

In [15]:
anime.dropna(inplace=True)

In [17]:
anime.reset_index(drop=True, inplace=True)

#### Explore the data

In [19]:
anime.describe()

Unnamed: 0,rating
count,12017.0
mean,6.478264
std,1.023857
min,1.67
25%,5.89
50%,6.57
75%,7.18
max,10.0


In [21]:
print(len(anime['name'].unique()))
anime['name'].value_counts()

12015


name
Shi Wan Ge Leng Xiaohua                                                                            2
Saru Kani Gassen                                                                                   2
CCW: Crazy Clay Wrestling                                                                          1
Crayon Angel                                                                                       1
Koumon-teki Juuku Ketsujiru Juke                                                                   1
                                                                                                  ..
Bakusou Kyoudai Let&#039;s &amp; Go!! WGP Bousou Mini Yonku Daitsuiseki                            1
Beet the Vandel Buster Excellion                                                                   1
Black Jack ONA                                                                                     1
Crayon Shin-chan Manatsu no Yoru ni Ora Sanjou! Arashi wo Yobu Den-O vs. Shin-O 60-bu 

In [23]:
print(len(anime['genre'].unique()))
anime['genre'].value_counts()

3229


genre
Hentai                                                   816
Comedy                                                   521
Music                                                    297
Kids                                                     197
Comedy, Slice of Life                                    174
                                                        ... 
Adventure, Comedy, Horror, Shounen, Supernatural           1
Comedy, Harem, Romance, School, Seinen, Slice of Life      1
Comedy, Ecchi, Sci-Fi, Shounen                             1
Adventure, Shounen, Sports                                 1
Hentai, Slice of Life                                      1
Name: count, Length: 3229, dtype: int64

### 2. Feature extraction

In [25]:
anime1 = anime.assign(genre=anime['genre'].str.split(', ')).explode('genre')

In [27]:
user_anime = anime1.pivot_table(index='name', columns='genre', values='rating', aggfunc='mean').fillna(0)
user_anime

genre,Action,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
&quot;0&quot;,0.0,0.0,0.0,0.00,0.00,0.0,0.00,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
"&quot;Aesop&quot; no Ohanashi yori: Ushi to Kaeru, Yokubatta Inu",0.0,0.0,0.0,0.00,0.00,0.0,0.00,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hatsukoi,0.0,0.0,0.0,7.06,0.00,0.0,0.00,0.0,7.06,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
&quot;Bungaku Shoujo&quot; Memoire,0.0,0.0,0.0,0.00,0.00,0.0,7.54,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
&quot;Bungaku Shoujo&quot; Movie,0.0,0.0,0.0,0.00,0.00,0.0,7.63,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
xxxHOLiC Movie: Manatsu no Yoru no Yume,0.0,0.0,0.0,8.04,0.00,0.0,8.04,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,8.04,0.0,0.0,0.0,0.0
xxxHOLiC Rou,0.0,0.0,0.0,0.00,0.00,0.0,0.00,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,8.32,0.0,0.0,0.0,0.0
xxxHOLiC Shunmuki,0.0,0.0,0.0,8.17,0.00,0.0,8.17,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,8.17,0.0,0.0,0.0,0.0
Üks Uks,0.0,0.0,0.0,0.00,6.17,0.0,0.00,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0


In [29]:
user_sim = 1 - pairwise_distances( user_anime.values,metric='cosine')
user_sim

array([[1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.70710678],
       [0.        , 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.25819889, 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.25819889, ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.70710678],
       [0.70710678, 0.        , 0.        , ..., 0.        , 0.70710678,
        1.        ]])

In [31]:
user_sim_df = pd.DataFrame(user_sim)
user_sim_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12005,12006,12007,12008,12009,12010,12011,12012,12013,12014
0,1.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.500000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.707107
1,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.0,1.000000,0.333333,0.288675,0.333333,0.408248,0.235702,0.288675,0.516398,...,0.57735,0.000000,0.288675,0.258199,0.258199,0.258199,0.000000,0.258199,0.000000,0.000000
3,0.000000,0.0,0.333333,1.000000,0.866025,0.333333,0.000000,0.235702,0.000000,0.000000,...,0.00000,0.258199,0.288675,0.258199,0.258199,0.258199,0.000000,0.258199,0.000000,0.000000
4,0.000000,0.0,0.288675,0.866025,1.000000,0.288675,0.000000,0.204124,0.000000,0.000000,...,0.00000,0.223607,0.250000,0.447214,0.447214,0.447214,0.353553,0.447214,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12010,0.000000,0.0,0.258199,0.258199,0.447214,0.516398,0.000000,0.182574,0.000000,0.200000,...,0.00000,0.200000,0.000000,1.000000,1.000000,1.000000,0.632456,1.000000,0.000000,0.000000
12011,0.000000,0.0,0.000000,0.000000,0.353553,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.000000,0.632456,0.632456,0.632456,1.000000,0.632456,0.000000,0.000000
12012,0.000000,0.0,0.258199,0.258199,0.447214,0.516398,0.000000,0.182574,0.000000,0.200000,...,0.00000,0.200000,0.000000,1.000000,1.000000,1.000000,0.632456,1.000000,0.000000,0.000000
12013,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.707107


In [33]:
user_sim_df.index = anime1.name.unique()
user_sim_df.columns = anime1.name.unique()

In [35]:
user_sim_df

Unnamed: 0,Kimi no Na wa.,Fullmetal Alchemist: Brotherhood,Gintama°,Steins;Gate,Gintama&#039;,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou,Hunter x Hunter (2011),Ginga Eiyuu Densetsu,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare,Gintama&#039;: Enchousen,...,Silent Chaser Kagami,Super Erotic Anime,Teleclub no Himitsu,Tenshi no Habataki Jun,The Satisfaction,Toushindai My Lover: Minami tai Mecha-Minami,Under World,Violence Gekiga David no Hoshi,Violence Gekiga Shin David no Hoshi: Inma Densetsu,Yasuji no Pornorama: Yacchimae!!
Kimi no Na wa.,1.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.500000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.707107
Fullmetal Alchemist: Brotherhood,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Gintama°,0.000000,0.0,1.000000,0.333333,0.288675,0.333333,0.408248,0.235702,0.288675,0.516398,...,0.57735,0.000000,0.288675,0.258199,0.258199,0.258199,0.000000,0.258199,0.000000,0.000000
Steins;Gate,0.000000,0.0,0.333333,1.000000,0.866025,0.333333,0.000000,0.235702,0.000000,0.000000,...,0.00000,0.258199,0.288675,0.258199,0.258199,0.258199,0.000000,0.258199,0.000000,0.000000
Gintama&#039;,0.000000,0.0,0.288675,0.866025,1.000000,0.288675,0.000000,0.204124,0.000000,0.000000,...,0.00000,0.223607,0.250000,0.447214,0.447214,0.447214,0.353553,0.447214,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Toushindai My Lover: Minami tai Mecha-Minami,0.000000,0.0,0.258199,0.258199,0.447214,0.516398,0.000000,0.182574,0.000000,0.200000,...,0.00000,0.200000,0.000000,1.000000,1.000000,1.000000,0.632456,1.000000,0.000000,0.000000
Under World,0.000000,0.0,0.000000,0.000000,0.353553,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.000000,0.632456,0.632456,0.632456,1.000000,0.632456,0.000000,0.000000
Violence Gekiga David no Hoshi,0.000000,0.0,0.258199,0.258199,0.447214,0.516398,0.000000,0.182574,0.000000,0.200000,...,0.00000,0.200000,0.000000,1.000000,1.000000,1.000000,0.632456,1.000000,0.000000,0.000000
Violence Gekiga Shin David no Hoshi: Inma Densetsu,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.707107


In [37]:
np.fill_diagonal(user_sim_df.values, 0)
user_sim_df

Unnamed: 0,Kimi no Na wa.,Fullmetal Alchemist: Brotherhood,Gintama°,Steins;Gate,Gintama&#039;,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou,Hunter x Hunter (2011),Ginga Eiyuu Densetsu,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare,Gintama&#039;: Enchousen,...,Silent Chaser Kagami,Super Erotic Anime,Teleclub no Himitsu,Tenshi no Habataki Jun,The Satisfaction,Toushindai My Lover: Minami tai Mecha-Minami,Under World,Violence Gekiga David no Hoshi,Violence Gekiga Shin David no Hoshi: Inma Densetsu,Yasuji no Pornorama: Yacchimae!!
Kimi no Na wa.,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.500000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.707107
Fullmetal Alchemist: Brotherhood,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Gintama°,0.000000,0.0,0.000000,0.333333,0.288675,0.333333,0.408248,0.235702,0.288675,0.516398,...,0.57735,0.000000,0.288675,0.258199,0.258199,0.258199,0.000000,0.258199,0.000000,0.000000
Steins;Gate,0.000000,0.0,0.333333,0.000000,0.866025,0.333333,0.000000,0.235702,0.000000,0.000000,...,0.00000,0.258199,0.288675,0.258199,0.258199,0.258199,0.000000,0.258199,0.000000,0.000000
Gintama&#039;,0.000000,0.0,0.288675,0.866025,0.000000,0.288675,0.000000,0.204124,0.000000,0.000000,...,0.00000,0.223607,0.250000,0.447214,0.447214,0.447214,0.353553,0.447214,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Toushindai My Lover: Minami tai Mecha-Minami,0.000000,0.0,0.258199,0.258199,0.447214,0.516398,0.000000,0.182574,0.000000,0.200000,...,0.00000,0.200000,0.000000,1.000000,1.000000,0.000000,0.632456,1.000000,0.000000,0.000000
Under World,0.000000,0.0,0.000000,0.000000,0.353553,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.000000,0.632456,0.632456,0.632456,0.000000,0.632456,0.000000,0.000000
Violence Gekiga David no Hoshi,0.000000,0.0,0.258199,0.258199,0.447214,0.516398,0.000000,0.182574,0.000000,0.200000,...,0.00000,0.200000,0.000000,1.000000,1.000000,1.000000,0.632456,0.000000,0.000000,0.000000
Violence Gekiga Shin David no Hoshi: Inma Densetsu,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.707107


### 3. Recommendation system

In [39]:
user_sim_df.idxmax(axis=1)[0:10]

Kimi no Na wa.                                                             Aria The Origination
Fullmetal Alchemist: Brotherhood                                                     Death Note
Gintama°                                                                 Getsumen To Heiki Mina
Steins;Gate                                                                Boukyaku no Senritsu
Gintama&#039;                                                                     Heisa Byoutou
Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou                             Mitsuwano
Hunter x Hunter (2011)                                                   Shin Koihime†Musou OVA
Ginga Eiyuu Densetsu                                              Sen to Chihiro no Kamikakushi
Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare          Code Geass: Hangyaku no Lelouch R2
Gintama&#039;: Enchousen                                           Ookami Kodomo no Ame to Yuki
dtype: object

In [45]:
def recommend_animes(target_anime, user_sim_df, top_n= 5):
    sim_scores = user_sim_df[target_anime]
    
    # Sort the scores in descending order and get the top_n most similar animes
    sim_scores = sim_scores.sort_values(ascending=False)
    
    # Exclude the target anime from the recommendations
    sim_scores = sim_scores.drop(target_anime)
    
    # Get the top_n most similar animes
    top_animes = sim_scores.head(top_n)
    
    return top_animes

#### Recommended movie for them who already seen Kimi no Na wa.

In [47]:
target_anime = 'Kimi no Na wa.'
top_animes = recommend_animes(target_anime, user_sim_df)
print(f"Recommendations for {target_anime}:\n{top_animes}")

Recommendations for Kimi no Na wa.:
Hakuchuu Meikyuu     1.0
Mo Jing Lieren       1.0
Hachikadzuki-hime    1.0
Backkom 2            1.0
D.I.C.E.             1.0
Name: Kimi no Na wa., dtype: float64


#### Recommended movie for them who already seen Fullmetal Alchemist: Brotherhood 

In [49]:
target_anime = 'Fullmetal Alchemist: Brotherhood'
top_animes = recommend_animes(target_anime, user_sim_df)
print(f"Recommendations for {target_anime}:\n{top_animes}")

Recommendations for Fullmetal Alchemist: Brotherhood:
Oh! Super Milk-chan Special    1.0
Mina no Mura to Kawa           1.0
Plastic Memories               1.0
Psycho-Pass Movie              1.0
Seto no Hanayome               1.0
Name: Fullmetal Alchemist: Brotherhood, dtype: float64


### 4. Evaluation

#### Split the dataset

In [51]:
train_data, test_data = train_test_split(user_anime, test_size=0.4, random_state=42)

In [53]:
print(train_data.shape, test_data.shape)

(7209, 43) (4806, 43)


#### Recommendation for train data

In [55]:
user_sim_train = 1 - pairwise_distances( train_data.values,metric='cosine')
user_sim_train

array([[1.        , 0.5       , 0.        , ..., 0.5       , 0.5       ,
        0.31622777],
       [0.5       , 1.        , 0.        , ..., 0.5       , 0.5       ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.40824829, 0.        ,
        0.25819889],
       ...,
       [0.5       , 0.5       , 0.40824829, ..., 1.        , 0.        ,
        0.        ],
       [0.5       , 0.5       , 0.        , ..., 0.        , 1.        ,
        0.31622777],
       [0.31622777, 0.        , 0.25819889, ..., 0.        , 0.31622777,
        1.        ]])

In [57]:
user_train_df = pd.DataFrame(user_sim_train)
user_train_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,7199,7200,7201,7202,7203,7204,7205,7206,7207,7208
0,1.000000,0.500000,0.000000,0.0,0.000000,0.632456,0.0,0.0,0.408248,0.500000,...,0.000000,0.500000,0.288675,0.316228,0.267261,0.500000,0.000000,0.500000,0.500000,0.316228
1,0.500000,1.000000,0.000000,0.0,0.000000,0.632456,0.0,0.0,0.000000,0.500000,...,0.353553,0.000000,0.000000,0.316228,0.000000,0.000000,0.408248,0.500000,0.500000,0.000000
2,0.000000,0.000000,1.000000,0.0,0.333333,0.000000,0.0,0.0,0.333333,0.000000,...,0.288675,0.000000,0.235702,0.000000,0.218218,0.408248,0.000000,0.408248,0.000000,0.258199
3,0.000000,0.000000,0.000000,1.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.316228,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.333333,0.0,1.000000,0.000000,0.0,0.0,0.000000,0.000000,...,0.000000,0.408248,0.000000,0.000000,0.654654,0.000000,0.000000,0.000000,0.000000,0.258199
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7204,0.500000,0.000000,0.408248,0.0,0.000000,0.316228,0.0,0.0,0.408248,0.500000,...,0.353553,0.500000,0.288675,0.316228,0.267261,1.000000,0.000000,0.000000,0.500000,0.316228
7205,0.000000,0.408248,0.000000,0.0,0.000000,0.774597,0.0,0.0,0.000000,0.408248,...,0.288675,0.000000,0.000000,0.516398,0.218218,0.000000,1.000000,0.000000,0.408248,0.258199
7206,0.500000,0.500000,0.408248,0.0,0.000000,0.316228,0.0,0.0,0.408248,0.000000,...,0.000000,0.000000,0.288675,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000
7207,0.500000,0.500000,0.000000,0.0,0.000000,0.632456,0.0,0.0,0.408248,1.000000,...,0.353553,0.500000,0.288675,0.632456,0.267261,0.500000,0.408248,0.000000,1.000000,0.316228


In [59]:
user_train_df.index = train_data.index.unique()
user_train_df.columns = train_data.index.unique()

In [61]:
user_train_df

name,Biriken,Kero Kero Keroppi no Aladdin to Mahou no Lamp,Violence Jack: Harlem Bomber-hen,Hyakujitsu no Bara: Jinginaki Nikukyuu-hen,84 Taekwon V,Doraemon: Treasure of the Shinugumi Mountain,UN-GO,Namida no Tsubomi,Akachan to Boku,Gokuu no Daibouken Pilot,...,Inma Youjo,Animegatari x Ghost in the Shell Collab Eizou,Mahou no Stage Fancy Lala,Kuroshitsuji II Specials,Vandread: Gekitou-hen,Zombie Clay Animation: Life of the Dead,Kiba,Kitsune no Kan Chigai,Barbapapa,Naruto: Shippuuden Movie 4 - The Lost Tower
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Biriken,1.000000,0.500000,0.000000,0.0,0.000000,0.632456,0.0,0.0,0.408248,0.500000,...,0.000000,0.500000,0.288675,0.316228,0.267261,0.500000,0.000000,0.500000,0.500000,0.316228
Kero Kero Keroppi no Aladdin to Mahou no Lamp,0.500000,1.000000,0.000000,0.0,0.000000,0.632456,0.0,0.0,0.000000,0.500000,...,0.353553,0.000000,0.000000,0.316228,0.000000,0.000000,0.408248,0.500000,0.500000,0.000000
Violence Jack: Harlem Bomber-hen,0.000000,0.000000,1.000000,0.0,0.333333,0.000000,0.0,0.0,0.333333,0.000000,...,0.288675,0.000000,0.235702,0.000000,0.218218,0.408248,0.000000,0.408248,0.000000,0.258199
Hyakujitsu no Bara: Jinginaki Nikukyuu-hen,0.000000,0.000000,0.000000,1.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.316228,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
84 Taekwon V,0.000000,0.000000,0.333333,0.0,1.000000,0.000000,0.0,0.0,0.000000,0.000000,...,0.000000,0.408248,0.000000,0.000000,0.654654,0.000000,0.000000,0.000000,0.000000,0.258199
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zombie Clay Animation: Life of the Dead,0.500000,0.000000,0.408248,0.0,0.000000,0.316228,0.0,0.0,0.408248,0.500000,...,0.353553,0.500000,0.288675,0.316228,0.267261,1.000000,0.000000,0.000000,0.500000,0.316228
Kiba,0.000000,0.408248,0.000000,0.0,0.000000,0.774597,0.0,0.0,0.000000,0.408248,...,0.288675,0.000000,0.000000,0.516398,0.218218,0.000000,1.000000,0.000000,0.408248,0.258199
Kitsune no Kan Chigai,0.500000,0.500000,0.408248,0.0,0.000000,0.316228,0.0,0.0,0.408248,0.000000,...,0.000000,0.000000,0.288675,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000
Barbapapa,0.500000,0.500000,0.000000,0.0,0.000000,0.632456,0.0,0.0,0.408248,1.000000,...,0.353553,0.500000,0.288675,0.632456,0.267261,0.500000,0.408248,0.000000,1.000000,0.316228


In [63]:
np.fill_diagonal(user_train_df.values, 0)
user_train_df

name,Biriken,Kero Kero Keroppi no Aladdin to Mahou no Lamp,Violence Jack: Harlem Bomber-hen,Hyakujitsu no Bara: Jinginaki Nikukyuu-hen,84 Taekwon V,Doraemon: Treasure of the Shinugumi Mountain,UN-GO,Namida no Tsubomi,Akachan to Boku,Gokuu no Daibouken Pilot,...,Inma Youjo,Animegatari x Ghost in the Shell Collab Eizou,Mahou no Stage Fancy Lala,Kuroshitsuji II Specials,Vandread: Gekitou-hen,Zombie Clay Animation: Life of the Dead,Kiba,Kitsune no Kan Chigai,Barbapapa,Naruto: Shippuuden Movie 4 - The Lost Tower
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Biriken,0.000000,0.500000,0.000000,0.0,0.000000,0.632456,0.0,0.0,0.408248,0.500000,...,0.000000,0.500000,0.288675,0.316228,0.267261,0.500000,0.000000,0.500000,0.500000,0.316228
Kero Kero Keroppi no Aladdin to Mahou no Lamp,0.500000,0.000000,0.000000,0.0,0.000000,0.632456,0.0,0.0,0.000000,0.500000,...,0.353553,0.000000,0.000000,0.316228,0.000000,0.000000,0.408248,0.500000,0.500000,0.000000
Violence Jack: Harlem Bomber-hen,0.000000,0.000000,0.000000,0.0,0.333333,0.000000,0.0,0.0,0.333333,0.000000,...,0.288675,0.000000,0.235702,0.000000,0.218218,0.408248,0.000000,0.408248,0.000000,0.258199
Hyakujitsu no Bara: Jinginaki Nikukyuu-hen,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.316228,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
84 Taekwon V,0.000000,0.000000,0.333333,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,...,0.000000,0.408248,0.000000,0.000000,0.654654,0.000000,0.000000,0.000000,0.000000,0.258199
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zombie Clay Animation: Life of the Dead,0.500000,0.000000,0.408248,0.0,0.000000,0.316228,0.0,0.0,0.408248,0.500000,...,0.353553,0.500000,0.288675,0.316228,0.267261,0.000000,0.000000,0.000000,0.500000,0.316228
Kiba,0.000000,0.408248,0.000000,0.0,0.000000,0.774597,0.0,0.0,0.000000,0.408248,...,0.288675,0.000000,0.000000,0.516398,0.218218,0.000000,0.000000,0.000000,0.408248,0.258199
Kitsune no Kan Chigai,0.500000,0.500000,0.408248,0.0,0.000000,0.316228,0.0,0.0,0.408248,0.000000,...,0.000000,0.000000,0.288675,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Barbapapa,0.500000,0.500000,0.000000,0.0,0.000000,0.632456,0.0,0.0,0.408248,1.000000,...,0.353553,0.500000,0.288675,0.632456,0.267261,0.500000,0.408248,0.000000,0.000000,0.316228


#### Recommendation for train data

In [65]:
user_train_df.idxmax(axis=1)[0:10]

name
Biriken                                                                             Backkom
Kero Kero Keroppi no Aladdin to Mahou no Lamp      Hello Kitty no Kieta Santa-san no Boushi
Violence Jack: Harlem Bomber-hen                                    Koutetsujou no Kabaneri
Hyakujitsu no Bara: Jinginaki Nikukyuu-hen                                UN-GO: Inga Nikki
84 Taekwon V                                                              Magnerobo Ga-Keen
Doraemon: Treasure of the Shinugumi Mountain     Doraemon Movie 08: Nobita to Ryuu no Kishi
UN-GO                                                                       UN-GO: Inga-ron
Namida no Tsubomi                                                       Itsumo Kokoro ni Ho
Akachan to Boku                                                             Ashinaga Ojisan
Gokuu no Daibouken Pilot                                                      Xiao Men Shen
dtype: object

#### Recommendation for test data

In [67]:
user_sim_test = 1 - pairwise_distances( test_data.values,metric='cosine')
user_sim_test

array([[1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.3086067 ],
       [0.        , 1.        , 0.81649658, ..., 0.40824829, 0.        ,
        0.        ],
       [0.        , 0.81649658, 1.        , ..., 0.33333333, 0.        ,
        0.        ],
       ...,
       [0.        , 0.40824829, 0.33333333, ..., 1.        , 0.40824829,
        0.23570226],
       [0.        , 0.        , 0.        , ..., 0.40824829, 1.        ,
        0.28867513],
       [0.3086067 , 0.        , 0.        , ..., 0.23570226, 0.28867513,
        1.        ]])

In [69]:
user_test_df = pd.DataFrame(user_sim_test)
user_test_df.index = test_data.index.unique()
user_test_df.columns = test_data.index.unique()
user_test_df

name,Macross F Movie 2: Sayonara no Tsubasa,Judo Sanka,Kuroko no Basket Movie 2: Winter Cup Soushuuhen - Namida no Saki e,Nandaka Velonica,Angel Heart,Yume-iro Pâtissière,Flutter of Birds II: Tenshi-tachi no Tsubasa,Tetsujin 28-gou (1980),Yuuki Yuuna wa Yuusha de Aru,Ogenki Clinic,...,Vampire Holmes,Koguma no Misha,Mobile Suit Gundam 0083: Stardust Memory - The Mayfly of Space,One Piece: Heart of Gold,Mazinger Z tai Devilman,Tasukeai no Rekishi: Inochi Hoken no Hajimari,Rinkan Club,Attack No.1 (1970),Enbo,Gun Frontier
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Macross F Movie 2: Sayonara no Tsubasa,1.000000,0.000000,0.000000,0.000000,0.338062,0.000000,0.218218,0.534522,0.000000,0.000000,...,0.000000,0.000000,0.771517,0.154303,0.338062,0.000000,0.000000,0.000000,0.000000,0.308607
Judo Sanka,0.000000,1.000000,0.816497,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.288675,0.316228,0.000000,0.000000,0.408248,0.000000,0.000000
Kuroko no Basket Movie 2: Winter Cup Soushuuhen - Namida no Saki e,0.000000,0.816497,1.000000,0.000000,0.000000,0.333333,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.235702,0.258199,0.000000,0.000000,0.333333,0.000000,0.000000
Nandaka Velonica,0.000000,0.000000,0.000000,1.000000,0.000000,0.333333,0.000000,0.000000,0.288675,0.333333,...,0.288675,0.577350,0.000000,0.471405,0.000000,0.333333,0.000000,0.000000,0.000000,0.000000
Angel Heart,0.338062,0.000000,0.000000,0.000000,1.000000,0.000000,0.516398,0.316228,0.223607,0.000000,...,0.223607,0.223607,0.365148,0.365148,0.000000,0.258199,0.000000,0.258199,0.316228,0.547723
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Tasukeai no Rekishi: Inochi Hoken no Hajimari,0.000000,0.000000,0.000000,0.333333,0.258199,0.333333,0.333333,0.000000,0.288675,0.000000,...,0.000000,0.577350,0.235702,0.235702,0.000000,1.000000,0.000000,0.333333,0.408248,0.471405
Rinkan Club,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.577350,0.000000,0.000000,0.577350,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.707107,0.000000
Attack No.1 (1970),0.000000,0.408248,0.333333,0.000000,0.258199,0.333333,0.333333,0.000000,0.288675,0.000000,...,0.000000,0.288675,0.235702,0.235702,0.000000,0.333333,0.000000,1.000000,0.408248,0.235702
Enbo,0.000000,0.000000,0.000000,0.000000,0.316228,0.000000,0.816497,0.000000,0.353553,0.408248,...,0.000000,0.353553,0.288675,0.288675,0.000000,0.408248,0.707107,0.408248,1.000000,0.288675


In [71]:
np.fill_diagonal(user_test_df.values, 0)
user_test_df

name,Macross F Movie 2: Sayonara no Tsubasa,Judo Sanka,Kuroko no Basket Movie 2: Winter Cup Soushuuhen - Namida no Saki e,Nandaka Velonica,Angel Heart,Yume-iro Pâtissière,Flutter of Birds II: Tenshi-tachi no Tsubasa,Tetsujin 28-gou (1980),Yuuki Yuuna wa Yuusha de Aru,Ogenki Clinic,...,Vampire Holmes,Koguma no Misha,Mobile Suit Gundam 0083: Stardust Memory - The Mayfly of Space,One Piece: Heart of Gold,Mazinger Z tai Devilman,Tasukeai no Rekishi: Inochi Hoken no Hajimari,Rinkan Club,Attack No.1 (1970),Enbo,Gun Frontier
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Macross F Movie 2: Sayonara no Tsubasa,0.000000,0.000000,0.000000,0.000000,0.338062,0.000000,0.218218,0.534522,0.000000,0.000000,...,0.000000,0.000000,0.771517,0.154303,0.338062,0.000000,0.000000,0.000000,0.000000,0.308607
Judo Sanka,0.000000,0.000000,0.816497,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.288675,0.316228,0.000000,0.000000,0.408248,0.000000,0.000000
Kuroko no Basket Movie 2: Winter Cup Soushuuhen - Namida no Saki e,0.000000,0.816497,0.000000,0.000000,0.000000,0.333333,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.235702,0.258199,0.000000,0.000000,0.333333,0.000000,0.000000
Nandaka Velonica,0.000000,0.000000,0.000000,0.000000,0.000000,0.333333,0.000000,0.000000,0.288675,0.333333,...,0.288675,0.577350,0.000000,0.471405,0.000000,0.333333,0.000000,0.000000,0.000000,0.000000
Angel Heart,0.338062,0.000000,0.000000,0.000000,0.000000,0.000000,0.516398,0.316228,0.223607,0.000000,...,0.223607,0.223607,0.365148,0.365148,0.000000,0.258199,0.000000,0.258199,0.316228,0.547723
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Tasukeai no Rekishi: Inochi Hoken no Hajimari,0.000000,0.000000,0.000000,0.333333,0.258199,0.333333,0.333333,0.000000,0.288675,0.000000,...,0.000000,0.577350,0.235702,0.235702,0.000000,0.000000,0.000000,0.333333,0.408248,0.471405
Rinkan Club,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.577350,0.000000,0.000000,0.577350,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.707107,0.000000
Attack No.1 (1970),0.000000,0.408248,0.333333,0.000000,0.258199,0.333333,0.333333,0.000000,0.288675,0.000000,...,0.000000,0.288675,0.235702,0.235702,0.000000,0.333333,0.000000,0.000000,0.408248,0.235702
Enbo,0.000000,0.000000,0.000000,0.000000,0.316228,0.000000,0.816497,0.000000,0.353553,0.408248,...,0.000000,0.353553,0.288675,0.288675,0.000000,0.408248,0.707107,0.408248,0.000000,0.288675


In [73]:
recommendations= user_test_df.idxmax(axis=1)[0:10]
recommendations

name
Macross F Movie 2: Sayonara no Tsubasa                                                                       Macross Δ
Judo Sanka                                                                                                 Kick no Oni
Kuroko no Basket Movie 2: Winter Cup Soushuuhen - Namida no Saki e                                           Days (TV)
Nandaka Velonica                                                                 Hello Kitty: Ringo no Mori no Fantasy
Angel Heart                                                           Itsuka no Main: Kaminari Shounen - Tenta Sanjou!
Yume-iro Pâtissière                                                      Nekota no Koto ga Kininatte Shikatanai. (ONA)
Flutter of Birds II: Tenshi-tachi no Tsubasa                                                         Trouble Evocation
Tetsujin 28-gou (1980)                                                                                      Macross XX
Yuuki Yuuna wa Yuusha de Aru               

### 5. Interview questions

##### 1. Can you explain the difference between user-based and item-based collaborative filtering?

User-based collaborative filtering recommends items based on the preferences of users similar to the target user, focusing on user similarity. Item-based collaborative filtering, on the other hand, recommends items based on the similarity between items themselves, leveraging item interactions. Item-based filtering generally scales better and handles new users more effectively, while user-based filtering can struggle with scalability and new user cold start issues.

##### 2. What is collaborative filtering, and how does it work?

Collaborative filtering works by utilizing user interaction data to identify patterns and similarities, either among users or items. It generates recommendations based on the premise that users with similar tastes or items with similar attributes are likely to appeal to the same preferences.