In [275]:
import numpy as np
import pandas as pd

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import precision_score, roc_auc_score
from sklearn.model_selection import train_test_split

from scipy.sparse import csr_matrix

## Load the data from the Excel sheets

In [276]:
ANIME_CSV_PATH = "./datasets/anime.csv"
RATING_CSV_PATH = "./datasets/rating.csv"

In [277]:
anime_df = pd.read_csv(ANIME_CSV_PATH)
rating_df = pd.read_csv(RATING_CSV_PATH)

## Preprocess the data

In [278]:
rating_df.rating.replace({-1: np.nan}, inplace = True)

In [279]:
merged_df = pd.merge(anime_df, rating_df, on='anime_id', suffixes=['_anime', '_user'])

In [280]:
merged_df = merged_df[['user_id', 'name', 'rating_user']]

In [281]:
merged_limit_df = merged_df[merged_df.user_id <= 10000]

create pivot table for merged data

In [282]:
user_anime_matrix = merged_limit_df.pivot_table(index=['user_id'], columns=['name'], values='rating_user')

In [283]:
norm_user_anime_matrix = user_anime_matrix.apply(lambda x: (x-np.mean(x))/(np.max(x)-np.min(x)), axis=1)

In [284]:
norm_user_anime_matrix.fillna(0, inplace=True)
norm_user_anime_matrix = norm_user_anime_matrix.T
norm_user_anime_matrix = norm_user_anime_matrix.loc[:, (norm_user_anime_matrix != 0).any(axis=0)]

create sparse matrix for more efficient computation

In [285]:
anime_user_csr = csr_matrix(norm_user_anime_matrix.values)

## Cosine Similarity

In [286]:
item_similarity = cosine_similarity(anime_user_csr)
user_similarity = cosine_similarity(anime_user_csr.T)

In [287]:
item_sim_df = pd.DataFrame(item_similarity, index=norm_user_anime_matrix.index, columns=norm_user_anime_matrix.index)
user_sim_df = pd.DataFrame(user_similarity, index=norm_user_anime_matrix.columns, columns=norm_user_anime_matrix.columns)

In [288]:
def get_recommendations_by_item(anime_name, n_recommendations=10):
    print(f'Recommendations for anime: {anime_name}\n')
    for anime in item_sim_df.sort_values(by=anime_name, ascending=False).index[1:n_recommendations]:
        print(f'{anime}')

In [289]:
def get_recommendations_by_user(user_id, n_recommendations=10):
    if user_id not in norm_user_anime_matrix.columns:
        return print(f'Not found user: {user_id}\n')
    
    sim_users = user_sim_df.sort_values(by=user_id, ascending=False).index[1:11]
    best = []
    most_common = {}
    
    for i in sim_users:
        max_score = norm_user_anime_matrix.loc[:, i].max()
        best.append(norm_user_anime_matrix[norm_user_anime_matrix.loc[:, i] == max_score].index.tolist())
    for i in range(len(best)):
        for j in best[i]:
            if j in most_common:
                most_common[j] += 1
            else:
                most_common[j] = 1
    sorted_list = sorted(most_common.items(), key=lambda elem: elem[1], reverse=True)

    print(f'Recommendations for user: {user_id}\n')
    for anime, _ in sorted_list[:n_recommendations]:
        print(f'{anime}')

In [290]:
get_recommendations_by_item('Naruto')

Recommendations for anime: Naruto

Naruto: Shippuuden Movie 1
Bleach
Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shinobu Houjou Dattebayo!
Naruto: Shippuuden Movie 2 - Kizuna
Naruto: Shippuuden Movie 6 - Road to Ninja
Naruto: Shippuuden Movie 5 - Blood Prison
Gunslinger Stratos
Sword Gai
Dragon Ball Z


In [291]:
get_recommendations_by_user(5)

Recommendations for user: 5

Fullmetal Alchemist: Brotherhood
Steins;Gate
Gintama
Gintama&#039;
Clannad: After Story
Shigatsu wa Kimi no Uso
Gintama°
Gintama&#039;: Enchousen
Cowboy Bebop
Hunter x Hunter (2011)
