In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
import matplotlib.pylab as pylab
%matplotlib inline
pd.set_option('display.max_columns', 500)
mpl.style.use('ggplot')
sns.set_style('white')
pylab.rcParams['figure.figsize'] = 12,8

#ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
anime_data=pd.read_csv('anime.csv')
rating_data=pd.read_csv('rating.csv')

# Merging the two databases
anime_fulldata=pd.merge(anime_data,rating_data,on='anime_id',suffixes= ['', '_user'])
anime_fulldata = anime_fulldata.rename(columns={'name': 'anime_title', 'rating_user': 'user_rating'})

# Replaces -1 ratings with NaN and then removes them
anime_feature=anime_fulldata.copy()
anime_feature["user_rating"].replace({-1: np.nan}, inplace=True)
anime_feature = anime_feature.dropna(axis = 0, how ='any')


# *Collaborative Based Filtering*

# Considering only users who have a min of 100 ratings
counts = anime_feature['user_id'].value_counts()
anime_feature = anime_feature[anime_feature['user_id'].isin(counts[counts >= 100].index)]

# Creating a pivot table that we will use in the sparse matrix
anime_pivot=anime_feature.pivot_table(index='anime_title',columns='user_id',values='user_rating').fillna(0)

# Creating the sparse matrix
from scipy.sparse import csr_matrix
anime_matrix = csr_matrix(anime_pivot.values)

# Using the KNN model fitted on the sparse matrix
from sklearn.neighbors import NearestNeighbors
model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
model_knn.fit(anime_matrix)


## *Content Based Filtering*

# Anime titles have a lot of symbols, this will remove them
import re
def text_cleaning(text):
    text = re.sub(r'&quot;', '', text)
    text = re.sub(r'.hack//', '', text)
    text = re.sub(r'&#039;', '', text)
    text = re.sub(r'A&#039;s', '', text)
    text = re.sub(r'I&#039;', 'I\'', text)
    text = re.sub(r'&amp;', 'and', text)
    
    return text
anime_data['name'] = anime_data['name'].apply(text_cleaning)

# For Term Frequency (TF) and Inverse Document Frequency (IDF)
from sklearn.feature_extraction.text import TfidfVectorizer
tfv = TfidfVectorizer(min_df=3,  max_features=None, 
            strip_accents='unicode', analyzer='word',token_pattern=r'\w{1,}',
            ngram_range=(1, 3),
            stop_words = 'english')
genres_str = anime_data['genre'].str.split(',').astype(str)
tfv_matrix = tfv.fit_transform(genres_str)


from sklearn.metrics.pairwise import sigmoid_kernel

# Compute the sigmoid kernel. This function determines which anime to recommend with 1 and not recommend with 0.
sig = sigmoid_kernel(tfv_matrix, tfv_matrix)
indices = pd.Series(anime_data.index, index=anime_data['name']).drop_duplicates()


In [3]:
# Recommendation function, takes title of the anime as input as well as the sigmoid values.
# Returns a pandas dataframe with the recommended anime
def give_rec(title,n, sig=sig):
    # Get the index corresponding to title
    idx = indices[title]

    # Get the pairwsie similarity scores 
    sig_scores = list(enumerate(sig[idx]))

    # Sort the anime 
    sig_scores = sorted(sig_scores, key=lambda x: x[1], reverse=True)

    # Scores of the 10 most similar anime
    sig_scores = sig_scores[1:n+1]

    # Anime indices
    anime_indices = [i[0] for i in sig_scores]
    
    rec_df = pd.DataFrame({'Anime name': anime_data['name'].iloc[anime_indices].values})
    
    query_index = anime_pivot.index.get_loc(title)
    distances, indices2 = model_knn.kneighbors(anime_pivot.iloc[query_index,:].values.reshape(1, -1), n_neighbors = 6)
    for i in range(0, len(distances.flatten())):
        if (anime_pivot.index[indices2.flatten()[i]] in list(rec_df["Anime name"])) or (anime_pivot.index[indices2.flatten()[i]] == title):
            pass
        else:
            rec_df = rec_df.append({"Anime name": anime_pivot.index[indices2.flatten()[i]]},ignore_index=True)
            
    for i in range(0,n+1):
        if rec_df.shape[0] > n:
            rec_df = rec_df.drop(i)
            
    return rec_df.reset_index(drop=True)

In [20]:
give_rec("Naruto",15)

Unnamed: 0,Anime name
0,Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsu...
1,Naruto Shippuuden: Sunny Side Battle
2,Naruto Soyokazeden Movie: Naruto to Mashin to ...
3,Battle Spirits: Ryuuko no Ken
4,Kyutai Panic Adventure!
5,Ranma ½: Akumu! Shunmin Kou
6,Ben-To
7,Naruto: Shippuuden Movie 6 - Road to Ninja
8,Rekka no Honoo
9,Naruto: Honoo no Chuunin Shiken! Naruto vs. Ko...
