The recommender takes the anime that the selected user rated highest. For each of them it recommends 15 other titles based on genre and type, ranks the candidates by the number of times they appear in those recommendations and by their rating, and returns the top ones that the selected user hasn't seen/rated yet.

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import linear_kernel

In [None]:
# Load the per-user ratings table and the anime catalogue from the Kaggle input folder.
ratings = pd.read_csv('../input/anime-recommendations-database/rating.csv')
anime = pd.read_csv('../input/anime-recommendations-database/anime.csv')
# Preview the first catalogue rows.
anime.head()

In [None]:
# Turn the 'Unknown' episode marker into 0.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on a column selected with [] is chained assignment, which pandas deprecates
# and which leaves `anime` unchanged when copy-on-write is enabled.
anime['episodes'] = anime['episodes'].replace('Unknown', 0)
# Keep the titles in their own Series and display them.
anime_titles = anime['name']
anime_titles

In [None]:
ratings.head()
ratings['rating'].unique()
# Map the -1 rating value to 1 in both tables.
# The results are assigned back to the columns: calling
# replace(..., inplace=True) on a column selected with [] is chained
# assignment, which pandas deprecates and which leaves the parent frame
# unchanged when copy-on-write is enabled.
anime['rating'] = anime['rating'].replace(-1, 1)
ratings['rating'] = ratings['rating'].replace(-1, 1)
# ratings.info()
# Number of rating rows.
ratings.shape[0]

In [None]:
# creating dummies for genres
genre_pd = anime['genre'].str.split(', ').str.join('|').str.get_dummies()
genre_pd = genre_pd.astype('int32')

type_pd = anime['type'].str.get_dummies()
type_pd.head()

type_pd=type_pd.astype('int32')

In [None]:
# Feature table: id, rating and member count side by side with the genre and
# type indicator columns.
anime_df = pd.concat(
    objs=[anime[['anime_id', 'rating', 'members']], genre_pd, type_pd],
    axis='columns',
)

# Cast the numeric columns to 32-bit dtypes.
anime_df['rating'] = anime_df['rating'].astype('float32')
anime_df[['anime_id','members']] = anime_df[['anime_id','members']].astype('int32')


In [None]:
def mapping():
    """Build a lookup from ``anime_id`` to the matching row label of ``anime_df``.

    returns: pandas Series indexed by ``anime['anime_id']`` whose values are
    the entries of ``anime_df.index`` in the same order
    """
    row_labels = anime_df.index
    anime_ids = anime['anime_id']
    return pd.Series(data=row_labels, index=anime_ids)

In [None]:
# Standardise the number of members (popularity) to zero mean / unit std.
# Standardising the rating is currently disabled:
# anime_df['rating']=(anime_df['rating']-anime_df['rating'].mean())/anime_df['rating'].std()
anime_df['members'] = (
    anime_df['members']
    .sub(anime_df['members'].mean())
    .div(anime_df['members'].std())
)

# Feature matrix used for similarity: everything except the id, with missing
# values replaced by 0.
cat_df = anime_df.drop(columns='anime_id').fillna(0)

In [None]:
def similarity_matrix(cat_df):
    """Compute the linear kernel of ``cat_df`` against itself.

    args: feature dataframe (one row per title)
    returns: whatever ``sklearn.metrics.pairwise.linear_kernel`` returns for
    ``(cat_df, cat_df)``, i.e. the row-by-row similarity scores
    """
    from sklearn.metrics.pairwise import linear_kernel

    pairwise_scores = linear_kernel(cat_df, cat_df)
    return pairwise_scores

In [None]:
def recommend_on_genre(anime_movies, num_recommended: int = 15):
    """Return the ids of the titles most similar to one anime.

    Similarity is the linear kernel (dot product) between the feature row of
    the requested title in ``cat_df`` and every row of ``cat_df``.

    args: anime id to find neighbours for, optional: number of ids to return
    returns: list of up to ``num_recommended`` anime ids, most similar first;
        the requested title itself is never part of the list
    raises: KeyError if ``anime_movies`` is not a key of ``mapping()``
    """
    anime_index = mapping()[anime_movies]
    # Score only the requested row against the catalogue instead of building
    # the full pairwise matrix just to read a single row out of it.
    scores = linear_kernel(cat_df.iloc[[anime_index]], cat_df)[0]
    # Stable sort on the negated scores: highest score first, ties kept in
    # catalogue order.
    ranked = np.argsort(-scores, kind='stable')
    # A dot product is not guaranteed to be largest for the row itself, so the
    # requested title is removed explicitly rather than assumed to rank first.
    ranked = ranked[ranked != anime_index][:num_recommended]
    return anime['anime_id'].iloc[ranked].tolist()

In [None]:
def get_rating(movie_id, df=anime_df):
    """Look up the rating stored for one title.

    args: anime id, optional: dataframe with 'anime_id' and 'rating' columns if other than default
    returns: the 'rating' value of the first row whose 'anime_id' equals movie_id
    raises: IndexError when no row matches
    """
    is_requested = df['anime_id'] == movie_id
    matching_ratings = df['rating'][is_requested]
    return matching_ratings.iloc[0]

In [None]:
def user_top_anime(user_id, df=ratings, top_n: int = 10, rating_min=5):
    """
    Select the titles a user rated highest.

    args: user id, optional: ratings dataframe if other than default, number of titles to return, minimum rating
    returns: dataframe with columns 'anime_id' and 'rating' holding at most top_n rows of the
        user with rating >= rating_min, highest rating first
    """
    belongs_to_user = df['user_id'] == user_id
    user_ratings = df[['anime_id', 'rating']].loc[belongs_to_user]
    good_enough = user_ratings[user_ratings['rating'] >= rating_min]
    return good_enough.nlargest(top_n, 'rating')

In [None]:
def seen(movie, user, df=None):
    """function checks if user has seen and reviewed recommended movie
    args: movie id, user id, optional: ratings dataframe (columns 'user_id'
        and 'anime_id') if other than the module-level ``ratings``
    returns True if the user has a row for that movie id, otherwise False
    """
    if df is None:
        # Resolved at call time so the current ``ratings`` frame is used.
        df = ratings
    user_anime_ids = df.loc[df['user_id'] == user, 'anime_id']
    # ``x in series`` tests the index labels, not the stored values, so the
    # ids are compared explicitly.
    return bool((user_anime_ids == movie).any())

In [None]:
def combine(user, df1=anime_df, df2=ratings, n=5):
    """
    main function: recommend unseen titles based on a user's top-rated ones

    run user_top_anime on df2
    apply recommend_on_genre to each top title
    group count recommended titles
    apply get_rating (on df1)
    apply seen (on the user's rows of df2)
    sort by count recommended & rating where seen = False
    return top n

    args: user id, optional: anime dataframe (df1) and ratings dataframe (df2)
        if other than default, number of rows to return
    returns: dataframe with columns anime_id, num_recommendations, rating,
        seen; empty when nothing could be recommended
    raises: KeyError if one of the user's top titles is not a key of
        mapping() (propagated from recommend_on_genre)
    """
    from itertools import chain
    from collections import Counter

    columns = ['anime_id', 'num_recommendations', 'rating', 'seen']
    # generate top animes for a user
    top_anime = user_top_anime(user, df2)
    # The returned frame keeps the row labels of the ratings table, so the ids
    # are read from the 'anime_id' column rather than from the index.
    recommended_lists = [
        recommend_on_genre(anime_id) for anime_id in top_anime['anime_id']
    ]
    # number of times each title appears in the recommendations
    rec_count = Counter(chain.from_iterable(recommended_lists))
    if not rec_count:
        # No top titles (or no neighbours): nothing to rank.
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame({
        'anime_id': list(rec_count.keys()),
        'num_recommendations': list(rec_count.values()),
    })
    df['rating'] = df['anime_id'].apply(lambda x: get_rating(x, df=df1))
    # Filter the ratings table for this user once instead of once per candidate.
    user_ratings = df2[df2['user_id'] == user]
    df['seen'] = df['anime_id'].apply(
        lambda x: seen(x, user=user, df=user_ratings)
    ).astype(bool)
    unseen = df[~df['seen']]
    return unseen.sort_values(
        by=['num_recommendations', 'rating'], ascending=False
    ).head(n)

In [None]:
# Show the recommendations for user 10 with the default settings.
combine(user=10)