In [2]:
#import required libraries
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas_profiling
from scipy.sparse import csr_matrix
from lightfm.evaluation import auc_score
from lightfm.data import Dataset


# Hybrid Anime Recommender System Model

In [8]:
anime = pd.read_csv('./train/anime.csv')
rating = pd.read_csv('./train/rating.csv')
n_user = 5000

In [9]:
def sigmoid(x):  
    return np.exp(-np.logaddexp(0, -x))

In [None]:
# Selecting only firest n_user as to avoid crash :'
rating_selected = rating.head(np.where(rating['user_id']==n_user)[0][-1]+1)
rating_selected['rating'].replace({-1:0},inplace=True)


In [13]:
user_anime_interaction = pd.pivot_table(rating_selected, index='user_id', columns='anime_id', values='rating')
user_anime_interaction.fillna(0,inplace=True)
user_anime_csr = csr_matrix(user_anime_interaction.values)


In [14]:
n_users, n_items = user_anime_interaction.shape

In [34]:
#Creating Dataset object so to ease mappings 
# Making user features matrix
nanime = anime.set_index('anime_id')
nuser = rating_selected.set_index('user_id')
total_genres =  sorted(set(map(lambda x:x.strip(),','.join(map(lambda x: str(x), anime['genre'].values)).split(','))))[:-1]

  
def create_UserFeature(rating_selected,start_id=1,end_id=n_user,new_user=False,new_user_id=n_user+1,new_userDat={}):
  user_data = []
  for user_id in range(start_id,end_id+1):
    filter_user = (nuser.loc[user_id].iloc[np.where(nuser.loc[user_id]['rating'] !=0)])
    total_anime_watch = len(filter_user)
    
    if total_anime_watch ==0:
      genres_user = list()
    else:
      genres_user = nanime.loc[filter_user['anime_id']]['genre']
    
      if isinstance(genres_user,str):
        genres_user= [genres_user]
      else:
        genres_user = genres_user.values
    
    # print('\n-->',user_id,total_anime_watch,genres_user)
    
    user_gen_dat = {}
    all_genres = (','.join(map(lambda x:str(x),genres_user))).split(',')
    total_anime_watch = total_anime_watch if total_anime_watch else 1

    for unique_gen in total_genres:
      
      user_gen_dat[unique_gen] = all_genres.count(unique_gen)/total_anime_watch
    # user_data.append([user_id,sorted(set(all_genres).intersection(total_genres))])
    
    user_data.append([user_id,user_gen_dat])
  # print(user_data[0],len(user_data[0][1].keys()))
  return user_data  


In [39]:
#Init the dataset 
dataset = Dataset(user_identity_features=False)
dataset.fit(np.arange(1,n_user+1),rating_selected.sort_values(by='anime_id')['anime_id'].unique(),user_features=total_genres,item_features=["Name"])
user_features = dataset.build_user_features(create_UserFeature(rating_selected),normalize=True)


In [None]:
# Training the model 
model = LightFM(loss='warp')

model = model.fit(user_anime_csr,
                  user_features=user_features,#comment this if  you want Pure CF
                  epochs=100,
                  num_threads=16, verbose=True)

In [41]:
# Adding new user and giving top k anime recommendation
def recommendME(model,anime,dataset,user_id=None,new_user_feature=None,k=5):
  nanime=anime.set_index('anime_id')
  if user_id is None:
    user_id = n_users +1
  if user_id > n_users:
    for genre in total_genres:
      if genre not in list(new_user_feature.keys()):
        new_user_feature[genre] = 0
    dataset.fit_partial(users=[user_id],user_features=total_genres)
    # My Feature matrix
    # new_user_feature = [user_id,{' Adventure': 0.11764705882352941, ' Cars': 0.0, ' Comedy': 0.23529411764705882, ' Dementia': 0.0, ' Demons': 0.058823529411764705, ' Drama': 0.17647058823529413, ' Ecchi': 0.058823529411764705, ' Fantasy': 0.35294117647058826, ' Game': 0.058823529411764705, ' Harem': 0.0, ' Hentai': 0.0, ' Historical': 0.0, ' Horror': 0.058823529411764705, ' Josei': 0.0, ' Kids': 0.0, ' Magic': 0.11764705882352941, ' Martial Arts': 0.0, ' Mecha': 0.058823529411764705, ' Military': 0.11764705882352941, ' Music': 0.0, ' Mystery': 0.058823529411764705, ' Parody': 0.058823529411764705, ' Police': 0.17647058823529413, ' Psychological': 0.17647058823529413, ' Romance': 0.23529411764705882, ' Samurai': 0.0, ' School': 0.29411764705882354, ' Sci-Fi': 0.17647058823529413, ' Seinen': 0.11764705882352941, ' Shoujo': 0.058823529411764705, ' Shoujo Ai': 0.0, ' Shounen': 0.29411764705882354, ' Shounen Ai': 0.0, ' Slice of Life': 0.17647058823529413, ' Space': 0.0, ' Sports': 0.058823529411764705, ' Super Power': 0.17647058823529413, ' Supernatural': 0.47058823529411764, ' Thriller': 0.17647058823529413, ' Vampire': 0.0, ' Yaoi': 0.0, ' Yuri': 0.0, 'Action': 0.47058823529411764, 'Adventure': 0.058823529411764705, 'Cars': 0.0, 'Comedy': 0.11764705882352941, 'Dementia': 0.0, 'Demons': 0.0, 'Drama': 0.23529411764705882, 'Ecchi': 0.0, 'Fantasy': 0.0, 'Game': 0.0, 'Harem': 0.0, 'Hentai': 0.0, 'Historical': 0.0, 'Horror': 0.0, 'Josei': 0.0, 'Kids': 0.0, 'Magic': 0.0, 'Martial Arts': 0.0, 'Mecha': 0.0, 'Military': 0.0, 'Music': 0.0, 'Mystery': 0.058823529411764705, 'Parody': 0.0, 'Police': 0.0, 'Psychological': 0.0, 'Romance': 0.0, 'Samurai': 0.0, 'School': 0.0, 'Sci-Fi': 0.058823529411764705, 'Seinen': 0.0, 'Shoujo': 0.0, 'Shounen': 0.0, 'Slice of Life': 0.0, 'Space': 0.0, 'Sports': 0.0, 'Super Power': 0.0, 'Supernatural': 0.0, 'Thriller': 0.0, 'Vampire': 0.0, 'Yaoi': 0.0} ] 
    new_user_feature = [user_id,new_user_feature]
    new_user_feature = dataset.build_user_features([new_user_feature],normalize=False)
  user_id_map = dataset.mapping()[0][user_id] # just user_id -1 
  scores = model.predict(user_id_map, np.arange(n_items),user_features=new_user_feature)
  rank = np.argsort(-scores)
  selected_anime_id =np.array(list(dataset.mapping()[2].keys()))[rank]
  top_items = nanime.loc[selected_anime_id]

  return top_items['name'][:k].values      



In [None]:
recommendME(model,anime,dataset,new_user_feature={'Drama':0.5,'Fantasy':0.75,'Harem':0.9,'Hentai':0,'Comedy':0.96})

array(['Freely Tomorrow', 'Tell Your World', 'Downloader', 'EDEN',
       'Hime Chen! Otogi Chikku Idol Lilpri'], dtype=object)

In [None]:
# Model AUC: 98... and Precision at K 0.60
# for me : split data  into train and test and evaluate model on that 
train_k = precision_at_k(model,user_anime_csr,user_features=user_features)

train_auc = auc_score(model, user_anime_csr, num_threads=16,user_features=user_features).mean()

print('Collaborative filtering train AUC: {} \n Precision at k users {} '.format(train_auc , train_k.mean()))