In [2]:
!pip install lenskit~=0.14

Collecting lenskit~=0.14
  Downloading lenskit-0.14.4-py3-none-any.whl (74 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.0/74.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting binpickle>=0.3.2 (from lenskit~=0.14)
  Downloading binpickle-0.3.4-py3-none-any.whl (13 kB)
Collecting seedbank>=0.1.0 (from lenskit~=0.14)
  Downloading seedbank-0.1.3-py3-none-any.whl (8.5 kB)
Collecting csr>=0.3.1 (from lenskit~=0.14)
  Downloading csr-0.5.1-py3-none-any.whl (25 kB)
Collecting anyconfig==0.13.* (from seedbank>=0.1.0->lenskit~=0.14)
  Downloading anyconfig-0.13.0-py2.py3-none-any.whl (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.8/87.8 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: binpickle, anyconfig, seedbank, csr, lenskit
Successfully installed anyconfig-0.13.0 binpickle-0.3.4 csr-0.5.1 lenskit-0.14.4 seedbank-0.1.3


In [3]:
import pandas as pd
import numpy as np
from lenskit.metrics.topn import ndcg, hit
from lenskit import topn
from google.colab import drive

# Directory holding the recommendation, evaluation and anime files. File names
# are appended to it with `+` in later cells, so the trailing slash is required.
output_dir = '/content/gdrive/My Drive/data/output/'

# Mount Google Drive so the path above becomes readable from the Colab VM.
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [4]:
# (file-name prefix, display label) for every recommender being compared.
# The prefix selects the '<prefix>-rec-20.parquet' file to load; the label is
# what ends up in the 'Algorithm' column of the combined recommendations.
algorithms = [
    ('content-based', 'Content Based'),
    ('implicit-mf', 'Implicit MF'),
    ('item-item-sum', 'Item-Item Sum'),
    ('lift', 'Lift'),
    ('popular', 'Popular'),
    ('torchtag', 'Torchtag'),
]

In [5]:
# Load every algorithm's '<prefix>-rec-20.parquet' file and tag its rows with
# the algorithm's display label in an 'Algorithm' column.
#
# The frames are collected first and concatenated once: calling pd.concat
# inside the loop re-copies everything loaded so far on each iteration, and
# seeding the result with an empty DataFrame is unnecessary.
rec_frames = [
    pd.read_parquet(output_dir + prefix + '-rec-20.parquet').assign(Algorithm=label)
    for prefix, label in algorithms
]

# pd.concat raises on an empty list, so keep the old "empty frame" result when
# no algorithms are configured. The per-file indexes are kept as-is.
all_recs = pd.concat(rec_frames) if rec_frames else pd.DataFrame()

In [6]:
# Held-out interactions that the recommendation lists are scored against.
eval_df = pd.read_csv(output_dir + 'eval.csv')

# Register the top-N metrics; they are added in this order (nDCG, then hit).
rla = topn.RecListAnalysis()
for metric in (ndcg, hit):
    rla.add_metric(metric)

# Hand LensKit only the list key (Algorithm, user), the item and its rank, then
# flatten the result's index into ordinary columns.
rec_columns = ['Algorithm', 'user', 'item', 'rank']
results = rla.compute(all_recs[rec_columns], eval_df).reset_index()

In [7]:
# Mean nDCG and hit rate per algorithm.
# The metric columns are selected *before* aggregating so that only they are
# averaged; averaging the whole frame also processes every other column in
# `results` and fails on newer pandas if any of them is non-numeric.
summary = results.groupby('Algorithm')[['ndcg', 'hit']].mean()

In [8]:
# Show the per-algorithm mean nDCG and hit rate table.
summary

Unnamed: 0_level_0,ndcg,hit
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1
Content Based,0.022162,0.22542
Implicit MF,0.244411,0.896846
Item-Item Sum,0.246882,0.881706
Lift,0.000185,0.003339
Popular,0.131177,0.730774
Torchtag,0.102971,0.538741


In [23]:
# Peek at the first rows of the combined recommendations to see which columns
# are available.
all_recs.head()

Unnamed: 0,item,score,user,rank,algorithm,Algorithm
0,121,3.238474,3,1,content-based,Content Based
1,9135,3.238474,3,2,content-based,Content Based
2,296,3.234171,3,3,content-based,Content Based
3,3588,3.231096,3,4,content-based,Content Based
4,4155,3.218197,3,5,content-based,Content Based


In [14]:
# Build an (anime_id, genre) lookup with one row per genre of each title:
# the 'genre' column is a ', '-separated string, so split it into a list and
# explode the list into separate rows.
anime = (
    pd.read_csv(output_dir + 'anime.csv')
    .assign(genre=lambda frame: frame['genre'].str.split(', '))
    [['anime_id', 'genre']]
    .explode('genre')
)

# Attach genres to the recommendations. A recommended item with several genres
# yields several rows, and items absent from the anime table are dropped
# (inner join).
all_genres = all_recs.merge(anime, how='inner', left_on='item', right_on='anime_id')
len(all_genres)

9511051

In [19]:
# A user's list counts as diverse when no single genre covers more than this
# many of its recommendation rows.
# NOTE(review): lists appear to hold 20 items (files are named '-rec-20') - confirm.
MAX_SAME_GENRE_RECS = 15

# Number of recommendation rows per (algorithm, user, genre). Only the 'rank'
# column is counted instead of counting every column and discarding the rest.
a = (
    all_genres
    .groupby(['Algorithm', 'user', 'genre'])['rank']
    .count()
    .reset_index()
)

# Size of each user's most frequent genre. The column keeps the name 'rank'
# (although it now holds a count) because later cells consume `b` as-is.
b = a.groupby(['Algorithm', 'user'])['rank'].max().reset_index()

# 1 = diverse, 0 = dominated by a single genre.
b['Diverse'] = (b['rank'] <= MAX_SAME_GENRE_RECS).astype('int64')

# Share of users with a diverse list, per algorithm.
b.groupby('Algorithm')['Diverse'].mean()

Algorithm
Content Based    0.010649
Implicit MF      0.749712
Item-Item Sum    0.873359
Lift             0.987394
Popular          0.995107
Torchtag         0.788150
Name: Diverse, dtype: float64

In [22]:
# Attach each user's Diverse flag to their recommendations.
# Only the key columns and the flag are taken from `b`: `b` also has a column
# named 'rank' (the max per-genre count), and merging it in would rename the
# real recommendation rank to 'rank_x' / 'rank_y'.
all_recs2 = pd.merge(
    all_recs,
    b[['Algorithm', 'user', 'Diverse']],
    how='inner',
    on=['Algorithm', 'user'],
)

rla = topn.RecListAnalysis()
rla.add_metric(hit)

# Hit rate for users whose list is diverse. 'rank' is passed along, matching
# the columns given to LensKit in the main evaluation cell.
diverse_recs = all_recs2.loc[all_recs2['Diverse'] == 1, ['Algorithm', 'user', 'item', 'rank']]
r = rla.compute(diverse_recs, eval_df).reset_index()
r.groupby('Algorithm')['hit'].mean()

Algorithm
Content Based    0.032432
Implicit MF      0.909014
Item-Item Sum    0.894740
Lift             0.003032
Popular          0.729855
Torchtag         0.643175
Name: hit, dtype: float64

In [23]:
# Hit rate for users whose list is NOT diverse (Diverse == 0), per algorithm.
rla = topn.RecListAnalysis()
rla.add_metric(hit)

# Keep only the flagged users' rows and the columns LensKit is given here.
non_diverse_mask = all_recs2['Diverse'] == 0
non_diverse_recs = all_recs2.loc[non_diverse_mask, ['Algorithm', 'user', 'item']]

r = rla.compute(non_diverse_recs, eval_df).reset_index()
r.groupby('Algorithm')['hit'].mean()

Algorithm
Content Based    0.227498
Implicit MF      0.860396
Item-Item Sum    0.791818
Lift             0.027397
Popular          0.917647
Torchtag         0.477154
Name: hit, dtype: float64