In [3]:
import pandas as pd
import numpy as np
from implicit.evaluation import train_test_split, ranking_metrics_at_k
from implicit.datasets.movielens import get_movielens
import implicit

In [42]:
class CrossValidation:
    
    def __init__(self, user_item, k):
        self.user_item = user_item
        self.k = k

    def mpr_per_user(self, model, train, test, num_recs, user):
        recommended_items = model.recommend(user_items=train[user], userid=user, filter_already_liked_items=True, N = num_recs)[0]
        test_items = test[user].nonzero()[1]
        test_items_in_list = test_items[np.isin(test_items, recommended_items)]
        if len(test_items_in_list) == 0:
            return 0.5
        recommended_indices = recommended_items.argsort()
        hit_indices = recommended_indices[np.searchsorted(recommended_items[recommended_indices], test_items_in_list)]
        #return (np.sum(hit_indices) / num_recs) / len(hit_indices)
        return np.mean(hit_indices / num_recs)
   
    def calc_mpr(self, model, train, test):
        mprs = []
        for u in range(self.user_item.shape[0]) :
            mpr = self.mpr_per_user(model, train, test, self.user_item.shape[1], u)
            mprs.append(mpr)
        return {'mpr' : np.mean(mprs)} 
   
    def evaluate_model(self, model, train, test, k):
        metrics = ranking_metrics_at_k(model, train, test, K=k)
        mpr = self.calc_mpr(model, train, test)
        metrics.update(mpr)
        return pd.DataFrame(metrics, index=['metrics@'+str(k)])  
   
    def split_k_fold(self) :
        split_matrix = self.user_item
        return_dict = {}
        return_dict_train = {}
        for i in range(self.k-1):
            train_temp, test_temp = train_test_split(split_matrix, train_percentage=((self.k-(i+1))/(self.k-i)))
            return_dict[str(i)] = test_temp
            if i == 0:
                return_dict_train[str(i)] = train_temp
                rest = test_temp
            else:
                return_dict_train[str(i)] = (train_temp + rest)
                rest = (rest + test_temp)
            if i == (self.k-2):
                return_dict[str(i+1)] = train_temp
                return_dict_train[str(i+1)] = rest
            split_matrix = train_temp
        return (return_dict, return_dict_train)


    def k_fold_eval(self, test, train, model, alpha) :
        for i in range(len(test)) :
            model = model
            test_temp = test[str(i)]
            train_temp = train[str(i)]
            print(test_temp.nnz)
            print(train_temp.nnz)
            model.fit(train_temp * alpha)
            m = self.evaluate_model(model, train_temp, test_temp, 10)
            if i == 0:
                df = m
            else :
                df = pd.concat((df, m), axis=0)
        return df

In [4]:
movies1, ratings1 = get_movielens("1M")

In [5]:
ratings1_t = ratings1.T

In [43]:
cross_validation = CrossValidation(ratings1_t, 5)

In [44]:
train, test = cross_validation.split_k_fold()

In [45]:
als = implicit.als.AlternatingLeastSquares(factors=128, regularization=0.01, num_threads=4, use_cg=True, use_native=True)

In [46]:
res = cross_validation.k_fold_eval(test, train, als, 40)

  0%|          | 0/15 [00:00<?, ?it/s]

800265
199944


100%|██████████| 15/15 [00:04<00:00,  3.40it/s]
100%|██████████| 6040/6040 [00:00<00:00, 9948.28it/s] 
  0%|          | 0/15 [00:00<?, ?it/s]

799976
200233


100%|██████████| 15/15 [00:04<00:00,  3.53it/s]
100%|██████████| 6040/6040 [00:00<00:00, 10575.56it/s]
  0%|          | 0/15 [00:00<?, ?it/s]

800803
199406


100%|██████████| 15/15 [00:04<00:00,  3.49it/s]
100%|██████████| 6040/6040 [00:00<00:00, 10631.42it/s]
  0%|          | 0/15 [00:00<?, ?it/s]

799470
200739


100%|██████████| 15/15 [00:04<00:00,  3.60it/s]
100%|██████████| 6040/6040 [00:00<00:00, 10411.46it/s]
  0%|          | 0/15 [00:00<?, ?it/s]

800322
199887


100%|██████████| 15/15 [00:04<00:00,  3.58it/s]
100%|██████████| 6040/6040 [00:00<00:00, 10393.50it/s]


In [47]:
res

Unnamed: 0,precision,map,ndcg,auc,mpr
metrics@10,0.337152,0.223396,0.352792,0.518282,0.273166
metrics@10,0.409255,0.290524,0.429415,0.524248,0.245237
metrics@10,0.425702,0.305386,0.446282,0.525916,0.242163
metrics@10,0.427036,0.304302,0.445478,0.526293,0.243306
metrics@10,0.421987,0.302089,0.443107,0.525523,0.245361
