In [1]:
import numpy as np, pandas as pd
from datetime import datetime
from copy import deepcopy

In [2]:
# Enable IPython's autoreload extension in mode 2 so edited project modules
# are re-imported automatically instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2

# Add the parent directory to the import path so the `src.*` imports in the
# following cells can be resolved from this notebook's location.
import sys
sys.path.append("../")

In [3]:
from src.data_pipeline.DataLoader import DataLoader
from src.utility.sys_utils import get_spark

In [4]:
from src.data_pipeline.pipeline import test_evaluation
from src.utility.Summary import Summary

# Read data from files

In [9]:
# Change config_name to your own config file; otherwise the paths it defines
# (e.g. the db_path read from dataloader.get_config() later on) will not be found.
dataloader = DataLoader(dataset_name="user_10_item_1_exp", config_name="default_config.json")
spark = get_spark(cores=4)

# Get the optimal parameters for each model

In [10]:
# Build the Summary object from the db_path declared in the loader's config.
# get_topk_performance reads this notebook-level `summary` to look up parameters.
summary = Summary(dataloader.get_config().db_path)

In [11]:
# Distinct values of the `model` column in the summary table, i.e. the model
# names that can be passed to summary.get_optimal_params.
available_models = summary.table.model.unique()
available_models

array(['CollectiveMF_Item', 'CollectiveMF_User', 'CollectiveMF_No',
       'CollectiveMF_Both', 'surprise_SVD', 'surprise_Baseline',
       'PureRandom'], dtype=object)

In [12]:
def get_topk_performance(models, model_class, dataset_name="user_10_item_1_exp",
                         metric="ndcg@10", max_k=10):
    """Evaluate each model with its best recorded parameters for top_k = 1..max_k.

    For every name in ``models`` the best parameter set is fetched from the
    notebook-level ``summary``; a ``model_class`` instance is then built from
    those parameters and passed to ``test_evaluation`` once per cut-off.

    Relies on the notebook-level globals ``summary``, ``dataloader`` and
    ``spark`` being defined before the call.

    Parameters
    ----------
    models : sequence of str
        Model names as stored in the summary table.
    model_class : callable
        Called with a single parameter dict to build the model.
    dataset_name : str, optional
        Dataset whose tuning results are queried. Defaults to the dataset
        this notebook was written for.
    metric : str, optional
        Metric used to select the best parameters. It only affects the
        lookup; the evaluation itself always requests ``metrics=["ndcg"]``.
    max_k : int, optional
        Largest cut-off to evaluate; cut-offs 1, 2, ..., max_k are run.
        A value below 1 runs no evaluation.

    Returns
    -------
    None
        Results are produced as side effects of ``test_evaluation``.
    """
    # Fetch every model's parameters before any evaluation starts, so all
    # lookups happen up front exactly as in the original two-pass layout.
    optimal_params = [
        summary.get_optimal_params(dataset_name=dataset_name, model_name=model_name, metric=metric)
        for model_name in models
    ]

    for model_name, param in zip(models, optimal_params):
        # NOTE(review): eval() executes whatever string the summary store
        # returns. That is acceptable only while the store is trusted; if the
        # stored values are plain literals, ast.literal_eval would be the
        # safer parser -- confirm the stored format before switching.
        param_dict = dict(eval(param))

        if "CollectiveMF" in model_name:
            # The side-information switches are encoded in the model name
            # rather than in the stored parameters.
            param_dict["use_user_info"] = model_name in ("CollectiveMF_User", "CollectiveMF_Both")
            param_dict["use_item_info"] = model_name in ("CollectiveMF_Item", "CollectiveMF_Both")

        model = model_class(param_dict)

        # The same model instance is reused for every cut-off.
        for top_k in range(1, max_k + 1):
            test_evaluation(data_loader=dataloader, model=model, spark=spark, caching=True,
                            metrics=["ndcg"], top_k=top_k)

# Pure Random Model

In [5]:
from src.model.PureRandom import PureRandom

In [13]:
# Run the top-k evaluation for the PureRandom model.
get_topk_performance(models=["PureRandom"], model_class=PureRandom)

Best ndcg@10 of PureRandom is found as 0.023368252184845093
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/PureRandom/random_state_42/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/PureRandom/random_state_42/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/PureRandom/random_state_42/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/PureRandom/random_state_42/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/PureRandom/random_state_42/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/PureRandom/random_state_42/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/PureRandom/random_state_42/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/PureRandom/random_sta

# Collective_MF

In [9]:
from src.model.CollectiveMF import CollectiveMF

In [10]:
# CollectiveMF variants to evaluate, named by the side information they use.
models = [
    "CollectiveMF_Item",
    "CollectiveMF_User",
    "CollectiveMF_No",
    "CollectiveMF_Both",
]

In [14]:
# Run the top-k evaluation for every CollectiveMF variant listed in `models`.
get_topk_performance(models=models, model_class=CollectiveMF)

Best ndcg@10 of CollectiveMF_Item is found as 0.13918122178525766
Best ndcg@10 of CollectiveMF_User is found as 0.13722715694739973
Best ndcg@10 of CollectiveMF_No is found as 0.13959638505198105
Best ndcg@10 of CollectiveMF_Both is found as 0.17519375060900866
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/CollectiveMF_Item/k_16-reg_param_0.01-w_main_0.5/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/CollectiveMF_Item/k_16-reg_param_0.01-w_main_0.5/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/CollectiveMF_Item/k_16-reg_param_0.01-w_main_0.5/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/CollectiveMF_Item/k_16-reg_param_0.01-w_main_0.5/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/CollectiveMF_Item/k_16-reg_param_0.01-w_main_0.5/Ranking_fold_-1.parquet
Using cached file f

Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/CollectiveMF_No/k_16-reg_param_0.01-w_main_0.5/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/CollectiveMF_No/k_16-reg_param_0.01-w_main_0.5/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/CollectiveMF_No/k_16-reg_param_0.01-w_main_0.5/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/CollectiveMF_No/k_16-reg_param_0.01-w_main_0.5/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/CollectiveMF_Both/k_4-reg_param_0.01-w_main_1.0/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/CollectiveMF_Both/k_4-reg_param_0.01-w_main_1.0/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/CollectiveMF_Both/k_4-reg_param_0.01-w_main_1.0/R

# Surprise models (BaselineOnly & SVD)

In [17]:
from src.model.surprise_Baseline import surprise_Baseline
from src.model.surprise_SVD import surprise_SVD

In [23]:
# Run the top-k evaluation for the surprise_Baseline model.
get_topk_performance(models=["surprise_Baseline"], model_class=surprise_Baseline)

Best ndcg@10 of surprise_Baseline is found as 0.09295524312705684
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_Baseline/reg_i_20-reg_u_15/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_Baseline/reg_i_20-reg_u_15/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_Baseline/reg_i_20-reg_u_15/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_Baseline/reg_i_20-reg_u_15/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_Baseline/reg_i_20-reg_u_15/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_Baseline/reg_i_20-reg_u_15/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_Baseline/reg_i_20-reg_u_15/Ranking_fold_-1.parquet
Using cached f

In [24]:
# Run the top-k evaluation for the surprise_SVD model.
get_topk_performance(models=["surprise_SVD"], model_class=surprise_SVD)

Best ndcg@10 of surprise_SVD is found as 0.08952151829850094
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_SVD/lr_all_0.002-n_factors_10-reg_all_0.01/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_SVD/lr_all_0.002-n_factors_10-reg_all_0.01/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_SVD/lr_all_0.002-n_factors_10-reg_all_0.01/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_SVD/lr_all_0.002-n_factors_10-reg_all_0.01/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_SVD/lr_all_0.002-n_factors_10-reg_all_0.01/Ranking_fold_-1.parquet
Using cached file from /media/ExtHDD01/log/user_10_item_1_exp/surprise_SVD/lr_all_0.002-n_factors_10-reg_all_0.01/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /media/ExtHDD01/log/us

# FM model

In [11]:
# Rebind the notebook-level dataloader, Spark session and summary using
# "server_new_config.json". get_topk_performance reads these globals, so every
# call made after this cell runs against this configuration.
dataloader = DataLoader(dataset_name="user_10_item_1_exp", config_name="server_new_config.json")
spark = get_spark(cores=4)
summary = Summary(dataloader.get_config().db_path)

In [12]:
from src.model.FM import FM

In [17]:
# Run the top-k evaluation for the results stored under the name "FMItem".
get_topk_performance(models=["FMItem"], model_class=FM)

Best ndcg@10 of FMItem is found as 0.049514178125418994
Using cached file from /home/data/log/user_10_item_1_exp/FMItem/l2_reg_V_1-l2_reg_w_1-model_info_used_Item-n_iter_100-rank_2/Ranking_fold_-1.parquet
Using cached file from /home/data/log/user_10_item_1_exp/FMItem/l2_reg_V_1-l2_reg_w_1-model_info_used_Item-n_iter_100-rank_2/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /home/data/log/user_10_item_1_exp/FMItem/l2_reg_V_1-l2_reg_w_1-model_info_used_Item-n_iter_100-rank_2/Ranking_fold_-1.parquet
Using cached file from /home/data/log/user_10_item_1_exp/FMItem/l2_reg_V_1-l2_reg_w_1-model_info_used_Item-n_iter_100-rank_2/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /home/data/log/user_10_item_1_exp/FMItem/l2_reg_V_1-l2_reg_w_1-model_info_used_Item-n_iter_100-rank_2/Ranking_fold_-1.parquet
Using cached file from /home/data/log/user_10_item_1_exp/FMItem/l2_reg_V_1-l2_reg_w_1-model_info_used_Item-n_iter_100-rank_2/Rating_fold_-1.parquet
Finish 

In [21]:
# Run the top-k evaluation for the results stored under the name "FMNone".
get_topk_performance(models=["FMNone"], model_class=FM)

Best ndcg@10 of FMNone is found as 0.05388744137663189
Using cached file from /home/data/log/user_10_item_1_exp/FMNone/l2_reg_V_1-l2_reg_w_1-model_info_used_None-n_iter_100-rank_8/Ranking_fold_-1.parquet
Using cached file from /home/data/log/user_10_item_1_exp/FMNone/l2_reg_V_1-l2_reg_w_1-model_info_used_None-n_iter_100-rank_8/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /home/data/log/user_10_item_1_exp/FMNone/l2_reg_V_1-l2_reg_w_1-model_info_used_None-n_iter_100-rank_8/Ranking_fold_-1.parquet
Using cached file from /home/data/log/user_10_item_1_exp/FMNone/l2_reg_V_1-l2_reg_w_1-model_info_used_None-n_iter_100-rank_8/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /home/data/log/user_10_item_1_exp/FMNone/l2_reg_V_1-l2_reg_w_1-model_info_used_None-n_iter_100-rank_8/Ranking_fold_-1.parquet
Using cached file from /home/data/log/user_10_item_1_exp/FMNone/l2_reg_V_1-l2_reg_w_1-model_info_used_None-n_iter_100-rank_8/Rating_fold_-1.parquet
Finish i

# BPR model

In [18]:
from src.model.BPR import BPR

In [19]:
# Run the top-k evaluation with BPR, using the parameters stored under the
# summary name "Test_implicit".
get_topk_performance(models=["Test_implicit"], model_class=BPR)

Best ndcg@10 of Test_implicit is found as 0.16527315551622762
Creating directory and start writing ...
Rewriting files in /home/data/log/user_10_item_1_exp/BPR/factors_16-iteration_100-learning_rate_0.005-regularization_0.001/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /home/data/log/user_10_item_1_exp/BPR/factors_16-iteration_100-learning_rate_0.005-regularization_0.001/Ranking_fold_-1.parquet
Using cached file from /home/data/log/user_10_item_1_exp/BPR/factors_16-iteration_100-learning_rate_0.005-regularization_0.001/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /home/data/log/user_10_item_1_exp/BPR/factors_16-iteration_100-learning_rate_0.005-regularization_0.001/Ranking_fold_-1.parquet
Using cached file from /home/data/log/user_10_item_1_exp/BPR/factors_16-iteration_100-learning_rate_0.005-regularization_0.001/Rating_fold_-1.parquet
Finish inference phase
Using cached file from /home/data/log/user_10_item_1_exp/BPR/factors_16-iteratio