In [1]:
import ast

import pandas as pd

In [2]:
%load_ext autoreload
%autoreload 2

import sys
# Add the parent directory to the module search path so that the `src` package
# imported in the next cell can be resolved (presumably the repository root —
# confirm against the notebook's location).
sys.path.append("../")

In [3]:
from src.utility.Summary import Summary

In [4]:
# Side-information switches for each CollectiveMF variant, keyed by model name.
# Insertion order is kept (Item, User, Both, No) because other cells build
# their model lists from these keys.
side_info_params = {
    variant: {"use_user_info": with_user, "use_item_info": with_item}
    for variant, with_user, with_item in (
        ("CollectiveMF_Item", False, True),
        ("CollectiveMF_User", True, False),
        ("CollectiveMF_Both", True, True),
        ("CollectiveMF_No", False, False),
    )
}

In [5]:
# Collect, for every model, the hyper-parameter set reported as best for ndcg@10.
# Both result databases are scanned; a (model, params_dict) pair is appended for
# each database in which `get_optimal_params` returns a non-empty result.
model_with_params = []
for db_path in ["default_server.db", "new_server.db"]:
    summary = Summary(db_path="sqlite:///results/" + db_path)
    for model in list(side_info_params.keys()) + ["surprise_SVD", "surprise_Baseline", "FMItem", "FMNone", "BPR", "PureRandom"]:
        opt = summary.get_optimal_params(dataset_name="user_10_item_1_exp", model_name=model, metric="ndcg@10")
        if not opt:
            # Nothing recorded for this model in this database.
            continue
        # `opt` is text read back from the results database (presumably the
        # str() of a sorted list of (name, value) pairs — confirm in Summary).
        # Parse it strictly as a Python literal rather than eval(), so stored
        # text can never execute arbitrary code.
        params_dict = dict(ast.literal_eval(opt))

        model_with_params.append((model, params_dict))

Best ndcg@10 of CollectiveMF_Item is found as 0.13918122178525763
Best ndcg@10 of CollectiveMF_User is found as 0.13722715694739976
Best ndcg@10 of CollectiveMF_Both is found as 0.17519375060900866
Best ndcg@10 of CollectiveMF_No is found as 0.13959638505198083
Best ndcg@10 of surprise_SVD is found as 0.08947580881252634
Best ndcg@10 of surprise_Baseline is found as 0.09295524312705676
Best ndcg@10 of PureRandom is found as 0.02336825218484509
Best ndcg@10 of FMItem is found as 0.049514178125419
Best ndcg@10 of FMNone is found as 0.053887441376631894
Best ndcg@10 of BPR is found as 0.16738308198769195


In [20]:
# Gather the ndcg@1 .. ndcg@10 rows for every model, evaluated at the
# parameters selected in `model_with_params`. Empty query results are dropped.
df_list = []
for db_path in ("default_server.db", "new_server.db"):
    summary = Summary(db_path="sqlite:///results/" + db_path)
    for model, opt_param in model_with_params:
        # The lookup key depends only on the parameters, not on the metric,
        # so it is built once per model.
        str_param = str(sorted(opt_param.items(), key=lambda pair: pair[0]))
        for metric in map("ndcg@{}".format, range(1, 11)):
            df = summary.get_result_for_params(
                dataset_name="user_10_item_1_exp",
                model_name=model,
                hyper=str_param,
                metric=metric,
                verbose=False,
            )
            if len(df) > 0:
                df_list.append(df)
    

In [21]:
# Stack the per-metric frames into one table with a fresh 0..n-1 row index.
topk_results = pd.concat(df_list, ignore_index=True)

In [22]:
# Derive the integer cutoff k from the metric name, e.g. "ndcg@7" -> 7.
topk_results["k"] = topk_results["metric"].apply(
    lambda metric_name: int(metric_name.split("@")[1])
)

In [23]:
# Display the combined ndcg@k table, including the derived "k" column.
topk_results

Unnamed: 0,model,hyper,metric,value,ts,k
0,CollectiveMF_Item,"[('k', 16), ('reg_param', 0.01), ('w_main', 0.5)]",ndcg@1,0.055422,2019-12-18 01:32:20.734647,1
1,CollectiveMF_Item,"[('k', 16), ('reg_param', 0.01), ('w_main', 0.5)]",ndcg@2,0.078422,2019-12-18 01:32:38.024594,2
2,CollectiveMF_Item,"[('k', 16), ('reg_param', 0.01), ('w_main', 0.5)]",ndcg@3,0.092794,2019-12-18 01:32:55.187776,3
3,CollectiveMF_Item,"[('k', 16), ('reg_param', 0.01), ('w_main', 0.5)]",ndcg@4,0.103548,2019-12-18 01:33:12.001429,4
4,CollectiveMF_Item,"[('k', 16), ('reg_param', 0.01), ('w_main', 0.5)]",ndcg@5,0.112015,2019-12-18 01:33:29.350053,5
...,...,...,...,...,...,...
95,BPR,"[('factors', 16), ('iteration', 100), ('learni...",ndcg@6,0.143165,2019-12-18 07:54:02.128447,6
96,BPR,"[('factors', 16), ('iteration', 100), ('learni...",ndcg@7,0.150562,2019-12-18 07:54:29.743686,7
97,BPR,"[('factors', 16), ('iteration', 100), ('learni...",ndcg@8,0.156920,2019-12-18 07:54:57.115100,8
98,BPR,"[('factors', 16), ('iteration', 100), ('learni...",ndcg@9,0.162464,2019-12-18 07:55:24.629524,9


In [24]:
# Persist the combined ndcg@k table. With pandas' default settings the row
# index is written as the first (unnamed) CSV column as well.
topk_results.to_csv("./results/topk_results.csv")

# Find a second set of best params, this time selected by RMSE

In [25]:
# Models for which an rmse metric is looked up: every CollectiveMF variant
# plus the surprise and FM models.
regression_models = list(side_info_params) + [
    "surprise_SVD",
    "surprise_Baseline",
    "FMItem",
    "FMNone",
]

In [26]:
# Collect, for every regression model, the hyper-parameter set reported as best
# for rmse. Both result databases are scanned; a (model, params_dict) pair is
# appended for each database in which `get_optimal_params` returns a non-empty
# result.
rmse_model_with_params = []
for db_path in ["default_server.db", "new_server.db"]:
    summary = Summary(db_path="sqlite:///results/" + db_path)
    for model in regression_models:
        opt = summary.get_optimal_params(dataset_name="user_10_item_1_exp", model_name=model, metric="rmse")
        if not opt:
            # Nothing recorded for this model in this database.
            continue
        # `opt` is text read back from the results database (presumably the
        # str() of a sorted list of (name, value) pairs — confirm in Summary).
        # Parse it strictly as a Python literal rather than eval(), so stored
        # text can never execute arbitrary code.
        params_dict = dict(ast.literal_eval(opt))

        rmse_model_with_params.append((model, params_dict))

Best rmse of CollectiveMF_Item is found as 1.4246476420534175
Best rmse of CollectiveMF_User is found as 1.3985643126656582
Best rmse of CollectiveMF_Both is found as 1.3987647259533256
Best rmse of CollectiveMF_No is found as 1.4252528235467399
Best rmse of surprise_SVD is found as 1.291734245936887
Best rmse of surprise_Baseline is found as 1.2923675755806323
Best rmse of FMItem is found as 2.406458841466745
Best rmse of FMNone is found as 1.4488859976594957


In [27]:
# RMSE of each regression model when run with the parameters stored in
# `model_with_params` (the ndcg@10 selection). These rows are flagged
# is_rmse_opt=False.
rmse_list = []
for db_path in ("default_server.db", "new_server.db"):
    summary = Summary(db_path="sqlite:///results/" + db_path)
    for model, opt_param in model_with_params:
        if model not in regression_models:
            # Only models listed in regression_models are looked up for rmse.
            continue
        str_param = str(sorted(opt_param.items(), key=lambda pair: pair[0]))

        df = summary.get_result_for_params(
            dataset_name="user_10_item_1_exp",
            model_name=model,
            hyper=str_param,
            metric="rmse",
            verbose=False,
        )
        if len(df) > 0:
            df["is_rmse_opt"] = False
            rmse_list.append(df)

In [28]:
# RMSE of each regression model at its rmse-selected parameters
# (`rmse_model_with_params`). These rows are flagged is_rmse_opt=True and
# appended to the same `rmse_list`.
for db_path in ("default_server.db", "new_server.db"):
    summary = Summary(db_path="sqlite:///results/" + db_path)
    for model, opt_param in rmse_model_with_params:
        str_param = str(sorted(opt_param.items(), key=lambda pair: pair[0]))

        df = summary.get_result_for_params(
            dataset_name="user_10_item_1_exp",
            model_name=model,
            hyper=str_param,
            metric="rmse",
            verbose=False,
        )
        if len(df) > 0:
            df["is_rmse_opt"] = True
            rmse_list.append(df)

In [29]:
# Stack both groups of rmse rows into one table with a fresh 0..n-1 row index.
rmse_results = pd.concat(rmse_list, ignore_index=True)

In [30]:
# Display the combined rmse table; is_rmse_opt is True for rows looked up with
# the rmse-selected parameters and False for rows using the ndcg@10 selection.
rmse_results

Unnamed: 0,model,hyper,metric,value,ts,is_rmse_opt
0,CollectiveMF_Item,"[('k', 16), ('reg_param', 0.01), ('w_main', 0.5)]",rmse,1.494356,2019-12-16 08:06:08.523691,False
1,CollectiveMF_User,"[('k', 16), ('reg_param', 0.01), ('w_main', 0.5)]",rmse,1.489873,2019-12-16 07:47:19.779780,False
2,CollectiveMF_Both,"[('k', 4), ('reg_param', 0.01), ('w_main', 1.0)]",rmse,1.492647,2019-12-16 08:30:18.577571,False
3,CollectiveMF_No,"[('k', 16), ('reg_param', 0.01), ('w_main', 0.5)]",rmse,1.491436,2019-12-16 08:18:36.851814,False
4,surprise_SVD,"[('lr_all', 0.002), ('n_factors', 10), ('reg_a...",rmse,1.32515,2019-12-17 00:00:07.702298,False
5,surprise_Baseline,"[('reg_i', 20), ('reg_u', 15)]",rmse,1.325249,2019-12-16 19:15:44.848353,False
6,FMItem,"[('l2_reg_V', 1), ('l2_reg_w', 1), ('model_inf...",rmse,2.406459,2019-12-18 05:53:14.661005,False
7,FMNone,"[('l2_reg_V', 1), ('l2_reg_w', 1), ('model_inf...",rmse,1.581188,2019-12-17 10:58:07.156385,False
8,CollectiveMF_Item,"[('k', 5), ('reg_param', 0.0001), ('w_main', 1...",rmse,1.424648,2019-12-14 22:24:21.933194,True
9,CollectiveMF_User,"[('k', 16), ('reg_param', 0.0001), ('w_main', ...",rmse,1.398564,2019-12-15 02:04:18.135203,True


In [31]:
# Persist the combined rmse table. With pandas' default settings the row index
# is written as the first (unnamed) CSV column as well.
rmse_results.to_csv("./results/rmse_results.csv")