In [1]:
from typing import List, Callable

import glob
import pandas as pd

from graph_utils import (
    GOWALLA_DATASET_NAME, YELP_2018_DATASET_NAME, AMAZON_BOOK_DATASET_NAME,
    df_table_to_latex,
    BASE_FRAMEWORK_DIR
)

In [2]:
MODEL_COLNAME = "model"
PARAMS_COLNAME = "Params"

def parse_result(filename):
    df = (
        pd.read_csv(filename, sep="\t")
        .round(4)
        .astype(str)
        
    )

    df[MODEL_COLNAME] = df[MODEL_COLNAME].apply(lambda x: x.split("_")[0])

    return df

def get_results(
        dates: List[str],
        datasets: List[str] = None,
        exclude_metrics: List[str] = None,
        models: List[str] = None,
        caption: str = "",
        fix_params_f: Callable = None):
    all_files: List[str] = []
    if datasets is None:
        datasets = ["*"]
    for date in dates:
        for dataset in datasets:
            all_files.extend(
                glob.glob(f"{BASE_FRAMEWORK_DIR}/results/{dataset}/performance/*{date}*.tsv")
            )
    df_list: List[pd.DataFrame] = []
    for file in all_files:
        dataset = file.split("/")[3]
        df = parse_result(file)
        if "--" in dataset:
            params = ",".join(dataset.split("--")[1:])
            df.insert(0, PARAMS_COLNAME, params)
        df_list.append(df)
    
    df_final: pd.DataFrame = pd.concat(df_list, axis=0)

    if models is not None:
        df_final = df_final[df_final[MODEL_COLNAME].isin(models)]

    if PARAMS_COLNAME in df_final.columns:
        df_final = df_final.sort_values(PARAMS_COLNAME).drop_duplicates(PARAMS_COLNAME)
        if fix_params_f is not None:
            df_final[PARAMS_COLNAME] = df_final[PARAMS_COLNAME].apply(fix_params_f)

    unique_models = df_final[MODEL_COLNAME].unique()
    if len(unique_models) == 1:
        df_final.drop(columns=[MODEL_COLNAME], inplace=True)
        print(unique_models[0])
    else:
        df_final = df_final.drop_duplicates(subset=[MODEL_COLNAME]).sort_values(MODEL_COLNAME)

    if exclude_metrics is not None:
        df_final.drop(columns=exclude_metrics, inplace=True)

    df_final["ItemCoverage"] = df_final["ItemCoverage"].astype(float).astype(int)
    df_final["ARP"] = df_final["ARP"].astype(float).round(2).astype(str)
    df_final = df_final.rename(columns={"ItemCoverage": "IC", "TailRecall": "TRecall"})
    
    print(df_table_to_latex(df_final, index=False, caption=caption))

    return df_final

# Results

In [3]:
GLOBAL_EXCLUDE_METRIC_LIST = ["ACLT", "EFD", "SEntropy", "APLT"]

## Hypotheses

In [4]:
GENERAL_CAPTION_TEMPLATE = "GFCF results for `{}` on `{}` dataset"
GFCF_DATASET_NAME = "GFCF"

def basic_fix_params_f(x: str):
    xl = x.split(",")
    return ",".join(xl[1:])

### Hypothesis 1 (Big neighborhoods pruner)

In [5]:
H1_METHOD_NAME = "pruner of big neighborhoods"
H1_SUFFIX = "--????--q=*"

In [6]:
get_results(
    ["2024_05_04"],
    [f"{GOWALLA_DATASET_NAME}{H1_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H1_METHOD_NAME, GOWALLA_DATASET_NAME),
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.9 & 0.1356 & 0.0982 & 0.2593 & 0.0455 & 32455 & 0.0201 & 23.51 \\
item,q=0.95 & 0.147 & 0.1091 & 0.2034 & 0.051 & 29551 & 0.0199 & 33.08 \\
item,q=0.99 & 0.1658 & 0.127 & 0.1397 & 0.0603 & 25159 & 0.0206 & 66.53 \\
user,q=0.9 & 0.1819 & 0.1448 & 0.1425 & 0.0669 & 27138 & 0.0156 & 114.74 \\
user,q=0.95 & 0.1841 & 0.1483 & 0.1253 & 0.0698 & 24929 & 0.0175 & 135.35 \\
user,q=0.99 & 0.1851 & 0.1511 & 0.1133 & 0.0725 & 22971 & 0.0199 & 159.23 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `pruner of big neighborhoods` on `gowalla` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.9",0.1356,0.0982,0.2593,0.0455,32455,0.0201,23.51
0,"item,q=0.95",0.147,0.1091,0.2034,0.051,29551,0.0199,33.08
0,"item,q=0.99",0.1658,0.127,0.1397,0.0603,25159,0.0206,66.53
0,"user,q=0.9",0.1819,0.1448,0.1425,0.0669,27138,0.0156,114.74
0,"user,q=0.95",0.1841,0.1483,0.1253,0.0698,24929,0.0175,135.35
0,"user,q=0.99",0.1851,0.1511,0.1133,0.0725,22971,0.0199,159.23


In [7]:
get_results(
    ["2024_05_04"],
    [f"{YELP_2018_DATASET_NAME}{H1_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H1_METHOD_NAME, YELP_2018_DATASET_NAME),
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.9 & 0.06 & 0.0478 & 0.1187 & 0.028 & 15304 & 0.002 & 59.44 \\
item,q=0.95 & 0.0641 & 0.0513 & 0.0855 & 0.0303 & 12580 & 0.0022 & 87.43 \\
item,q=0.99 & 0.0681 & 0.055 & 0.0542 & 0.0328 & 10299 & 0.0024 & 159.49 \\
user,q=0.9 & 0.0681 & 0.0536 & 0.0541 & 0.0311 & 13538 & 0.0022 & 209.59 \\
user,q=0.95 & 0.0691 & 0.0549 & 0.0477 & 0.0322 & 11549 & 0.0022 & 240.2 \\
user,q=0.99 & 0.0696 & 0.0564 & 0.0438 & 0.0336 & 10022 & 0.0024 & 274.52 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `pruner of big neighborhoods` on `yelp-2018` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.9",0.06,0.0478,0.1187,0.028,15304,0.002,59.44
0,"item,q=0.95",0.0641,0.0513,0.0855,0.0303,12580,0.0022,87.43
0,"item,q=0.99",0.0681,0.055,0.0542,0.0328,10299,0.0024,159.49
0,"user,q=0.9",0.0681,0.0536,0.0541,0.0311,13538,0.0022,209.59
0,"user,q=0.95",0.0691,0.0549,0.0477,0.0322,11549,0.0022,240.2
0,"user,q=0.99",0.0696,0.0564,0.0438,0.0336,10022,0.0024,274.52


In [8]:
get_results(
    ["2024_05_04"],
    [f"{AMAZON_BOOK_DATASET_NAME}{H1_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H1_METHOD_NAME, AMAZON_BOOK_DATASET_NAME),
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.9 & 0.0676 & 0.0555 & 0.3046 & 0.031 & 74324 & 0.0167 & 32.18 \\
item,q=0.95 & 0.0698 & 0.0572 & 0.2744 & 0.0321 & 72429 & 0.0167 & 42.33 \\
item,q=0.99 & 0.0715 & 0.0585 & 0.2359 & 0.0329 & 69853 & 0.0171 & 70.26 \\
user,q=0.9 & 0.0707 & 0.0574 & 0.2482 & 0.0319 & 73519 & 0.0149 & 101.87 \\
user,q=0.95 & 0.0709 & 0.0577 & 0.2288 & 0.0323 & 70850 & 0.0155 & 122.53 \\
user,q=0.99 & 0.071 & 0.0581 & 0.215 & 0.0326 & 68772 & 0.0166 & 145.75 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `pruner of big neighborhoods` on `amazon-book` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.9",0.0676,0.0555,0.3046,0.031,74324,0.0167,32.18
0,"item,q=0.95",0.0698,0.0572,0.2744,0.0321,72429,0.0167,42.33
0,"item,q=0.99",0.0715,0.0585,0.2359,0.0329,69853,0.0171,70.26
0,"user,q=0.9",0.0707,0.0574,0.2482,0.0319,73519,0.0149,101.87
0,"user,q=0.95",0.0709,0.0577,0.2288,0.0323,70850,0.0155,122.53
0,"user,q=0.99",0.071,0.0581,0.215,0.0326,68772,0.0166,145.75


### Hypothesis 2 (Top betweenness centrality connector)

In [9]:
H2_METHOD_NAME = "betweenness centrality connector"
H2_GENERAL_SUFFIX_TEMPLATE = "--{}-bc--*"

#### Closest

In [10]:
H2_CLOSEST_METHOD = f"{H2_METHOD_NAME} (closest)"
H2_CLOSEST_SUFFIX = H2_GENERAL_SUFFIX_TEMPLATE.format("c")

In [11]:
get_results(
    ["2024_05_04"],
    [f"{GOWALLA_DATASET_NAME}{H2_CLOSEST_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H2_CLOSEST_METHOD, GOWALLA_DATASET_NAME),
    fix_params_f=basic_fix_params_f,
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.9 & 0.1852 & 0.152 & 0.111 & 0.0735 & 22596 & 0.0203 & 168.19 \\
item,q=0.95 & 0.1852 & 0.1521 & 0.1098 & 0.0735 & 22459 & 0.0202 & 169.4 \\
item,q=0.99 & 0.1843 & 0.1494 & 0.1046 & 0.072 & 21797 & 0.0197 & 218.84 \\
user,q=0.9 & 0.1853 & 0.1521 & 0.1112 & 0.0736 & 22527 & 0.0198 & 167.84 \\
user,q=0.95 & 0.1855 & 0.1523 & 0.1103 & 0.0737 & 22364 & 0.0191 & 167.77 \\
user,q=0.99 & 0.1853 & 0.1523 & 0.107 & 0.0737 & 21779 & 0.0178 & 168.69 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `betweenness centrality connector (closest)` on `gowalla` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.9",0.1852,0.152,0.111,0.0735,22596,0.0203,168.19
0,"item,q=0.95",0.1852,0.1521,0.1098,0.0735,22459,0.0202,169.4
0,"item,q=0.99",0.1843,0.1494,0.1046,0.072,21797,0.0197,218.84
0,"user,q=0.9",0.1853,0.1521,0.1112,0.0736,22527,0.0198,167.84
0,"user,q=0.95",0.1855,0.1523,0.1103,0.0737,22364,0.0191,167.77
0,"user,q=0.99",0.1853,0.1523,0.107,0.0737,21779,0.0178,168.69


In [12]:
get_results(
    ["2024_05_04"],
    [f"{YELP_2018_DATASET_NAME}{H2_CLOSEST_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H2_CLOSEST_METHOD, YELP_2018_DATASET_NAME),
    fix_params_f=basic_fix_params_f,
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.9 & 0.0697 & 0.0571 & 0.0428 & 0.0344 & 9684 & 0.0024 & 291.46 \\
item,q=0.95 & 0.0696 & 0.0571 & 0.0429 & 0.0344 & 9662 & 0.0024 & 291.55 \\
item,q=0.99 & 0.069 & 0.0559 & 0.0416 & 0.0336 & 9512 & 0.0024 & 368.79 \\
user,q=0.9 & 0.0697 & 0.0571 & 0.043 & 0.0344 & 9589 & 0.0024 & 290.62 \\
user,q=0.95 & 0.0698 & 0.0572 & 0.0432 & 0.0344 & 9579 & 0.0023 & 290.03 \\
user,q=0.99 & 0.0699 & 0.0573 & 0.0436 & 0.0345 & 9517 & 0.0022 & 288.86 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `betweenness centrality connector (closest)` on `yelp-2018` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.9",0.0697,0.0571,0.0428,0.0344,9684,0.0024,291.46
0,"item,q=0.95",0.0696,0.0571,0.0429,0.0344,9662,0.0024,291.55
0,"item,q=0.99",0.069,0.0559,0.0416,0.0336,9512,0.0024,368.79
0,"user,q=0.9",0.0697,0.0571,0.043,0.0344,9589,0.0024,290.62
0,"user,q=0.95",0.0698,0.0572,0.0432,0.0344,9579,0.0023,290.03
0,"user,q=0.99",0.0699,0.0573,0.0436,0.0345,9517,0.0022,288.86


In [13]:
get_results(
    ["2024_05_04"],
    [f"{AMAZON_BOOK_DATASET_NAME}{H2_CLOSEST_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H2_CLOSEST_METHOD, AMAZON_BOOK_DATASET_NAME),
    fix_params_f=basic_fix_params_f,
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.9 & 0.071 & 0.0585 & 0.2129 & 0.0329 & 68424 & 0.0174 & 154.16 \\
item,q=0.95 & 0.0711 & 0.0585 & 0.2125 & 0.0329 & 68403 & 0.0174 & 154.38 \\
item,q=0.99 & 0.071 & 0.0584 & 0.2103 & 0.0329 & 68156 & 0.0174 & 161.21 \\
user,q=0.9 & 0.0709 & 0.0583 & 0.2125 & 0.0328 & 68575 & 0.0168 & 154.88 \\
user,q=0.95 & 0.0707 & 0.0582 & 0.2115 & 0.0328 & 68570 & 0.0164 & 155.44 \\
user,q=0.99 & 0.0706 & 0.058 & 0.2077 & 0.0326 & 68302 & 0.0155 & 157.04 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `betweenness centrality connector (closest)` on `amazon-book` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.9",0.071,0.0585,0.2129,0.0329,68424,0.0174,154.16
0,"item,q=0.95",0.0711,0.0585,0.2125,0.0329,68403,0.0174,154.38
0,"item,q=0.99",0.071,0.0584,0.2103,0.0329,68156,0.0174,161.21
0,"user,q=0.9",0.0709,0.0583,0.2125,0.0328,68575,0.0168,154.88
0,"user,q=0.95",0.0707,0.0582,0.2115,0.0328,68570,0.0164,155.44
0,"user,q=0.99",0.0706,0.058,0.2077,0.0326,68302,0.0155,157.04


#### Farthest

In [14]:
H2_FARTHEST_METHOD = f"{H2_METHOD_NAME} (farthest)"
H2_FARTHEST_SUFFIX = H2_GENERAL_SUFFIX_TEMPLATE.format("f")

In [15]:
get_results(
    ["2024_05_04"],
    [f"{GOWALLA_DATASET_NAME}{H2_FARTHEST_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H2_FARTHEST_METHOD, GOWALLA_DATASET_NAME),
    fix_params_f=basic_fix_params_f,
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.9 & 0.1852 & 0.152 & 0.1111 & 0.0735 & 22594 & 0.0203 & 168.14 \\
item,q=0.95 & 0.1853 & 0.1522 & 0.1098 & 0.0736 & 22449 & 0.0202 & 170.17 \\
item,q=0.99 & 0.1842 & 0.1491 & 0.1052 & 0.0717 & 21868 & 0.0198 & 245.09 \\
user,q=0.9 & 0.1854 & 0.1522 & 0.1112 & 0.0736 & 22544 & 0.0199 & 167.83 \\
user,q=0.95 & 0.1855 & 0.1525 & 0.1102 & 0.0737 & 22357 & 0.0191 & 167.86 \\
user,q=0.99 & 0.1853 & 0.1523 & 0.1069 & 0.0737 & 21780 & 0.0178 & 168.9 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `betweenness centrality connector (farthest)` on `gowalla` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.9",0.1852,0.152,0.1111,0.0735,22594,0.0203,168.14
0,"item,q=0.95",0.1853,0.1522,0.1098,0.0736,22449,0.0202,170.17
0,"item,q=0.99",0.1842,0.1491,0.1052,0.0717,21868,0.0198,245.09
0,"user,q=0.9",0.1854,0.1522,0.1112,0.0736,22544,0.0199,167.83
0,"user,q=0.95",0.1855,0.1525,0.1102,0.0737,22357,0.0191,167.86
0,"user,q=0.99",0.1853,0.1523,0.1069,0.0737,21780,0.0178,168.9


In [16]:
get_results(
    ["2024_05_04"],
    [f"{YELP_2018_DATASET_NAME}{H2_FARTHEST_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H2_FARTHEST_METHOD, YELP_2018_DATASET_NAME),
    fix_params_f=basic_fix_params_f,
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.9 & 0.0697 & 0.0571 & 0.0428 & 0.0344 & 9680 & 0.0024 & 291.42 \\
item,q=0.95 & 0.0697 & 0.0571 & 0.0429 & 0.0344 & 9663 & 0.0025 & 291.4 \\
item,q=0.99 & 0.0692 & 0.0564 & 0.0419 & 0.034 & 9522 & 0.0024 & 310.11 \\
user,q=0.9 & 0.0697 & 0.0572 & 0.043 & 0.0344 & 9652 & 0.0024 & 290.64 \\
user,q=0.95 & 0.0697 & 0.0572 & 0.0434 & 0.0344 & 9564 & 0.0023 & 289.79 \\
user,q=0.99 & 0.0699 & 0.0573 & 0.0437 & 0.0345 & 9512 & 0.0021 & 288.79 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `betweenness centrality connector (farthest)` on `yelp-2018` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.9",0.0697,0.0571,0.0428,0.0344,9680,0.0024,291.42
0,"item,q=0.95",0.0697,0.0571,0.0429,0.0344,9663,0.0025,291.4
0,"item,q=0.99",0.0692,0.0564,0.0419,0.034,9522,0.0024,310.11
0,"user,q=0.9",0.0697,0.0572,0.043,0.0344,9652,0.0024,290.64
0,"user,q=0.95",0.0697,0.0572,0.0434,0.0344,9564,0.0023,289.79
0,"user,q=0.99",0.0699,0.0573,0.0437,0.0345,9512,0.0021,288.79


In [17]:
get_results(
    ["2024_05_04"],
    [f"{AMAZON_BOOK_DATASET_NAME}{H2_FARTHEST_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H2_FARTHEST_METHOD, AMAZON_BOOK_DATASET_NAME),
    fix_params_f=basic_fix_params_f,
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.9 & 0.071 & 0.0585 & 0.2129 & 0.0329 & 68422 & 0.0174 & 154.16 \\
item,q=0.95 & 0.071 & 0.0585 & 0.2125 & 0.0329 & 68410 & 0.0174 & 154.32 \\
item,q=0.99 & 0.071 & 0.0584 & 0.2105 & 0.0329 & 68199 & 0.0174 & 158.55 \\
user,q=0.9 & 0.0708 & 0.0583 & 0.2123 & 0.0328 & 68548 & 0.0168 & 154.97 \\
user,q=0.95 & 0.0707 & 0.0582 & 0.2114 & 0.0328 & 68568 & 0.0163 & 155.51 \\
user,q=0.99 & 0.0706 & 0.058 & 0.2077 & 0.0326 & 68315 & 0.0154 & 157.04 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `betweenness centrality connector (farthest)` on `amazon-book` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.9",0.071,0.0585,0.2129,0.0329,68422,0.0174,154.16
0,"item,q=0.95",0.071,0.0585,0.2125,0.0329,68410,0.0174,154.32
0,"item,q=0.99",0.071,0.0584,0.2105,0.0329,68199,0.0174,158.55
0,"user,q=0.9",0.0708,0.0583,0.2123,0.0328,68548,0.0168,154.97
0,"user,q=0.95",0.0707,0.0582,0.2114,0.0328,68568,0.0163,155.51
0,"user,q=0.99",0.0706,0.058,0.2077,0.0326,68315,0.0154,157.04


### Hypothesis 3 (Pre-exploration using Improved Jacard Index)

In [18]:
H3_METHOD_NAME = "pre-exploration using Improved Jacard Index"
H3_GENERAL_SUFFIX_TEMPLATE = "--{}-chns--????--q=0.?5"

#### Soft

In [19]:
H3_SOFT_METHOD_NAME = f"{H3_METHOD_NAME} (soft)"
H3_SOFT_SUFFIX = H3_GENERAL_SUFFIX_TEMPLATE.format("soft")

In [20]:
get_results(
    ["2024_05_11"],
    [f"{GOWALLA_DATASET_NAME}{H3_SOFT_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H3_SOFT_METHOD_NAME, GOWALLA_DATASET_NAME),
    fix_params_f=basic_fix_params_f,
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.05 & 0.1849 & 0.1517 & 0.1123 & 0.0734 & 22716 & 0.0205 & 167.61 \\
item,q=0.15 & 0.1845 & 0.1514 & 0.1128 & 0.0734 & 22817 & 0.0202 & 167.51 \\
user,q=0.05 & 0.1835 & 0.151 & 0.1099 & 0.0732 & 22253 & 0.0196 & 169.09 \\
user,q=0.15 & 0.1833 & 0.1506 & 0.1093 & 0.0731 & 22126 & 0.0195 & 169.71 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `pre-exploration using Improved Jacard Index (soft)` on `gowalla` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.05",0.1849,0.1517,0.1123,0.0734,22716,0.0205,167.61
0,"item,q=0.15",0.1845,0.1514,0.1128,0.0734,22817,0.0202,167.51
0,"user,q=0.05",0.1835,0.151,0.1099,0.0732,22253,0.0196,169.09
0,"user,q=0.15",0.1833,0.1506,0.1093,0.0731,22126,0.0195,169.71


In [21]:
get_results(
    ["2024_05_11"],
    [f"{YELP_2018_DATASET_NAME}{H3_SOFT_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H3_SOFT_METHOD_NAME, YELP_2018_DATASET_NAME),
    fix_params_f=basic_fix_params_f,
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.05 & 0.0694 & 0.057 & 0.0431 & 0.0344 & 9772 & 0.0023 & 290.88 \\
item,q=0.15 & 0.0695 & 0.0571 & 0.0434 & 0.0344 & 9799 & 0.0023 & 290.45 \\
user,q=0.05 & 0.0695 & 0.0569 & 0.0427 & 0.0343 & 9658 & 0.0025 & 293.55 \\
user,q=0.15 & 0.0694 & 0.0568 & 0.0427 & 0.0342 & 9692 & 0.0024 & 294.62 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `pre-exploration using Improved Jacard Index (soft)` on `yelp-2018` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.05",0.0694,0.057,0.0431,0.0344,9772,0.0023,290.88
0,"item,q=0.15",0.0695,0.0571,0.0434,0.0344,9799,0.0023,290.45
0,"user,q=0.05",0.0695,0.0569,0.0427,0.0343,9658,0.0025,293.55
0,"user,q=0.15",0.0694,0.0568,0.0427,0.0342,9692,0.0024,294.62


In [22]:
get_results(
    ["2024_05_11"],
    [f"{AMAZON_BOOK_DATASET_NAME}{H3_SOFT_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H3_SOFT_METHOD_NAME, AMAZON_BOOK_DATASET_NAME),
    fix_params_f=basic_fix_params_f,
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.05 & 0.0703 & 0.0578 & 0.2166 & 0.0326 & 69324 & 0.0168 & 153.7 \\
item,q=0.15 & 0.0697 & 0.0574 & 0.2199 & 0.0324 & 69927 & 0.0161 & 153.31 \\
user,q=0.05 & 0.0706 & 0.0581 & 0.2119 & 0.0328 & 68359 & 0.0173 & 154.91 \\
user,q=0.15 & 0.0704 & 0.058 & 0.2115 & 0.0327 & 68372 & 0.0172 & 155.26 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `pre-exploration using Improved Jacard Index (soft)` on `amazon-book` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.05",0.0703,0.0578,0.2166,0.0326,69324,0.0168,153.7
0,"item,q=0.15",0.0697,0.0574,0.2199,0.0324,69927,0.0161,153.31
0,"user,q=0.05",0.0706,0.0581,0.2119,0.0328,68359,0.0173,154.91
0,"user,q=0.15",0.0704,0.058,0.2115,0.0327,68372,0.0172,155.26


#### Hard

In [23]:
H3_HARD_METHOD_NAME = f"{H3_METHOD_NAME} (hard)"
H3_HARD_SUFFIX = H3_GENERAL_SUFFIX_TEMPLATE.format("hard")

In [24]:
get_results(
    ["2024_05_11"],
    [f"{GOWALLA_DATASET_NAME}{H3_HARD_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H3_HARD_METHOD_NAME, GOWALLA_DATASET_NAME),
    fix_params_f=basic_fix_params_f,
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.05 & 0.1848 & 0.1519 & 0.11 & 0.0735 & 22282 & 0.0196 & 168.22 \\
item,q=0.15 & 0.1845 & 0.1518 & 0.1088 & 0.0735 & 22023 & 0.0189 & 168.6 \\
user,q=0.05 & 0.1847 & 0.1516 & 0.1082 & 0.0734 & 22055 & 0.0201 & 174.42 \\
user,q=0.15 & 0.1845 & 0.1513 & 0.1071 & 0.0732 & 21907 & 0.0198 & 178.31 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `pre-exploration using Improved Jacard Index (hard)` on `gowalla` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.05",0.1848,0.1519,0.11,0.0735,22282,0.0196,168.22
0,"item,q=0.15",0.1845,0.1518,0.1088,0.0735,22023,0.0189,168.6
0,"user,q=0.05",0.1847,0.1516,0.1082,0.0734,22055,0.0201,174.42
0,"user,q=0.15",0.1845,0.1513,0.1071,0.0732,21907,0.0198,178.31


In [25]:
get_results(
    ["2024_05_11"],
    [f"{YELP_2018_DATASET_NAME}{H3_HARD_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H3_HARD_METHOD_NAME, YELP_2018_DATASET_NAME),
    fix_params_f=basic_fix_params_f,
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.05 & 0.0691 & 0.0567 & 0.0409 & 0.0341 & 9595 & 0.0022 & 293.32 \\
item,q=0.15 & 0.0691 & 0.0567 & 0.0402 & 0.0341 & 9418 & 0.0021 & 294.07 \\
user,q=0.05 & 0.0695 & 0.0571 & 0.0428 & 0.0344 & 9645 & 0.0025 & 291.63 \\
user,q=0.15 & 0.0694 & 0.057 & 0.0428 & 0.0343 & 9636 & 0.0025 & 291.79 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `pre-exploration using Improved Jacard Index (hard)` on `yelp-2018` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.05",0.0691,0.0567,0.0409,0.0341,9595,0.0022,293.32
0,"item,q=0.15",0.0691,0.0567,0.0402,0.0341,9418,0.0021,294.07
0,"user,q=0.05",0.0695,0.0571,0.0428,0.0344,9645,0.0025,291.63
0,"user,q=0.15",0.0694,0.057,0.0428,0.0343,9636,0.0025,291.79


In [26]:
get_results(
    ["2024_05_11"],
    [f"{AMAZON_BOOK_DATASET_NAME}{H3_HARD_SUFFIX}"],
    GLOBAL_EXCLUDE_METRIC_LIST,
    [GFCF_DATASET_NAME],
    caption=GENERAL_CAPTION_TEMPLATE.format(H3_HARD_METHOD_NAME, AMAZON_BOOK_DATASET_NAME),
    fix_params_f=basic_fix_params_f,
)

GFCF
\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
Params & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
item,q=0.05 & 0.0705 & 0.058 & 0.2115 & 0.0327 & 68652 & 0.0162 & 155.43 \\
item,q=0.15 & 0.0703 & 0.0578 & 0.2077 & 0.0325 & 68175 & 0.0154 & 156.61 \\
user,q=0.05 & 0.071 & 0.0584 & 0.2111 & 0.0329 & 68169 & 0.0174 & 155.55 \\
user,q=0.15 & 0.071 & 0.0584 & 0.2103 & 0.0329 & 68119 & 0.0174 & 158.6 \\
\bottomrule
\end{tabular}
\caption{GFCF results for `pre-exploration using Improved Jacard Index (hard)` on `amazon-book` dataset}
\end{table}


Unnamed: 0,Params,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,"item,q=0.05",0.0705,0.058,0.2115,0.0327,68652,0.0162,155.43
0,"item,q=0.15",0.0703,0.0578,0.2077,0.0325,68175,0.0154,156.61
0,"user,q=0.05",0.071,0.0584,0.2111,0.0329,68169,0.0174,155.55
0,"user,q=0.15",0.071,0.0584,0.2103,0.0329,68119,0.0174,158.6


## Base

In [27]:
BASE_CAPTION_TEMPLATE = "Baseline results on `{}` dataset"

In [28]:
get_results(
    ["2024_04_05", "2024_04_06", "2024_04_07"],
    [GOWALLA_DATASET_NAME],
    GLOBAL_EXCLUDE_METRIC_LIST,
    caption=BASE_CAPTION_TEMPLATE.format(GOWALLA_DATASET_NAME)
)

\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
model & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
DGCF & 0.1735 & 0.1478 & 0.0525 & 0.0715 & 12094 & 0.0044 & 207.37 \\
GFCF & 0.1849 & 0.1518 & 0.1117 & 0.0735 & 22638 & 0.0204 & 167.85 \\
LightGCN & 0.1826 & 0.1545 & 0.0831 & 0.0744 & 17391 & 0.0065 & 184.74 \\
NGCF & 0.1565 & 0.1335 & 0.0696 & 0.0649 & 16069 & 0.0047 & 181.92 \\
SGL & 0.1772 & 0.1491 & 0.076 & 0.0722 & 13869 & 0.0089 & 159.28 \\
UltraGCN & 0.1796 & 0.1481 & 0.0828 & 0.071 & 18492 & 0.0156 & 220.25 \\
\bottomrule
\end{tabular}
\caption{Baseline results on `gowalla` dataset}
\end{table}


Unnamed: 0,model,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,DGCF,0.1735,0.1478,0.0525,0.0715,12094,0.0044,207.37
0,GFCF,0.1849,0.1518,0.1117,0.0735,22638,0.0204,167.85
0,LightGCN,0.1826,0.1545,0.0831,0.0744,17391,0.0065,184.74
0,NGCF,0.1565,0.1335,0.0696,0.0649,16069,0.0047,181.92
0,SGL,0.1772,0.1491,0.076,0.0722,13869,0.0089,159.28
0,UltraGCN,0.1796,0.1481,0.0828,0.071,18492,0.0156,220.25


In [29]:
get_results(
    ["2024_04_07", "2024_04_08", "2024_04_09", "2024_04_10", "2024_04_11"], 
    [YELP_2018_DATASET_NAME],
    GLOBAL_EXCLUDE_METRIC_LIST,
    caption=BASE_CAPTION_TEMPLATE.format(YELP_2018_DATASET_NAME)
)

\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
model & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
DGCF & 0.0629 & 0.0511 & 0.0551 & 0.0308 & 11834 & 0.0004 & 282.13 \\
GFCF & 0.0697 & 0.0571 & 0.0428 & 0.0344 & 9683 & 0.0025 & 291.52 \\
LightGCN & 0.0629 & 0.0516 & 0.0851 & 0.0311 & 15426 & 0.0007 & 244.2 \\
NGCF & 0.0558 & 0.0455 & 0.0838 & 0.0274 & 15413 & 0.0006 & 226.32 \\
SGL & 0.0669 & 0.0552 & 0.0533 & 0.0331 & 8655 & 0.0006 & 241.07 \\
UltraGCN & 0.0672 & 0.0553 & 0.0933 & 0.0334 & 17453 & 0.002 & 235.93 \\
\bottomrule
\end{tabular}
\caption{Baseline results on `yelp-2018` dataset}
\end{table}


Unnamed: 0,model,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,DGCF,0.0629,0.0511,0.0551,0.0308,11834,0.0004,282.13
0,GFCF,0.0697,0.0571,0.0428,0.0344,9683,0.0025,291.52
0,LightGCN,0.0629,0.0516,0.0851,0.0311,15426,0.0007,244.2
0,NGCF,0.0558,0.0455,0.0838,0.0274,15413,0.0006,226.32
0,SGL,0.0669,0.0552,0.0533,0.0331,8655,0.0006,241.07
0,UltraGCN,0.0672,0.0553,0.0933,0.0334,17453,0.002,235.93


In [30]:
get_results(
    ["2024_04_11", "2024_04_19", "2024_04_20", "2024_04_21", "2024_04_22", "2024_04_30"],
    [AMAZON_BOOK_DATASET_NAME],
    GLOBAL_EXCLUDE_METRIC_LIST,
    caption=BASE_CAPTION_TEMPLATE.format(YELP_2018_DATASET_NAME)
)

\begin{table}[H]
\centering
\begin{tabular}{lllllrll}
\toprule
model & Recall & nDCG & Gini & EPC & IC & TRecall & ARP \\
\midrule
DGCF & 0.0379 & 0.0294 & 0.0537 & 0.017 & 28995 & 0.0014 & 273.3 \\
GFCF & 0.071 & 0.0584 & 0.2129 & 0.0329 & 68393 & 0.0174 & 154.08 \\
LightGCN & 0.0419 & 0.0323 & 0.0737 & 0.0185 & 34102 & 0.0019 & 232.37 \\
NGCF & 0.0322 & 0.0248 & 0.1372 & 0.0143 & 52177 & 0.0023 & 164.14 \\
SGL & 0.0474 & 0.0372 & 0.0812 & 0.0213 & 30601 & 0.0031 & 190.64 \\
UltraGCN & 0.0688 & 0.0561 & 0.1555 & 0.032 & 49095 & 0.017 & 149.96 \\
\bottomrule
\end{tabular}
\caption{Baseline results on `yelp-2018` dataset}
\end{table}


Unnamed: 0,model,Recall,nDCG,Gini,EPC,IC,TRecall,ARP
0,DGCF,0.0379,0.0294,0.0537,0.017,28995,0.0014,273.3
0,GFCF,0.071,0.0584,0.2129,0.0329,68393,0.0174,154.08
0,LightGCN,0.0419,0.0323,0.0737,0.0185,34102,0.0019,232.37
0,NGCF,0.0322,0.0248,0.1372,0.0143,52177,0.0023,164.14
0,SGL,0.0474,0.0372,0.0812,0.0213,30601,0.0031,190.64
0,UltraGCN,0.0688,0.0561,0.1555,0.032,49095,0.017,149.96
