In [1]:
# Load IPython's autoreload extension so edited project modules do not require a kernel restart
%load_ext autoreload

# Mode 2: reload all imported modules before each cell executes
# (keeps the benchmark_src imports below in sync with edits on disk)
%autoreload 2

In [2]:
from pathlib import Path
from benchmark_src.results_processing.plots.plot_utils import get_task_df, get_list_of_all_runs
from benchmark_src.results_processing import ranking
from benchmark_src.results_processing.plots import table_retrieval_plots
import pandas as pd

In [3]:
# Root folder holding the benchmark results. The path is relative, so it is
# interpreted against the kernel's current working directory.
results_folder = Path("../../results")
# Fail early if the folder is missing (or is not a directory) and report the
# resolved absolute path so a wrong working directory is easy to spot.
assert results_folder.is_dir(), (
    f"Could not find results folder at {results_folder} "
    f"(resolved to {results_folder.resolve()})"
)

In [4]:
# Load the results of the column-similarity-search task from the results folder.
task_df = get_task_df(
    task_name="column_similarity_search",
    results_folder=results_folder,
)

Unique datasets (column_similarity_search): 5


In [5]:
# Sanity check: number of distinct datasets covered by each
# (task, approach, configuration) combination.
(
    task_df
    .groupby(["task", "Approach", "Configuration"])["dataset"]
    .nunique()
    .reset_index(name="num_datasets")
)

Unnamed: 0,task,Approach,Configuration,num_datasets
0,column_similarity_search,hytrel,hytrel,5
1,column_similarity_search,sap_rpt_oss,"bagging=1,max_context_size=2048,predML_based_o...",5
2,column_similarity_search,sentence_transformer,"embedding_model=all-MiniLM-L6-v2,table_row_lim...",5
3,column_similarity_search,tabicl,"n_estimators=32,predML_based_on=custom_predict...",5


## Configure which approaches to include

In [6]:
# Emit the list of all (approach, configuration) runs as output; copy it into the
# next cell and comment out the approaches that should not be plotted.
get_list_of_all_runs(task_df)

include_runs = [
    ('hytrel', 'hytrel'),
    ('sap_rpt_oss', 'bagging=1,max_context_size=2048,predML_based_on=custom_predictiveML_model'),
    ('sentence_transformer', 'embedding_model=all-MiniLM-L6-v2,table_row_limit=100'),
    ('tabicl', 'n_estimators=32,predML_based_on=custom_predictiveML_model'),
]


In [7]:
# Runs to keep, as (Approach, Configuration) pairs.
# Comment out an entry to exclude that run from the results below.
include_runs = [
    ("hytrel", "hytrel"),
    (
        "sap_rpt_oss",
        "bagging=1,max_context_size=2048,predML_based_on=custom_predictiveML_model",
    ),
    (
        "sentence_transformer",
        "embedding_model=all-MiniLM-L6-v2,table_row_limit=100",
    ),
    (
        "tabicl",
        "n_estimators=32,predML_based_on=custom_predictiveML_model",
    ),
]

In [8]:
# Keep only the rows whose (Approach, Configuration) pair is listed in include_runs.
filtered_task_df = task_df.loc[
    pd.MultiIndex.from_frame(task_df[["Approach", "Configuration"]]).isin(include_runs)
]

In [17]:
# Inspect which columns are available in the filtered frame.
filtered_task_df.columns

Index(['Approach', 'Configuration', 'task', 'dataset', 'MRR_mean',
       'MRR_std_mean', 'MAP_mean', 'MAP_std_mean', 'Precision_mean',
       'Precision_std_mean', 'Recall_mean', 'Recall_std_mean', '# Runs'],
      dtype='object')

In [18]:
# Restrict the frame to the run identifiers, the metric columns
# (*_mean / *_std_mean for MRR, MAP, Precision and Recall) and the run count.
filtered_task_df = filtered_task_df.loc[
    :,
    [
        "Approach", "Configuration", "task", "dataset",
        "MRR_mean", "MRR_std_mean",
        "MAP_mean", "MAP_std_mean",
        "Precision_mean", "Precision_std_mean",
        "Recall_mean", "Recall_std_mean",
        "# Runs",
    ],
]

In [30]:
# filtered_task_df  # uncomment to inspect the filtered frame

In [31]:
# Build the results table: one row per dataset, one column per approach,
# each cell holding the MRR_mean of that run on that dataset.
results_table = filtered_task_df.pivot_table(
    index='dataset',
    columns=['Approach', 'Configuration'],
    values='MRR_mean',
    aggfunc='mean'
)

# Label the columns from the pivot itself instead of a hard-coded, position-based
# list. A fixed list breaks (length mismatch) or silently mislabels columns as soon
# as a run is added to or commented out of include_runs.
approach_labels = results_table.columns.get_level_values('Approach')
if approach_labels.is_unique:
    # One configuration per approach: the approach name alone identifies the column.
    results_table.columns = list(approach_labels)
else:
    # Several configurations of the same approach: keep the configuration in the
    # label so the columns stay distinguishable.
    results_table.columns = [f"{approach} ({config})" for approach, config in results_table.columns]

# Fixed row order: nextia, valentine, opendata, wikijoin_small, autojoin.
# reindex keeps a listed dataset that is absent from the results as an all-NaN row.
results_table = results_table.reindex(['nextia', 'valentine', 'opendata', 'wikijoin_small', 'autojoin'])

In [32]:
# Display the MRR results table (rows: datasets, columns: approaches).
results_table

Unnamed: 0_level_0,hytrel,sap_rpt_oss,sentence_transformer,tabicl
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
nextia,0.36413,0.005435,0.396739,0.0
valentine,0.264722,9.9e-05,0.613393,0.013523
opendata,0.429365,0.19983,0.601587,0.0
wikijoin_small,0.743206,0.192079,0.939667,0.0
autojoin,0.0,0.0,0.0,0.0
