In [1]:
import pandas as pd
import os
import sys


# Extend the module search path so that code living in ../../src (relative to
# the current working directory) can be imported from this notebook.
notebook_dir = os.getcwd()
src_path = os.path.abspath(os.path.join(notebook_dir, '..', '..', 'src'))
if sys.path.count(src_path) == 0:
    # append only once, so re-running the cell does not add duplicates
    sys.path.append(src_path)

from my_secrets import my_path


def assign_experiment_name(group):
    """Label the rows of one group with the experiment they belong to.

    The first row of ``group`` is labelled ``'main_experiment_1'`` and every
    following row ``'variance_test_experiment_1'``. The input frame is not
    modified; a labelled copy is returned.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to label. Row order decides which row counts as the first one.

    Returns
    -------
    pandas.DataFrame
        Copy of ``group`` with an added (or overwritten) ``experiment_name``
        column. An empty ``group`` yields an empty copy that still carries
        the column.
    """
    labelled = group.copy()
    # Build exactly one label per row so the assignment also works for an
    # empty group (a fixed leading label would not match a zero-row frame).
    labels = ['variance_test_experiment_1'] * len(labelled)
    if labels:
        labels[0] = 'main_experiment_1'
    labelled['experiment_name'] = labels
    return labelled


def assign_total_time_outside_user_cb(row):
    """Return the runtime that was not spent inside the user callback.

    Computes ``row['Runtime'] - row['total_time_in_user_cb']``. When the
    callback time is missing (as judged by pandas' null check), ``None`` is
    returned instead.
    """
    time_in_cb = row['total_time_in_user_cb']
    if pd.isnull(time_in_cb):
        # no callback time recorded for this row -> nothing to subtract
        return None
    return row['Runtime'] - time_in_cb

In [2]:
# * Select which dataset to prepare.
# Exactly one of the flags below may be True.
do_test_out = False
do_experiment_1 = True
do_preliminary_tests = False
do_list = [do_test_out, do_experiment_1, do_preliminary_tests]
# guard: exactly one flag is switched on
assert sum(map(bool, do_list)) == 1

# Load the processed frame that belongs to the enabled flag.
for enabled, relative_path in (
    (do_test_out, 'visualizations/data_frames/processed_test_out.pkl'),
    (do_experiment_1, 'visualizations/data_frames/processed_exp_1.pkl'),
    (do_preliminary_tests, 'visualizations/data_frames/processed_prelims.pkl'),
):
    if enabled:
        df = pd.read_pickle(my_path + relative_path)
df.head()

Unnamed: 0,ModelName,instance_name,solving_technique,file_created_at,model.Status,NodeCount,IterCount,Runtime,Work,SolCount,...,raw_time,callback_call_count,benders_started_count,total_time_in_user_cb,total_num_cuts,check_1,check_2,check_3,real_obj_val,output_file_name
0,Disjunctive-Programming,chr12c.dat,dp,2024-05-31_00-53.,2,96271.0,1786167.0,67.354129,33.576222,10,...,68.204944,370039.0,22.0,22.778581,114.0,True,False,False,1.0,output_2024-05-31_00-53_r1.txt
1,Disjunctive-Programming,esc16g.dat,dp,2024-05-31_00-53.,2,105924.0,1151998.0,57.075964,32.240647,7,...,59.722019,334333.0,7.0,20.261192,36.0,True,False,False,1.0,output_2024-05-31_00-53_r2.txt
2,Kaufman-Broeckx,esc16g.dat,kbl,2024-05-31_00-53.,2,2248.0,31511.0,2.465137,2.400171,7,...,5.494323,,,,,True,True,True,,output_2024-05-31_00-53_r3.txt
3,Kaufman-Broeckx,chr12c.dat,kbl,2024-05-31_00-53.,2,16559.0,436221.0,26.991214,21.796962,10,...,27.96675,,,,,True,False,False,1.0,output_2024-05-31_00-53_r4.txt
4,Disjunctive-Programming,chr15a.dat,dp,2024-05-31_00-56.,2,91196.0,2358024.0,155.236369,94.532177,10,...,157.275981,361290.0,21.0,50.803567,129.0,True,False,False,9.0,output_2024-05-31_00-56_r1.txt


In [3]:
# Label the first run of every (instance_name, solving_technique) pair as the
# main experiment and every later run of the same pair as a variance-test run.
#
# cumcount() numbers the rows inside each group (0 for the first one) and is
# aligned with df's own index, so rows stay in their original order and all
# columns are kept. A groupby(...).apply(func) followed by
# reset_index(drop=True) is avoided here because its row order and its
# treatment of the grouping columns differ between pandas versions.
run_number = df.groupby(['instance_name', 'solving_technique']).cumcount()
df = df.assign(
    experiment_name=run_number.eq(0).map(
        {True: 'main_experiment_1', False: 'variance_test_experiment_1'}
    )
).reset_index(drop=True)
df.head()

Unnamed: 0,ModelName,instance_name,solving_technique,file_created_at,model.Status,NodeCount,IterCount,Runtime,Work,SolCount,...,callback_call_count,benders_started_count,total_time_in_user_cb,total_num_cuts,check_1,check_2,check_3,real_obj_val,output_file_name,experiment_name
0,Disjunctive-Programming,chr12c.dat,dp,2024-05-31_00-53.,2,96271.0,1786167.0,67.354129,33.576222,10,...,370039.0,22.0,22.778581,114.0,True,False,False,1.0,output_2024-05-31_00-53_r1.txt,main_experiment_1
1,Disjunctive-Programming,esc16g.dat,dp,2024-05-31_00-53.,2,105924.0,1151998.0,57.075964,32.240647,7,...,334333.0,7.0,20.261192,36.0,True,False,False,1.0,output_2024-05-31_00-53_r2.txt,main_experiment_1
2,Kaufman-Broeckx,esc16g.dat,kbl,2024-05-31_00-53.,2,2248.0,31511.0,2.465137,2.400171,7,...,,,,,True,True,True,,output_2024-05-31_00-53_r3.txt,main_experiment_1
3,Kaufman-Broeckx,chr12c.dat,kbl,2024-05-31_00-53.,2,16559.0,436221.0,26.991214,21.796962,10,...,,,,,True,False,False,1.0,output_2024-05-31_00-53_r4.txt,main_experiment_1
4,Disjunctive-Programming,chr15a.dat,dp,2024-05-31_00-56.,2,91196.0,2358024.0,155.236369,94.532177,10,...,361290.0,21.0,50.803567,129.0,True,False,False,9.0,output_2024-05-31_00-56_r1.txt,main_experiment_1


In [4]:
# Add the time spent outside the user callback: Runtime - total_time_in_user_cb.
# Column arithmetic is used instead of a row-wise apply: a missing callback
# time propagates as NaN, so rows without callback data stay empty in the new
# column, and the subtraction also works when the frame has no rows.
df['total_time_outside_user_cb'] = df['Runtime'] - df['total_time_in_user_cb']
df.head()

Unnamed: 0,ModelName,instance_name,solving_technique,file_created_at,model.Status,NodeCount,IterCount,Runtime,Work,SolCount,...,benders_started_count,total_time_in_user_cb,total_num_cuts,check_1,check_2,check_3,real_obj_val,output_file_name,experiment_name,total_time_outside_user_cb
0,Disjunctive-Programming,chr12c.dat,dp,2024-05-31_00-53.,2,96271.0,1786167.0,67.354129,33.576222,10,...,22.0,22.778581,114.0,True,False,False,1.0,output_2024-05-31_00-53_r1.txt,main_experiment_1,44.575548
1,Disjunctive-Programming,esc16g.dat,dp,2024-05-31_00-53.,2,105924.0,1151998.0,57.075964,32.240647,7,...,7.0,20.261192,36.0,True,False,False,1.0,output_2024-05-31_00-53_r2.txt,main_experiment_1,36.814772
2,Kaufman-Broeckx,esc16g.dat,kbl,2024-05-31_00-53.,2,2248.0,31511.0,2.465137,2.400171,7,...,,,,True,True,True,,output_2024-05-31_00-53_r3.txt,main_experiment_1,
3,Kaufman-Broeckx,chr12c.dat,kbl,2024-05-31_00-53.,2,16559.0,436221.0,26.991214,21.796962,10,...,,,,True,False,False,1.0,output_2024-05-31_00-53_r4.txt,main_experiment_1,
4,Disjunctive-Programming,chr15a.dat,dp,2024-05-31_00-56.,2,91196.0,2358024.0,155.236369,94.532177,10,...,21.0,50.803567,129.0,True,False,False,9.0,output_2024-05-31_00-56_r1.txt,main_experiment_1,104.432802


In [5]:

# Write the prepared frame to the pickle file of the enabled dataset.
for enabled, relative_path in (
    (do_test_out, 'visualizations/data_frames/prepared_test_out.pkl'),
    (do_experiment_1, 'visualizations/data_frames/prepared_exp_1.pkl'),
    (do_preliminary_tests, 'visualizations/data_frames/prepared_prelims.pkl'),
):
    if enabled:
        df.to_pickle(my_path + relative_path)