In [1]:
import os
import pandas as pd
import json
import tqdm
import numpy as np
import re

# Read Data

In [2]:
# Collect the names of the JSON result files in the results directory,
# sorted so that the processing order is stable between runs.
records = sorted(
    file_name
    for file_name in os.listdir("multi_run_server_json/")
    if file_name.rsplit(".", 1)[-1] == 'json'
)
records

['ARFC_adult_run_1.json',
 'ARFC_adult_run_2.json',
 'ARFC_adult_run_3.json',
 'ARFC_adult_run_4.json',
 'ARFC_adult_run_5.json',
 'ARFC_electricity_run_1.json',
 'ARFC_electricity_run_2.json',
 'ARFC_electricity_run_3.json',
 'ARFC_electricity_run_4.json',
 'ARFC_electricity_run_5.json',
 'ARFC_forest_cover_run_1.json',
 'ARFC_forest_cover_run_2.json',
 'ARFC_forest_cover_run_3.json',
 'ARFC_forest_cover_run_4.json',
 'ARFC_forest_cover_run_5.json',
 'ARFC_hyperplane_high_gradual_drift_run_1.json',
 'ARFC_hyperplane_high_gradual_drift_run_2.json',
 'ARFC_hyperplane_high_gradual_drift_run_3.json',
 'ARFC_hyperplane_high_gradual_drift_run_4.json',
 'ARFC_hyperplane_high_gradual_drift_run_5.json',
 'ARFC_insects_run_1.json',
 'ARFC_insects_run_2.json',
 'ARFC_insects_run_3.json',
 'ARFC_insects_run_4.json',
 'ARFC_insects_run_5.json',
 'ARFC_movingRBF_run_1.json',
 'ARFC_movingRBF_run_2.json',
 'ARFC_movingRBF_run_3.json',
 'ARFC_movingRBF_run_4.json',
 'ARFC_movingRBF_run_5.json',
 'ARF

In [3]:
# Placeholder for the raw per-run results table; it is populated by the loading cell below.
df_raw = pd.DataFrame()

In [4]:
# Build one summary row per result file. Rows are collected in a plain list and
# turned into a DataFrame once at the end: growing a DataFrame row by row is
# quadratic, and DataFrame.append no longer exists in pandas >= 2.0.
rows = []
for record in tqdm.tqdm(records):
    # Read the stored result dictionary from the JSON file
    with open(f'multi_run_server_json/{record}', 'r') as json_file:
        loaded_record = json.load(json_file)

    # The memory samples are looked up under 'memory' first and under the
    # misspelled key 'memeory' otherwise (presumably some files were written
    # with the typo). A file with neither key raises KeyError instead of being
    # hidden by a bare `except`.
    if 'memory' in loaded_record:
        memory_samples = loaded_record['memory']
    else:
        memory_samples = loaded_record['memeory']

    rows.append({
        # File names look like '<model>_<dataset>_run_<n>.json'; keep the whole
        # run id (not just its first character) so runs >= 10 stay distinct.
        "run": record.rsplit("_", 1)[-1].split(".")[0],
        "model": loaded_record['model'],
        "dataset": loaded_record['dataset'],
        # Last entry of the recorded prequential score series
        "prequential_scores": loaded_record['prequential_scores'][-1],
        # Absolute value of the summed time samples
        "time": abs(sum(loaded_record['time'])),
        # Each sample is divided by 1024 twice before summing
        # (presumably bytes -> MiB; confirm against the writer of these files)
        "memory": abs(sum(sample / 1024 / 1024 for sample in memory_samples)),
    })

df_raw = pd.DataFrame(rows)

100%|██████████| 560/560 [12:22<00:00,  1.33s/it]  


In [5]:
# Work on a copy so that df_raw itself is not modified by the cells that follow.
df = df_raw.copy()

In [6]:
# Display the per-run results table.
df

Unnamed: 0,run,model,dataset,prequential_scores,time,memory
0,1,ARFC,adult,0.817145,79.857509,18.027344
1,2,ARFC,adult,0.816408,88.500604,20.464844
2,3,ARFC,adult,0.813153,79.812694,17.750000
3,4,ARFC,adult,0.811822,81.136639,15.285156
4,5,ARFC,adult,0.809037,89.699469,23.972656
...,...,...,...,...,...,...
555,1,SRPC,vehicle_sensIT,0.784782,2296.645563,207.492188
556,2,SRPC,vehicle_sensIT,0.779240,1983.974548,159.156250
557,3,SRPC,vehicle_sensIT,0.780296,2148.411239,143.398438
558,4,SRPC,vehicle_sensIT,0.775678,2055.873046,133.304688


In [7]:
#df[df['dataset']=='vehicle_sensIT']

In [8]:
#df.to_csv('saved_cd_results_csv/example_all_new.csv',index=False)

# Score

In [9]:
# Alternative selection (disabled):
#models = ['AutoStreamML', 'ChCha', 'EvoAutoML','HATC','OnlineAutoML','ARFC','SRPC']

# Names of the models to compare.
models = [
    'AutoStreamML',
    'AutoStreamML_Best',
    'AutoClass',
    'OnlineAutoML',
    'EvoAutoML',
    'ARFC',
    'SRPC',
    'HATC',
]

In [10]:
# Dataset identifiers in the row order used for the result tables
# (the tables are reindexed with this list).
dataset_name_list = [
    'adult',
    'electricity',
    'forest_cover',
    'insects',
    'new_airlines',
    'shuttle',
    'vehicle_sensIT',
    'hyperplane_high_gradual_drift',
    'movingRBF',
    'moving_squares',
    'sea_high_abrupt_drift',
    'sea_high_mixed_drift',
    'synth_RandomRBFDrift',
    'synth_agrawal',
]

# Creating Table

In [11]:
# Alternative ordering (disabled):
#columns_order = ['AutoStreamML','EvoAutoML','OnlineAutoML','ChCha','ARFC','SRPC','HATC']

# Column order of the result tables. The entries select columns of the tables
# pivoted on `model`, so they have to match the values of that column.
columns_order = [
    'AutoStreamML',
    'AutoStreamML_Best',
    'AutoClass',
    'OnlineAutoML',
    'EvoAutoML',
    'ARFC',
    'SRPC',
    'HATC',
]

In [12]:
# Display names for the table columns. They are assigned positionally after the
# columns have been selected with columns_order, so both lists must line up.
columns_rename = [
    'ASML_E',
    'ASML_B',
    'AutoClass',
    'OAML',
    'EAML',
    'ARFC',
    'SRPC',
    'HATC',
]

In [13]:
# Mean and standard deviation of `prequential_scores` for every
# (model, dataset) group, returned as a flat frame with both keys as columns.
temp_df = (
    df.groupby(['model', 'dataset'])['prequential_scores']
      .agg(['mean', 'std'])
      .reset_index()
)

In [14]:
# Display the aggregated mean/std table.
temp_df

Unnamed: 0,model,dataset,mean,std
0,ARFC,adult,0.813513,0.003339
1,ARFC,electricity,0.858408,0.002463
2,ARFC,forest_cover,0.888535,0.003993
3,ARFC,hyperplane_high_gradual_drift,0.757298,0.001618
4,ARFC,insects,0.686138,0.004787
...,...,...,...,...
107,SRPC,sea_high_mixed_drift,0.817319,0.031955
108,SRPC,shuttle,0.994755,0.000777
109,SRPC,synth_RandomRBFDrift,0.562174,0.007706
110,SRPC,synth_agrawal,0.981910,0.015721


# SCORE MEAN±STD

In [15]:
# Format each row as "mean±std", both scaled by 100 and rounded to two decimals.
temp_df['score'] = [
    f"{avg:.2f}±{spread:.2f}"
    for avg, spread in zip(temp_df['mean'].mul(100), temp_df['std'].mul(100))
]

In [16]:
# Reshape the "mean±std" labels into a dataset x model table.
# Each (dataset, model) pair has a single label, so 'first' just picks it.
temp_df = temp_df.pivot_table(
    index='dataset', columns='model', values='score', aggfunc='first'
).rename_axis(None, axis='columns')

# Fix the row and column order, then switch to human-readable labels.
temp_df = temp_df.reindex(dataset_name_list)[columns_order]
temp_df.index = [label.replace("_", " ").title() for label in temp_df.index]
temp_df.columns = columns_rename
temp_df

Unnamed: 0,ASML_E,ASML_B,AutoClass,OAML,EAML,ARFC,SRPC,HATC
Adult,80.01±0.61,80.36±0.27,76.86±1.28,72.07±0.44,80.56±1.29,81.35±0.33,80.11±0.30,81.64±0.27
Electricity,91.50±0.12,90.65±0.26,87.98±1.16,86.96±0.49,89.13±0.44,85.84±0.25,86.63±0.16,83.19±0.32
Forest Cover,95.63±0.07,95.39±0.18,95.32±0.02,83.16±0.52,94.07±0.06,88.85±0.40,92.97±0.09,70.99±1.48
Insects,70.95±0.46,71.25±0.28,64.24±0.25,63.69±0.27,70.05±1.66,68.61±0.48,68.60±0.51,60.25±1.50
New Airlines,66.58±0.09,65.46±0.05,63.03±0.48,67.03±0.49,67.64±0.34,65.31±0.09,64.53±0.17,65.27±0.10
Shuttle,99.34±0.07,98.58±0.11,99.66±0.03,97.31±0.18,98.65±0.25,99.54±0.07,99.48±0.08,94.57±0.69
Vehicle Sensit,79.64±0.83,75.70±0.67,73.73±0.16,73.11±0.25,79.11±1.74,75.44±0.50,78.10±0.39,75.38±0.27
Hyperplane High Gradual Drift,91.85±0.03,91.56±0.03,75.78±0.11,91.27±0.53,87.69±3.35,75.73±0.16,71.95±0.83,84.88±0.13
Movingrbf,88.00±0.34,86.82±0.17,85.11±0.70,67.23±0.31,83.20±2.35,51.27±0.32,49.62±0.67,39.36±0.40
Moving Squares,98.21±0.35,98.61±0.20,88.69±0.15,88.34±0.69,87.12±3.02,59.59±1.98,75.44±1.10,80.75±0.48


In [17]:
def add_mean_row_to_dataframe(df):
    """Return a new DataFrame equal to ``df`` plus a trailing row labelled 'Mean'.

    The extra row holds the column-wise mean of ``df`` (as computed by
    ``DataFrame.mean``). ``df`` itself is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are averaged.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the additional 'Mean' row appended at the end.
    """
    # One-row frame holding the mean of every column
    mean_values = df.mean().to_frame().transpose()

    # Label the row directly. Renaming after the fact by the default label 0
    # would also rename any pre-existing row whose label is 0.
    mean_values.index = ['Mean']

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    return pd.concat([df, mean_values], ignore_index=False)

def add_mean_rank_row_to_dataframe(df,ascending=True):
    """Return a new DataFrame equal to ``df`` plus a trailing 'Mean_Rank' row.

    Every row except the last one is ranked across its columns; the new row
    holds the per-column average of those ranks. The last row of ``df`` is left
    out of the ranking (the notebook passes a frame whose last row is the
    'Mean' summary row). ``df`` itself is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one column per competitor and a trailing summary row.
    ascending : bool, default True
        Passed to ``DataFrame.rank``: with True the smallest value in a row
        gets rank 1, with False the largest value does.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the additional 'Mean_Rank' row appended at the end.
    """
    # Rank within each row, skipping the trailing summary row
    row_ranks = df.iloc[:-1].rank(axis=1, ascending=ascending)

    # One-row frame holding the average rank of every column
    mean_values = row_ranks.mean().to_frame().transpose()

    # Label the row directly. Renaming after the fact by the default label 0
    # would also rename any pre-existing row whose label is 0.
    mean_values.index = ['Mean_Rank']

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    return pd.concat([df, mean_values], ignore_index=False)

# Score (mean prequential score ×100, with Mean and Mean_Rank rows)

In [18]:
# Add the labels of the two summary rows to the row order used by the tables.
# Each label is added only if it is missing, so re-running this cell does not
# put duplicate labels into the list (duplicates would produce duplicated rows
# when the tables are reindexed).
for summary_label in ('Mean', 'Mean_Rank'):
    if summary_label not in dataset_name_list:
        dataset_name_list.append(summary_label)

In [19]:
# Average `prequential_scores` for every dataset/model pair.
temp_df = df.pivot_table(index='dataset',columns='model',values='prequential_scores',aggfunc='mean')
temp_df.columns.name=None

# Append the 'Mean' row and scale everything by 100, then append the
# 'Mean_Rank' row (ascending=False: the highest score in a row gets rank 1).
temp_df = add_mean_row_to_dataframe(temp_df).multiply(100)
temp_df = add_mean_rank_row_to_dataframe(temp_df,ascending=False)

# Fix the row and column order, then switch to human-readable labels.
temp_df = temp_df.reindex(dataset_name_list)
temp_df.index = [" ".join(i.split("_")).title() for i in temp_df.index]
temp_df = temp_df[columns_order]
temp_df.columns = columns_rename

# Higher is better for the score rows, but lower is better for the mean rank.
# Highlighting the row maximum everywhere would mark the worst model in the
# 'Mean Rank' row, so that row is highlighted by its minimum instead.
rank_rows = [label for label in temp_df.index if label == 'Mean Rank']
score_rows = [label for label in temp_df.index if label != 'Mean Rank']
styled = temp_df.style.highlight_max(color = 'yellow', axis = 1, subset = pd.IndexSlice[score_rows, :])
if rank_rows:
    styled = styled.highlight_min(color = 'yellow', axis = 1, subset = pd.IndexSlice[rank_rows, :])
styled

Unnamed: 0,ASML_E,ASML_B,AutoClass,OAML,EAML,ARFC,SRPC,HATC
Adult,80.009418,80.355432,76.862536,72.073032,80.556898,81.351296,80.11138,81.635887
Electricity,91.500706,90.645304,87.977578,86.957407,89.133121,85.840837,86.634887,83.189001
Forest Cover,95.632414,95.388288,95.31545,83.157048,94.065493,88.853483,92.965963,70.98979
Insects,70.952165,71.245837,64.238949,63.686245,70.054874,68.61376,68.595595,60.246745
New Airlines,66.576106,65.460869,63.026421,67.02869,67.635131,65.305692,64.533068,65.272098
Shuttle,99.34,98.583793,99.664828,97.31328,98.645517,99.541379,99.475517,94.565172
Vehicle Sensit,79.643959,75.69564,73.731934,73.112975,79.110101,75.443123,78.09577,75.378776
Hyperplane High Gradual Drift,91.85268,91.558,75.78372,91.274914,87.6868,75.72976,71.95376,84.87512
Movingrbf,88.0007,86.8222,85.1102,67.231572,83.2,51.2673,49.6177,39.3578
Moving Squares,98.2131,98.6109,88.6865,88.340983,87.1183,59.5883,75.444477,80.7522


# TIME

In [20]:
# Average `time` for every dataset/model pair.
temp_df = df.pivot_table(
    index='dataset', columns='model', values='time', aggfunc='mean'
).rename_axis(None, axis='columns')

# Append the 'Mean' row, then the 'Mean_Rank' row
# (ascending=True: the smallest value in a row gets rank 1).
temp_df = add_mean_rank_row_to_dataframe(
    add_mean_row_to_dataframe(temp_df), ascending=True
)

# Fix the row and column order, then switch to human-readable labels.
temp_df = temp_df.reindex(dataset_name_list)[columns_order]
temp_df.index = [label.replace("_", " ").title() for label in temp_df.index]
temp_df.columns = columns_rename

# Mark the smallest value of every row.
temp_df.style.highlight_min(color='yellow', axis=1)

Unnamed: 0,ASML_E,ASML_B,AutoClass,OAML,EAML,ARFC,SRPC,HATC
Adult,267.645828,162.932108,642.177901,94.820077,624.771619,83.801383,188.412519,46.76303
Electricity,118.286254,88.093301,621.687502,2444.210254,281.305515,53.310854,142.163612,27.195322
Forest Cover,8243.766147,3783.625193,12000.874686,19989.76102,6921.744572,1225.470848,6717.351377,1751.638008
Insects,977.698078,422.639361,1782.637241,798.72511,1552.306368,113.817312,626.203494,124.689219
New Airlines,2674.950077,1772.958102,5311.560684,3145.301892,3404.492021,749.642307,1824.013838,324.858694
Shuttle,693.616411,328.568572,504.780141,400.497986,499.644658,62.870735,150.594469,49.254889
Vehicle Sensit,4028.786397,1548.139459,3312.301328,201.941889,9193.06433,283.386112,2090.456466,434.709332
Hyperplane High Gradual Drift,1335.765805,1001.757439,4229.894579,4081.310597,3137.776158,874.794145,1913.355277,391.905536
Movingrbf,1544.654801,876.98326,1862.355546,2554.011037,2001.402843,340.837484,1084.485961,186.074948
Moving Squares,681.170153,442.1294,2252.658359,11383.016381,1069.368911,264.430284,271.948976,93.830947


# MEMORY

In [21]:
# Average `memory` for every dataset/model pair.
temp_df = df.pivot_table(
    index='dataset', columns='model', values='memory', aggfunc='mean'
).rename_axis(None, axis='columns')

# Append the 'Mean' row, then the 'Mean_Rank' row
# (ascending=True: the smallest value in a row gets rank 1).
temp_df = add_mean_rank_row_to_dataframe(
    add_mean_row_to_dataframe(temp_df), ascending=True
)

# Fix the row and column order, then switch to human-readable labels.
temp_df = temp_df.reindex(dataset_name_list)[columns_order]
temp_df.index = [label.replace("_", " ").title() for label in temp_df.index]
temp_df.columns = columns_rename

# Mark the smallest value of every row.
temp_df.style.highlight_min(color='yellow', axis=1)

Unnamed: 0,ASML_E,ASML_B,AutoClass,OAML,EAML,ARFC,SRPC,HATC
Adult,10.496094,9.019531,254.028125,9.0625,52.623438,19.1,24.841406,3.553125
Electricity,5.117188,5.360938,43.003125,18.257813,4.821094,6.108594,14.901563,2.364844
Forest Cover,64.940625,58.426563,81.053906,48.898438,64.170312,37.497656,90.740625,36.039844
Insects,15.669531,11.023438,229.20625,17.453125,13.479688,5.221875,8.325781,3.621094
New Airlines,60.992969,51.655469,329.980469,62.402345,61.682031,44.926563,64.473438,31.4875
Shuttle,16.030469,10.292969,9.069531,8.3125,5.735938,4.190625,5.989063,2.925781
Vehicle Sensit,17.601562,17.1375,193.222656,45.308594,386.013281,44.394531,163.1125,10.807031
Hyperplane High Gradual Drift,34.450781,33.077344,36.664844,56.070315,67.117969,98.189844,128.392969,29.534375
Movingrbf,14.475,13.953125,78.752344,4.281251,14.185937,16.475781,24.479688,10.642188
Moving Squares,16.6375,15.914062,14.340625,8.789063,12.625,17.279688,10.432813,10.238281
