In [1]:
import base64
import json
import pandas as pd
import numpy as np
# Experiment grid. One value from each tuple below (plus an `optuna` flag)
# makes up the configuration that `gen_hash` turns into a result key.

# Preprocessing variants.
preprocesses = ('none', 'ae_standardization', 'ae_normalization', 'standardization', 'normalization')

# Layer-size lists; their `str()` form is used both inside the result key and
# as the row label of the score tables.
# NOTE(review): [0] presumably denotes the "no autoencoder" baseline — confirm.
layers = ([0], [20, 10, 5], [20, 15, 10], [20, 15, 10, 5])

# Short identifiers of the evaluated models.
models = (
    'lr',
    'svm',
    'rf',
    'mp',
    'lgb',
)

# Names of the benchmark datasets (30 entries).
datasets = (
    'kdd99',
    'kdd99_dropped',
    'creditcardfraud',
    'ecoli',
    'optical_digits',
    'satimage',
    'pen_digits',
    'abalone',
    'sick_euthyroid',
    'spectrometer',
    'car_eval_34',
    'isolet',
    'us_crime',
    'yeast_ml8',
    'scene',
    'libras_move',
    'thyroid_sick',
    'coil_2000',
    'arrhythmia',
    'solar_flare_m0',
    'oil',
    'car_eval_4',
    'wine_quality',
    'letter_img',
    'yeast_me2',
    'webpage',
    'ozone_level',
    'mammography',
    'protein_homo',
    'abalone_19',
)

# NOTE(review): presumably which class(es) the autoencoder is fitted on — confirm.
used_classes = (
    'all',
    'minority',
    'majority',
)


def gen_hash(preprocess, layer, model, dataset, used_class, optuna) -> str:
    """Return the lookup key for one experiment configuration.

    Each argument is converted with ``str`` and the pieces are concatenated
    in argument order without any separator. The encoded bytes of that
    string are Base64-encoded and returned as text.

    Despite the name this is a reversible encoding, not a digest. The exact
    format must not change: the notebook uses the returned value to look up
    entries by the ``hash`` field stored in the results file.
    """
    fields = (preprocess, layer, model, dataset, used_class, optuna)
    raw_key = "".join(map(str, fields))
    return base64.b64encode(raw_key.encode()).decode()

In [2]:
# Load the raw experiment records. The encoding is given explicitly so the
# file is decoded the same way on every platform instead of depending on the
# locale default.
with open("results/results.json", encoding="utf-8") as f:
    data = json.load(f)

# Index the two F1 scores of every record by its experiment hash, so a
# configuration can be looked up via `gen_hash(...)`. If the file contains
# the same hash more than once, the last record wins.
results = {
    entry['hash']: {
        'minority': entry['result']['minority']['f1'],
        'macro': entry['result']['macro']['f1'],
    }
    for entry in data
}

In [3]:
# Spot-check a single configuration to confirm the hash lookup works.
results[gen_hash(
    preprocess='none',
    layer=[0],
    model='lr',
    dataset='kdd99',
    used_class='all',
    optuna=False,
)]

{'minority': 0.0, 'macro': 0.4017}

In [4]:
# Configuration shown in the tables below: the first entry of each option tuple.
pp = preprocesses[0]
mdl = models[0]
aeclass = used_classes[0]


def build_f1_frame(metric, preprocess, model, used_class, optuna=False):
    """Collect one F1 metric for every (dataset, layer) pair into a DataFrame.

    Parameters
    ----------
    metric : str
        Key of the per-experiment score dict stored in ``results``
        (``'minority'`` or ``'macro'``).
    preprocess, model, used_class, optuna
        Fixed parts of the experiment configuration, forwarded to ``gen_hash``.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``datasets`` and one row per entry of
        ``layers``, labelled with ``str(layer)``.

    Raises
    ------
    KeyError
        If ``results`` has no entry for one of the requested configurations.
    """
    return pd.DataFrame({
        dataset: {
            str(layer): results[
                gen_hash(preprocess, layer, model, dataset, used_class, optuna)
            ][metric]
            for layer in layers
        }
        for dataset in datasets
    })


minority_df = build_f1_frame('minority', pp, mdl, aeclass)
macro_df = build_f1_frame('macro', pp, mdl, aeclass)

In [5]:
# Transpose for display: one row per dataset, one column per layer configuration.
minority_df.transpose()

Unnamed: 0,[0],"[20, 10, 5]","[20, 15, 10]","[20, 15, 10, 5]"
kdd99,0.0,0.0,0.0,0.0
kdd99_dropped,0.0,0.0,0.0,0.0
creditcardfraud,0.6807,0.6828,0.7014,0.6759
ecoli,0.0556,0.0556,0.0556,0.0556
optical_digits,0.8256,0.8355,0.8338,0.8415
satimage,0.0427,0.0516,0.0427,0.0512
pen_digits,0.78,0.8067,0.7965,0.8251
abalone,0.0,0.0,0.0,0.0
sick_euthyroid,0.69,0.7042,0.7179,0.6722
spectrometer,0.8335,0.8214,0.7885,0.7574


In [8]:
from visualization.result_table import ResultTable

# Build and aggregate the table for one configuration.
# NOTE(review): the meaning of the positional arguments is defined by
# ResultTable, which is not visible here — confirm against its signature.
t = ResultTable('lr', False, 'なし', 'なし', 'all', 1)
t.aggregate_results()

# Resolve the path and render the table BEFORE opening the file: opening with
# mode "w" truncates the target immediately, so a failure inside compile()
# would otherwise leave an empty .tex file in place of the previous table.
table_path = f"thesis/tables/{t.label}.tex"
table_tex = t.compile()

# Write with an explicit encoding so the output does not depend on the locale
# default (the rendered table may contain non-ASCII text).
with open(table_path, "w", encoding="utf-8") as f:
    f.write(table_tex)
t.label

'lr|none|all|0'