In [5]:
import json

In [6]:
def format_float(value):
    """Render ``value`` as a signed number with three decimals.

    NaN is rendered as the fixed-width placeholder ``'   NaN'`` so it lines
    up with six-character values such as ``'+0.703'``.
    """
    # NaN is the only float that compares unequal to itself.
    is_nan = value != value
    return '   NaN' if is_nan else f"{value:+.3f}"

In [7]:
# Directories of the two runs being compared. Both are relative paths and
# already end in '/', because the loading cell joins them to report_path by
# plain string concatenation.
experiment_1 = 'saves/baseline/'
experiment_2 = 'path_autoencoder/saves/path_autoencoded/'

# File name of the per-run report. Despite the .txt extension, the file is
# parsed with json.load when it is read.
report_path = 'classification_report.txt'

In [8]:
# load both dicts

# Open each report inside a context manager so the file handle is closed as
# soon as the JSON has been parsed, instead of being left for the garbage
# collector (which also triggers a ResourceWarning).
with open(experiment_1 + report_path) as report_file:
    experiment_1_dict = json.load(report_file)
with open(experiment_2 + report_path) as report_file:
    experiment_2_dict = json.load(report_file)

In [9]:
# compare both dicts

# Pull precision / recall / f1-score out of the two aggregate rows
# ('macro avg' and 'weighted avg') of each report before those rows are
# removed below.
e1_m_precision, e1_m_recall, e1_m_f1 = (
    experiment_1_dict['macro avg'][metric]
    for metric in ('precision', 'recall', 'f1-score')
)
e1_w_precision, e1_w_recall, e1_w_f1 = (
    experiment_1_dict['weighted avg'][metric]
    for metric in ('precision', 'recall', 'f1-score')
)

e2_m_precision, e2_m_recall, e2_m_f1 = (
    experiment_2_dict['macro avg'][metric]
    for metric in ('precision', 'recall', 'f1-score')
)
e2_w_precision, e2_w_recall, e2_w_f1 = (
    experiment_2_dict['weighted avg'][metric]
    for metric in ('precision', 'recall', 'f1-score')
)

# remove macro avg, weighted avg and accuracy so that only per-class rows
# remain in both dicts. This mutates the dicts in place: re-running this cell
# without reloading the reports raises KeyError.
del (
    experiment_1_dict['macro avg'],
    experiment_1_dict['weighted avg'],
    experiment_1_dict['accuracy'],
)
del (
    experiment_2_dict['macro avg'],
    experiment_2_dict['weighted avg'],
    experiment_2_dict['accuracy'],
)

In [10]:
# print all metrics before and after
print('Before:')
# One (label, value) pair per output line, in display order.
for label, value in (
    ('Experiment 1 macro precision: ', e1_m_precision),
    ('Experiment 1 macro recall: ', e1_m_recall),
    ('Experiment 1 macro f1: ', e1_m_f1),
    ('Experiment 1 weighted precision: ', e1_w_precision),
    ('Experiment 1 weighted recall: ', e1_w_recall),
    ('Experiment 1 weighted f1: ', e1_w_f1),
    ('Experiment 2 macro precision: ', e2_m_precision),
    ('Experiment 2 macro recall: ', e2_m_recall),
    ('Experiment 2 macro f1: ', e2_m_f1),
    ('Experiment 2 weighted precision: ', e2_w_precision),
    ('Experiment 2 weighted recall: ', e2_w_recall),
    ('Experiment 2 weighted f1: ', e2_w_f1),
):
    print(label + format_float(value))



Before:
Experiment 1 macro precision: +0.703
Experiment 1 macro recall: +0.770
Experiment 1 macro f1: +0.669
Experiment 1 weighted precision: +0.923
Experiment 1 weighted recall: +0.884
Experiment 1 weighted f1: +0.889
Experiment 2 macro precision: +0.838
Experiment 2 macro recall: +0.866
Experiment 2 macro f1: +0.801
Experiment 2 weighted precision: +0.986
Experiment 2 weighted recall: +0.973
Experiment 2 weighted f1: +0.976


In [11]:
# print difference in metrics
# Each row: (padded label, experiment 2 value, experiment 1 value, line end).
# The last macro row ends with a blank line to separate it from the
# weighted rows.
for label, after, before, line_end in (
    ('macro avg precision difference:     ', e2_m_precision, e1_m_precision, '\n'),
    ('macro avg recall difference:        ', e2_m_recall, e1_m_recall, '\n'),
    ('macro avg f1 difference:            ', e2_m_f1, e1_m_f1, '\n\n'),
    ('weighted avg precision difference:  ', e2_w_precision, e1_w_precision, '\n'),
    ('weighted avg recall difference:     ', e2_w_recall, e1_w_recall, '\n'),
    ('weighted avg f1 difference:         ', e2_w_f1, e1_w_f1, '\n'),
):
    print(f'{label}{after - before:+.3f}', end=line_end)

macro avg precision difference:     +0.134
macro avg recall difference:        +0.097
macro avg f1 difference:            +0.132

weighted avg precision difference:  +0.063
weighted avg recall difference:     +0.089
weighted avg f1 difference:         +0.087


In [12]:
# build a dict with the difference in metrics
# (experiment 2 minus experiment 1, per class). 'support' is a sample count,
# not a score, so it is copied from experiment 1 instead of being subtracted.
# A class that is missing from experiment 2 still raises KeyError.
diff_dict = {
    key: {
        metric: (
            e1_value if metric == 'support'
            else experiment_2_dict[key][metric] - e1_value
        )
        for metric, e1_value in e1_metrics.items()
    }
    for key, e1_metrics in experiment_1_dict.items()
}


def _f1_sort_key(item):
    """Sort key for ``(class_name, metrics)`` pairs, ordered by f1-score difference.

    NaN compares False against everything, so using it directly as a sort key
    leaves the resulting order arbitrary. Real values are tagged ``True`` and
    NaN values ``False`` so that, under ``reverse=True``, every NaN entry lands
    after all real values.
    """
    f1_diff = item[1]['f1-score']
    if f1_diff != f1_diff:  # NaN is the only value unequal to itself
        return (False, 0.0)
    return (True, f1_diff)


# sort by f1-score (largest improvement first, NaN differences last)
sorted_diff_dict = dict(sorted(diff_dict.items(), key=_f1_sort_key, reverse=True))

In [13]:
# print sorted dict
print('Sorted by f1-score difference:')
for class_name, row in sorted_diff_dict.items():
    # Pad "<class>:" to 17 characters so the metric columns line up.
    label = (class_name + ":").ljust(17)
    print(
        f'{label}'
        f'precision: {format_float(row["precision"])}, '
        f'recall: {format_float(row["recall"])}, '
        f'f1-score: {format_float(row["f1-score"])}, '
        f'support: {row["support"]}'
    )

Sorted by f1-score difference:
python2.7:       precision: +1.000, recall: +0.999, f1-score: +0.999, support: 841.0
hostname:        precision:    NaN, recall: +1.000, f1-score: +0.833, support: 10.0
sed:             precision: +0.667, recall: +1.000, f1-score: +0.800, support: 2.0
mail:            precision: +0.857, recall: +0.500, f1-score: +0.778, support: 2.0
tty:             precision: +0.875, recall: +0.000, f1-score: +0.778, support: 2.0
dhclient:        precision: +0.999, recall: +0.353, f1-score: +0.582, support: 17.0
master:          precision: +0.733, recall: +0.000, f1-score: +0.578, support: 580.0
ls:              precision: +0.729, recall: +0.250, f1-score: +0.563, support: 12.0
sh:              precision: -0.004, recall: +0.702, f1-score: +0.542, support: 1987.0
sendmail:        precision: +0.587, recall: +0.000, f1-score: +0.393, support: 21.0
rm:              precision: +0.396, recall: +0.333, f1-score: +0.368, support: 36.0
imapd:           precision: +0.159, recall: 

In [14]:
# For each experiment, collect the classes whose precision is NaN.
# The check relies on NaN being the only value that is unequal to itself.
# Both lists walk the classes of experiment 1, so a class missing from
# experiment 2 raises KeyError.
e1_never_predicted = [
    name
    for name, row in experiment_1_dict.items()
    if row['precision'] != row['precision']
]
e2_never_predicted = [
    name
    for name in experiment_1_dict
    if experiment_2_dict[name]['precision'] != experiment_2_dict[name]['precision']
]

print('Classes never predicted in experiment 1:')
print(e1_never_predicted)
print('Classes never predicted in experiment 2:')
print(e2_never_predicted)

Classes never predicted in experiment 1:
['adjkerntz', 'basename', 'expr', 'hostname']
Classes never predicted in experiment 2:
['adjkerntz', 'basename', 'expr']
