In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms

import json

In [None]:
def extract_values(data, metric, subtype):
    """Collect one metric value per model from a results mapping.

    For every key of ``data`` the entry's ``"metrics"`` table is read and the
    value stored under ``"<metric>_<subtype>"`` is appended to the result.

    Returns a list with one value per key, in the iteration order of ``data``.
    """
    collected = []
    for model_name in data:
        metric_table = data[model_name]["metrics"]
        collected.append(metric_table[metric + "_" + subtype])
    return collected

def return_seed_from_model_index(data, model_index):
    """Return the seed stored for the entry ``"model_<model_index>"`` as an int.

    The stored seed is converted through ``float`` first, so values such as
    ``"42.0"`` are accepted and truncated to ``42``.
    """
    model_key = 'model_' + str(model_index)
    raw_seed = data[model_key]['seed']
    return int(float(raw_seed))

def customized_boxplot(box_data, show_means=True):
    """Draw one notched box per entry of ``box_data`` and show the figure.

    Parameters
    ----------
    box_data : dict
        Maps a tick label to the sequence of values drawn as one box.
    show_means : bool, default True
        When true, the median of every box is written just above the axes.
        NOTE(review): despite its name this flag does not control the mean
        markers, which are always drawn; the behaviour is kept as it was.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    mean_marker_style = dict(marker='o', markeredgecolor='black', markerfacecolor='black')
    median_line_style = dict(color='black')
    fig, ax = plt.subplots(figsize=(15, 10))

    labels = list(box_data.keys())
    # Pass a real list: a dict view is not a sequence and may be mishandled
    # by older Matplotlib versions.
    bp = ax.boxplot(
        list(box_data.values()),
        notch=True,
        showmeans=True,
        meanprops=mean_marker_style,
        medianprops=median_line_style,
    )
    ax.set_xticks(range(1, len(labels) + 1))
    ax.set_xticklabels(labels, fontsize=15)
    ax.tick_params(axis='y', labelsize=15)

    if show_means:
        # x is given in data coordinates (box position), y in axes coordinates,
        # so 1.01 places the label slightly above the top edge of the plot.
        label_transform = transforms.blended_transform_factory(ax.transData, ax.transAxes)
        for position, median_line in enumerate(bp['medians'], start=1):
            median_value = median_line.get_ydata()[1]
            ax.text(
                position,
                1.01,
                '%.2f' % median_value,
                weight='bold',
                fontsize=15,
                transform=label_transform,
                horizontalalignment='center',
            )

    ax.set_xlabel('Training Type', fontsize=16)
    ax.set_ylabel('RMSE values', fontsize=16)
    ax.grid(True)
    plt.show()

# SP Data (AIRQ data)

## D to D

In [None]:
cd 'experiments/D'

In [None]:
# Load the three result files of this experiment from the current directory.
# The encoding is pinned because open() otherwise uses a platform-dependent default.
result_paths = (
    'type#1(D)_airq-data.json',
    'type#2(D)_airq-data.json',
    'type#3(D)_airq-data.json',
)
loaded_results = []
for result_path in result_paths:
    with open(result_path, encoding='utf-8') as json_file:
        loaded_results.append(json.load(json_file))

type1, type2, type3 = loaded_results

In [None]:
# Signature reminder: extract_values(data, metric, subtype)
#   metric:  rmse, mae, meae, mape
#   subtype: entire, train, test

metric, subtype = 'rmse', 'entire'

# One list of values per loaded result file, in the order type1, type2, type3.
rmses_type1, rmses_type2, rmses_type3 = (
    extract_values(results, metric, subtype) for results in (type1, type2, type3)
)

box_data = dict(zip(("Type#1", "Type#2", "Type#3"), (rmses_type1, rmses_type2, rmses_type3)))
customized_boxplot(box_data)

In [None]:
# Same comparison as the previous plot, this time reading the 'rmse_test' values.
metric, subtype = 'rmse', 'test'

rmses_type1, rmses_type2, rmses_type3 = (
    extract_values(results, metric, subtype) for results in (type1, type2, type3)
)

box_data = dict(zip(("Type#1", "Type#2", "Type#3"), (rmses_type1, rmses_type2, rmses_type3)))
customized_boxplot(box_data)

In [None]:
# Report the lowest value in rmses_type3 and the seed of the model that produced it.
# extract_values iterates the result dict key by key, so position i of rmses_type3
# belongs to the i-th key of type3. Taking that key directly avoids assuming the
# keys are stored as model_0, model_1, ... in exactly that order.
boxdata = rmses_type3
min_value = min(boxdata)
min_index = boxdata.index(min_value)
best_model_key = list(type3)[min_index]
model_seed = int(float(type3[best_model_key]['seed']))

print(f'Min value {min_value:.3f} from model seed {model_seed:d}')

## D + A to D

In [None]:
cd '../D + A'

In [None]:
# Load the three result files of this experiment from the current directory.
# The encoding is pinned because open() otherwise uses a platform-dependent default.
result_paths = (
    'type#1(DA)_airq-data.json',
    'type#2(DA)_airq-data.json',
    'type#3(DA)_airq-data.json',
)
loaded_results = []
for result_path in result_paths:
    with open(result_path, encoding='utf-8') as json_file:
        loaded_results.append(json.load(json_file))

type1, type2, type3 = loaded_results

In [None]:
# Signature reminder: extract_values(data, metric, subtype)
#   metric:  rmse, mae, meae, mape
#   subtype: entire, train, test

metric, subtype = 'rmse', 'entire'

# One list of values per loaded result file, in the order type1, type2, type3.
rmses_type1, rmses_type2, rmses_type3 = (
    extract_values(results, metric, subtype) for results in (type1, type2, type3)
)

box_data = dict(zip(("Type#1", "Type#2", "Type#3"), (rmses_type1, rmses_type2, rmses_type3)))
customized_boxplot(box_data)

In [None]:
# Same comparison as the previous plot, this time reading the 'rmse_test' values.
metric, subtype = 'rmse', 'test'

rmses_type1, rmses_type2, rmses_type3 = (
    extract_values(results, metric, subtype) for results in (type1, type2, type3)
)

box_data = dict(zip(("Type#1", "Type#2", "Type#3"), (rmses_type1, rmses_type2, rmses_type3)))
customized_boxplot(box_data)

In [None]:
# Report the lowest value in rmses_type3 and the seed of the model that produced it.
# extract_values iterates the result dict key by key, so position i of rmses_type3
# belongs to the i-th key of type3. Taking that key directly avoids assuming the
# keys are stored as model_0, model_1, ... in exactly that order.
boxdata = rmses_type3
min_value = min(boxdata)
min_index = boxdata.index(min_value)
best_model_key = list(type3)[min_index]
model_seed = int(float(type3[best_model_key]['seed']))

print(f'Min value {min_value:.3f} from model seed {model_seed:d}')

## D + A + T + H to 2

In [None]:
cd '../D + A + T + H'

In [None]:
# Load the three result files of this experiment from the current directory.
# The encoding is pinned because open() otherwise uses a platform-dependent default.
result_paths = (
    'type#1(DATH)_airq-data.json',
    'type#2(DATH)_airq-data.json',
    'type#3(DATH)_airq-data.json',
)
loaded_results = []
for result_path in result_paths:
    with open(result_path, encoding='utf-8') as json_file:
        loaded_results.append(json.load(json_file))

type1, type2, type3 = loaded_results

In [None]:
# Signature reminder: extract_values(data, metric, subtype)
#   metric:  rmse, mae, meae, mape
#   subtype: entire, train, test

metric, subtype = 'rmse', 'entire'

# One list of values per loaded result file, in the order type1, type2, type3.
rmses_type1, rmses_type2, rmses_type3 = (
    extract_values(results, metric, subtype) for results in (type1, type2, type3)
)

box_data = dict(zip(("Type#1", "Type#2", "Type#3"), (rmses_type1, rmses_type2, rmses_type3)))
customized_boxplot(box_data)

In [None]:
# Same comparison as the previous plot, this time reading the 'rmse_test' values.
metric, subtype = 'rmse', 'test'

rmses_type1, rmses_type2, rmses_type3 = (
    extract_values(results, metric, subtype) for results in (type1, type2, type3)
)

box_data = dict(zip(("Type#1", "Type#2", "Type#3"), (rmses_type1, rmses_type2, rmses_type3)))
customized_boxplot(box_data)

In [None]:
# Report the lowest value in rmses_type3 and the seed of the model that produced it.
# extract_values iterates the result dict key by key, so position i of rmses_type3
# belongs to the i-th key of type3. Taking that key directly avoids assuming the
# keys are stored as model_0, model_1, ... in exactly that order.
boxdata = rmses_type3
min_value = min(boxdata)
min_index = boxdata.index(min_value)
best_model_key = list(type3)[min_index]
model_seed = int(float(type3[best_model_key]['seed']))

print(f'Min value {min_value:.3f} from model seed {model_seed:d}')

## D + C + A + T + H to 2

In [None]:
cd '../D + C + A + T + H'

In [None]:
# Load the three result files of this experiment from the current directory.
# The encoding is pinned because open() otherwise uses a platform-dependent default.
result_paths = (
    'type#1(DCATH)_airq-data.json',
    'type#2(DCATH)_airq-data.json',
    'type#3(DCATH)_airq-data.json',
)
loaded_results = []
for result_path in result_paths:
    with open(result_path, encoding='utf-8') as json_file:
        loaded_results.append(json.load(json_file))

type1, type2, type3 = loaded_results

In [None]:
# Signature reminder: extract_values(data, metric, subtype)
#   metric:  rmse, mae, meae, mape
#   subtype: entire, train, test

metric, subtype = 'rmse', 'entire'

# One list of values per loaded result file, in the order type1, type2, type3.
rmses_type1, rmses_type2, rmses_type3 = (
    extract_values(results, metric, subtype) for results in (type1, type2, type3)
)

box_data = dict(zip(("Type#1", "Type#2", "Type#3"), (rmses_type1, rmses_type2, rmses_type3)))
customized_boxplot(box_data)

In [None]:
# Same comparison as the previous plot, this time reading the 'rmse_test' values.
metric, subtype = 'rmse', 'test'

rmses_type1, rmses_type2, rmses_type3 = (
    extract_values(results, metric, subtype) for results in (type1, type2, type3)
)

box_data = dict(zip(("Type#1", "Type#2", "Type#3"), (rmses_type1, rmses_type2, rmses_type3)))
customized_boxplot(box_data)

In [None]:
# Report the lowest value in rmses_type3 and the seed of the model that produced it.
# extract_values iterates the result dict key by key, so position i of rmses_type3
# belongs to the i-th key of type3. Taking that key directly avoids assuming the
# keys are stored as model_0, model_1, ... in exactly that order.
boxdata = rmses_type3
min_value = min(boxdata)
min_index = boxdata.index(min_value)
best_model_key = list(type3)[min_index]
model_seed = int(float(type3[best_model_key]['seed']))

print(f'Min value {min_value:.3f} from model seed {model_seed:d}')

#  Improvement percentage to Entire Data

## Type#1

In [None]:
# Percentage by which each value lies below its reference:
# (reference - value) / reference * 100, printed one result per line.
value_1, value_2, value_3, value_4 = 10.55, 10.39, 11.22, 12.46

reference_and_value = (
    (50.83, value_1),
    (38.34, value_2),
    (26.10, value_3),
    (44.47, value_4),
)
for reference, value in reference_and_value:
    print(((reference - value) / reference) * 100)

## Type#2

In [None]:
# Percentage by which each value lies below its reference:
# (reference - value) / reference * 100, printed one result per line.
value_1, value_2, value_3, value_4 = 10.64, 10.50, 11.01, 11.18

reference_and_value = (
    (50.83, value_1),
    (38.34, value_2),
    (26.10, value_3),
    (44.47, value_4),
)
for reference, value in reference_and_value:
    print(((reference - value) / reference) * 100)

## Type#3

In [None]:
# Percentage by which each value lies below its reference:
# (reference - value) / reference * 100, printed one result per line.
value_1, value_2, value_3, value_4 = 4.79, 6.23, 6.41, 8.54

reference_and_value = (
    (50.83, value_1),
    (38.34, value_2),
    (26.10, value_3),
    (44.47, value_4),
)
for reference, value in reference_and_value:
    print(((reference - value) / reference) * 100)

#  Improvement percentage to Test Data

## Type#1

In [None]:
# Percentage by which each value lies below its reference:
# (reference - value) / reference * 100, printed one result per line.
# A negative result means the value is larger than the reference.
value_1, value_2, value_3, value_4 = 13.80, 13.41, 15.64, 15.59

reference_and_value = (
    (9.97, value_1),
    (12.11, value_2),
    (12.40, value_3),
    (14.72, value_4),
)
for reference, value in reference_and_value:
    print(((reference - value) / reference) * 100)

## Type#2

In [None]:
# Percentage by which each value lies below its reference:
# (reference - value) / reference * 100, printed one result per line.
# A negative result means the value is larger than the reference.
value_1, value_2, value_3, value_4 = 14.58, 14.05, 15.73, 15.39

reference_and_value = (
    (9.97, value_1),
    (12.11, value_2),
    (12.40, value_3),
    (14.72, value_4),
)
for reference, value in reference_and_value:
    print(((reference - value) / reference) * 100)

## Type#3

In [None]:
# Percentage by which each value lies below its reference:
# (reference - value) / reference * 100, printed one result per line.
# A negative result means the value is larger than the reference.
value_1, value_2, value_3, value_4 = 5.74, 8.13, 8.80, 13.2

reference_and_value = (
    (9.97, value_1),
    (12.11, value_2),
    (12.40, value_3),
    (14.72, value_4),
)
for reference, value in reference_and_value:
    print(((reference - value) / reference) * 100)