# Results

In [35]:
import pandas as pd

In [36]:
# Directory holding the per-method error CSVs read by get_results. File paths are
# built by plain string concatenation, so the trailing slash is required.
RESULTS_PATH = '../results/errors/'

In [37]:
def get_error_table(data_frame, LEVELS):
    """Summarise forecast errors per hierarchy level and overall.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Long-format errors with at least the columns ``error_metric``,
        ``level`` and ``error``. Other columns are ignored.
    LEVELS : list of str
        Row labels for the hierarchy levels. They are assigned positionally to
        the rows of the pivoted table (ordered by ``level``), so there must be
        exactly one label per distinct ``level`` value.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``LEVELS`` followed by an ``'Overall'`` row, one
        column per error metric, rounded to 2 decimals.
    """
    # Average only the ``error`` column so that extra (possibly non-numeric)
    # columns in the input cannot make ``mean()`` fail.
    level_means = (
        data_frame.groupby(by=['error_metric', 'level'])[['error']]
        .mean()
        .reset_index()
        .round(2)
    )
    errors = level_means.pivot(index='level', columns='error_metric', values='error')
    errors.index = LEVELS

    # Mean error per metric over all rows, reshaped to a single 'Overall' row.
    overall_error = data_frame.groupby(by='error_metric')[['error']].mean().transpose()
    overall_error.index = ['Overall']

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    return pd.concat([errors, overall_error]).round(2)

In [38]:
def get_percentage_improvement(base_df, approach_df):
    """Return the percentage by which ``approach_df`` reduces ``base_df``.

    Computed element-wise as ``(base - approach) / base * 100``; positive
    values mean the approach's value is smaller than the base value.
    """
    error_reduction = base_df - approach_df
    relative_reduction = error_reduction / base_df
    return relative_reduction * 100

In [39]:
def get_results(data, model, LEVELS, compare_prev_params = False):
    """Load the error files for one dataset/model pair and build result tables.

    For every forecast type a CSV named
    ``{RESULTS_PATH}{data}_{model}_{fc_type}_errors.csv`` is read and summarised
    with ``get_error_table``; each summary is then expressed as a percentage
    improvement over the ``BASE`` summary.

    Parameters
    ----------
    data : str
        Dataset key used in the file names. For ``'prison'`` the
        ``mintsample`` file is not read and ``MinT(Sample)`` is left out of the
        output columns.
    model : str
        Model key used in the file names.
    LEVELS : list of str
        Row labels forwarded to ``get_error_table``.
    compare_prev_params : bool, default False
        When true, the ``prev_params`` file is also read and shown as a
        ``PREV_PARAMS`` column placed right after ``ML_CASE1``.

    Returns
    -------
    tuple
        ``(base_table, tables)`` where ``base_table`` is the error table of the
        base forecasts and ``tables`` maps ``'MSE'``, ``'MAE'`` and ``'SMAPE'``
        to a DataFrame of percentage improvements (one column per method).
    """
    is_prison = data == 'prison'
    prefix = f'{data}_{model}'

    # File-name suffix -> column label, listed in the order the files are read.
    labels = {
        'base': 'BASE',
        'case1': 'ML_CASE1',
        'case2': 'ML_CASE2',
        'case3': 'ML_CASE3',
        'case4': 'ML_CASE4',
        'bottomup': 'BU',
        'ols': 'OLS',
        'wls': 'WLS',
        'mintsample': 'MinT(Sample)',
        'mintshrink': 'MinT(Shrink)',
        'erm': 'ERM',
    }
    ml_columns = ['ML_CASE1', 'ML_CASE2', 'ML_CASE3', 'ML_CASE4']
    if compare_prev_params:
        labels['prev_params'] = 'PREV_PARAMS'
        ml_columns.insert(1, 'PREV_PARAMS')

    # Output column order: reconciliation methods first, then the ML cases.
    reconciliation_columns = ['BU', 'OLS', 'WLS']
    if not is_prison:
        reconciliation_columns.append('MinT(Sample)')
    reconciliation_columns += ['MinT(Shrink)', 'ERM']
    column_order = reconciliation_columns + ml_columns

    raw_errors = {
        label: pd.read_csv(f'{RESULTS_PATH}{prefix}_{suffix}_errors.csv', index_col=0)
        for suffix, label in labels.items()
        if not (is_prison and suffix == 'mintsample')
    }
    error_tables = {
        label: get_error_table(frame, LEVELS) for label, frame in raw_errors.items()
    }
    base_table = error_tables['BASE']
    improvements = {
        label: get_percentage_improvement(base_table, table)
        for label, table in error_tables.items()
    }

    final_percentage_tables = {}
    for error_metric in ['MSE', 'MAE', 'SMAPE']:
        # One column per method (BASE excluded), keyed by the method label.
        per_method = {
            label: table[error_metric]
            for label, table in improvements.items()
            if label != 'BASE'
        }
        final_percentage_tables[error_metric] = pd.concat(per_method, axis=1)[column_order]
    return base_table, final_percentage_tables

## PRISON DATASET

In [47]:
# Row labels for the prison tables, passed to get_results as LEVELS.
LEVELS_prison = ['Australia', 'State', 'Gender', 'Legal', 'Indigenous']
# Dataset key: part of the result file names; 'prison' also makes get_results skip MinT(Sample).
data_prison = 'prison'

### ARIMA

In [48]:
# 'arima' selects the prison_arima_<method>_errors.csv files under RESULTS_PATH.
model_arima ='arima'
# Returns the base error table and a dict of percentage-improvement tables keyed by 'MSE'/'MAE'/'SMAPE'.
base_errors_prison_arima, percentages_prison_arima = get_results(data_prison, model_arima, LEVELS_prison)

In [49]:
base_errors_prison_arima

error_metric,MAE,MSE,SMAPE
Australia,505.96,424138.76,1.33
State,369.95,385049.52,7.93
Gender,187.61,148874.19,11.22
Legal,111.09,48125.76,17.65
Indigenous,59.63,18835.82,21.9
Overall,114.37,71339.16,18.27


In [50]:
percentages_prison_arima['MAE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Shrink),ERM,ML_CASE1,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-300.762906,-6.964977,-91.554668,-50.264843,-63.989248,-193.303818,-255.571587,-293.568662,-210.481066
State,8.365995,5.714286,13.599135,15.664279,0.583863,16.43195,8.14975,9.317475,15.210164
Gender,7.275732,5.212942,13.655988,15.51623,-3.048878,13.735942,6.119077,8.091253,12.696551
Legal,-0.531101,-0.68413,-1.395265,3.294626,-31.181925,5.130975,-0.387074,0.42308,4.311819
Indigenous,0.0,0.301861,1.542848,4.662083,-30.420929,4.645313,-0.838504,0.553413,3.504947
Overall,-7.764274,2.011017,2.588091,7.012328,-19.270788,2.028504,-6.601381,-6.723791,0.384716


In [11]:
percentages_prison_arima['MSE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Shrink),ERM,ML_CASE1,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-1204.936229,-13.275846,-230.437789,-114.450766,-150.126572,-705.185327,-995.093273,-1163.80137,-790.806183
State,-0.741349,8.906267,14.764202,20.707253,9.486221,17.75789,-3.302147,0.845512,13.523699
Gender,-4.88986,10.048874,13.384368,19.009192,15.612176,13.273792,-6.228743,-2.903378,9.580364
Legal,-12.232326,-10.070241,-11.037498,-3.632026,-59.544452,0.111292,-13.867729,-10.621962,-2.337709
Indigenous,0.0,1.381039,1.571899,8.681385,-36.429845,7.860555,-0.855922,1.050764,6.404553
Overall,-63.001288,3.695151,-4.110239,7.575853,-15.39393,-23.532083,-54.385165,-59.431622,-30.909545


In [12]:
percentages_prison_arima['SMAPE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Shrink),ERM,ML_CASE1,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-313.533835,-6.766917,-93.233083,-51.12782,-66.165414,-198.496241,-264.661654,-305.263158,-217.293233
State,4.918033,16.519546,9.836066,13.366961,-61.538462,13.114754,2.395965,4.539723,13.114754
Gender,0.178253,3.208556,2.762923,4.812834,-64.973262,8.110517,-7.486631,-2.85205,8.02139
Legal,-3.05949,18.583569,1.926346,2.549575,-46.968839,6.74221,-5.665722,-6.345609,6.062323
Indigenous,0.0,8.949772,3.561644,2.648402,-75.753425,8.26484,-6.666667,-3.607306,3.242009
Overall,-0.821018,11.111111,3.174603,3.065134,-67.104543,7.881773,-6.349206,-4.214559,4.488232


### ETS

In [13]:
# 'ets' selects the prison_ets_<method>_errors.csv files under RESULTS_PATH.
model_ets ='ets'
# Returns the base error table and a dict of percentage-improvement tables keyed by 'MSE'/'MAE'/'SMAPE'.
base_errors_prison_ets, percentages_prison_ets = get_results(data_prison, model_ets, LEVELS_prison)

In [14]:
base_errors_prison_ets

error_metric,MAE,MSE,SMAPE
Australia,296.41,114530.91,0.78
State,385.07,418994.3,8.91
Gender,182.35,143906.22,12.69
Legal,107.81,51455.2,18.46
Indigenous,57.19,15931.03,21.72
Overall,110.78,69711.88,18.64


In [15]:
percentages_prison_ets['MAE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Shrink),ERM,ML_CASE1,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-514.456327,-37.677541,-288.967984,-215.083837,-585.472825,-374.795047,-274.710705,-345.241389,-400.27327
State,19.804191,6.913029,16.612564,16.368452,15.823097,22.653024,22.585504,21.523359,22.38554
Gender,13.605703,-2.127776,10.408555,10.479846,-6.19139,16.100905,12.739238,14.88895,16.013162
Legal,-2.235414,0.176236,-3.598924,-1.094518,-16.733142,-1.205825,-0.742046,-1.864391,-0.667842
Indigenous,0.0,-3.077461,-0.996678,0.681937,-37.961182,0.349711,-0.052457,-0.419654,0.926735
Overall,-4.441235,-0.505506,-1.507492,1.191551,-25.329482,0.198592,1.679003,-0.045135,-0.14443


In [16]:
percentages_prison_ets['MSE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Shrink),ERM,ML_CASE1,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-3671.615741,-95.390013,-1439.641421,-932.717901,-5300.929077,-2471.334935,-1661.406165,-2263.39493,-2671.845793
State,23.569975,14.275249,20.715633,20.751485,-14.648431,30.02149,32.290862,27.765831,28.817375
Gender,12.27637,3.129698,7.906455,7.916996,-34.866193,19.360386,19.980999,17.820161,17.911554
Legal,6.08199,-0.466017,-4.214015,0.125546,-24.828064,8.234639,10.96311,6.729602,8.536397
Indigenous,0.0,-5.287354,-14.794398,-7.243976,-86.333464,1.43889,3.355213,0.49413,2.080656
Overall,-35.948105,4.501743,-11.767894,-3.108093,-102.595354,-14.559369,-1.726851,-13.460762,-18.019368


In [17]:
percentages_prison_ets['SMAPE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Shrink),ERM,ML_CASE1,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-530.769231,-38.461538,-294.871795,-217.948718,-605.128205,-383.333333,-278.205128,-351.282051,-410.25641
State,15.600449,28.619529,12.233446,15.151515,-37.485971,18.406285,16.94725,19.977553,19.304153
Gender,13.55398,-35.303388,10.165485,9.141056,-47.044917,15.524035,9.219858,18.20331,19.621749
Legal,2.112676,-5.146262,1.245937,4.062839,-70.368364,1.408451,-0.054171,9.696641,7.529794
Indigenous,0.0,-16.620626,-1.243094,-0.92081,-98.296501,-2.163904,-9.622468,-0.138122,2.808471
Overall,2.092275,-13.89485,0.751073,1.716738,-84.656652,0.858369,-4.72103,4.613734,5.901288


## TOURISM DATASET

In [18]:
# Row labels for the tourism tables, passed to get_results as LEVELS.
LEVELS_tourism = ['Australia', 'States', 'Regions']
# Dataset key: part of the result file names read by get_results.
data_tourism = 'tourism'

### ARIMA

In [19]:
# 'arima' selects the tourism_arima_<method>_errors.csv files under RESULTS_PATH.
model_arima ='arima'
# Returns the base error table and a dict of percentage-improvement tables keyed by 'MSE'/'MAE'/'SMAPE'.
base_errors_tourism_arima, percentages_tourism_arima = get_results(data_tourism, model_arima, LEVELS_tourism)

In [20]:
base_errors_tourism_arima

error_metric,MAE,MSE,SMAPE
Australia,2373.12,7695575.82,7.05
States,581.33,682870.53,16.33
Regions,101.65,29724.09,34.47
Overall,167.87,173699.11,32.66


In [21]:
percentages_tourism_arima['MAE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Sample),MinT(Shrink),ERM,ML_CASE1,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-127.329423,-9.04674,-80.638147,-18.270041,-49.87232,9.184112,-92.758057,-16.291633,-48.307292,-93.304595
States,-36.896427,21.075809,-10.32116,19.30745,5.289595,24.082707,-19.252404,17.208814,4.008051,-18.935888
Regions,0.0,6.473192,7.752091,11.293655,9.729464,0.777177,3.4727,7.437285,6.089523,3.649779
Overall,-31.703104,8.053851,-12.104605,8.655507,-1.453506,8.822303,-19.020671,6.278668,-3.550366,-18.919402


In [22]:
percentages_tourism_arima['MSE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Sample),MinT(Shrink),ERM,ML_CASE1,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-336.612973,-15.402187,-186.242951,-32.443611,-102.65085,-0.952876,-221.675322,-31.344199,-93.752933,-222.85461
States,-69.790932,28.159025,-13.739921,35.504637,12.530673,42.812455,-33.759581,32.881787,10.043589,-32.783373
Regions,0.0,16.233769,17.287729,23.978228,20.335997,14.419214,6.151004,17.126916,13.681832,6.746111
Overall,-198.046179,3.605229,-98.842832,-1.698437,-46.294762,15.599435,-125.518956,-3.036648,-43.493683,-125.725319


In [23]:
percentages_tourism_arima['SMAPE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Sample),MinT(Shrink),ERM,ML_CASE1,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-139.574468,-9.361702,-86.382979,-18.723404,-52.48227,10.212766,-100.425532,-15.460993,-51.631206,-101.134752
States,-10.16534,27.005511,5.756277,30.128598,19.840784,25.780772,4.776485,23.943662,17.085119,3.306797
Regions,0.0,2.727009,3.713374,7.484769,6.788512,-17.493473,4.72875,4.612707,4.786771,3.771395
Overall,-0.765462,3.704838,3.582364,8.389467,7.195346,-15.615432,4.500919,5.358236,5.174525,3.521127


### ETS

In [24]:
# 'ets' selects the tourism_ets_<method>_errors.csv files under RESULTS_PATH.
model_ets ='ets'
# Returns the base error table and a dict of percentage-improvement tables keyed by 'MSE'/'MAE'/'SMAPE'.
base_errors_tourism_ets, percentages_tourism_ets = get_results(data_tourism, model_ets, LEVELS_tourism)

In [25]:
base_errors_tourism_ets

error_metric,MAE,MSE,SMAPE
Australia,3149.11,12915938.49,9.62
States,569.86,610351.6,13.32
Regions,91.5,23802.54,32.26
Overall,166.87,223778.77,30.44


In [26]:
percentages_tourism_ets['MAE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Sample),MinT(Shrink),ERM,ML_CASE1,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-37.796393,1.198434,-15.479929,19.03649,-9.77292,25.515146,7.468142,-26.729774,-7.2014,-20.576607
States,-19.446882,-1.53371,-5.153897,20.575229,-1.923279,-15.500298,11.100972,-12.245113,-0.61945,-8.730214
Regions,0.0,1.770492,4.295082,1.551913,4.218579,-60.52459,5.661202,1.431694,3.071038,1.857923
Overall,-13.861089,0.719123,-2.750644,10.78684,-0.617247,-28.758914,7.592737,-8.665428,-0.2457,-6.100557


In [27]:
percentages_tourism_ets['MSE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Sample),MinT(Shrink),ERM,ML_CASE1,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-74.350078,0.996985,-32.792912,26.454736,-23.578552,34.256214,17.483253,-49.074116,-16.094326,-39.891831
States,-37.322787,2.96749,-8.087188,31.235177,-2.283716,-36.878294,21.918611,-23.789988,-4.277852,-18.712472
Regions,0.0,6.104559,10.261384,7.754256,10.001538,-111.02521,10.751206,2.311098,5.514706,3.083957
Overall,-58.869128,1.931734,-23.0951,25.726605,-15.559769,4.279624,17.830838,-38.443656,-11.358008,-30.993659


In [28]:
percentages_tourism_ets['SMAPE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Sample),MinT(Shrink),ERM,ML_CASE1,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-41.683992,0.519751,-18.399168,18.711019,-12.474012,28.898129,7.692308,-29.62578,-8.419958,-22.972973
States,-16.066066,-21.621622,-4.954955,11.636637,-1.426426,-42.192192,9.084084,-6.756757,1.576577,-3.453453
Regions,0.0,-2.944823,1.890887,-1.239926,2.138872,-71.109733,5.300682,2.572846,3.936764,3.130812
Overall,-0.722733,-3.580815,1.609724,-0.689882,1.971091,-69.645204,5.453351,2.135348,3.843627,2.82523


### Forecasts across seeds

In [31]:
# Dataset/model keys and directory used to build the seed-run file name below.
data = 'prison'
model = 'ets'
path = '../results/validation_results_experiments/'

# Reads <path>prison_ets_seed_runs.csv; the first CSV column becomes the index.
fc_seeds = pd.read_csv(f'{path}{data}_{model}_seed_runs.csv', index_col =0)

In [32]:
# Standard deviation of every column across the rows that share an index label.
std_results = fc_seeds.groupby(fc_seeds.index).std()

In [33]:
std_results

Unnamed: 0,0,1,2,3,4,5,6,7
ACT-Female-Remanded-ATSI,2.369480,2.413365,2.372096,2.400927,2.586641,2.634543,2.596754,2.627631
ACT-Female-Remanded-Non-ATSI,1.606393,1.611455,1.552460,1.553457,1.686703,1.692010,1.636631,1.638402
ACT-Female-Sentenced-ATSI,1.728251,1.762741,1.751605,1.765333,1.883511,1.920292,1.908104,1.922641
ACT-Female-Sentenced-Non-ATSI,1.383259,1.423514,1.415436,1.432201,1.494423,1.537753,1.530201,1.548333
ACT-Male-Remanded-ATSI,1.468645,1.516005,1.482083,1.491256,1.631360,1.687992,1.650392,1.662732
...,...,...,...,...,...,...,...,...
WA-Female-Sentenced-Non-ATSI,2.197882,2.236175,2.146643,2.155461,2.397064,2.437066,2.350999,2.360602
WA-Male-Remanded-ATSI,1.978030,2.007106,1.993296,2.020706,2.194336,2.230870,2.220372,2.251134
WA-Male-Remanded-Non-ATSI,3.387002,3.428934,3.323050,3.334754,3.617367,3.655736,3.549880,3.563819
WA-Male-Sentenced-ATSI,12.342096,12.403200,11.969615,11.982837,13.104752,13.161222,12.741746,12.766034


In [34]:
# Write the standard-deviation table to the same directory the seed-run file was read from.
std_results.to_csv(f'{path}{data}_{model}_std_results.csv')

### Comparing with previous parameters

In [44]:
LEVELS_tourism = ['Australia', 'States', 'Regions']
data_tourism = 'tourism'
model_ets ='arima'
base_errors_tourism_ets, percentages_tourism_ets = get_results(data_tourism, model_ets, LEVELS_tourism, True)

In [45]:
percentages_tourism_ets['MAE'].style.highlight_max(color = 'lightgreen', axis = 1)

Unnamed: 0,BU,OLS,WLS,MinT(Sample),MinT(Shrink),ERM,ML_CASE1,PREV_PARAMS,ML_CASE2,ML_CASE3,ML_CASE4
Australia,-127.329423,-9.04674,-80.638147,-18.270041,-49.87232,9.184112,-92.758057,-39.036795,-16.291633,-48.307292,-93.304595
States,-36.896427,21.075809,-10.32116,19.30745,5.289595,24.082707,-19.252404,7.947293,17.208814,4.008051,-18.935888
Regions,0.0,6.473192,7.752091,11.293655,9.729464,0.777177,3.4727,6.109198,7.437285,6.089523,3.649779
Overall,-31.703104,8.053851,-12.104605,8.655507,-1.453506,8.822303,-19.020671,-0.875678,6.278668,-3.550366,-18.919402
