# Results aggregation

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

# Global plotting defaults: wide figures, the seaborn default theme, then a
# large DejaVu Sans font applied on top of the theme.
plt.rcParams["figure.figsize"] = (20, 8)
sns.set_theme()
font = dict(family='DejaVu Sans', size=25)
matplotlib.rc('font', **font)

## Load metrics results

In [2]:
# Run configuration.
# N_CLASSES is interpolated into the output directory name used by the final
# plotting cell ("{N_CLASSES}_classes/...").
N_CLASSES = 2
# File name of the metrics dump; it is opened relative to "regression/".
metrics_file = 'full_metrics_dict.txt'

In [3]:
import json

# Parse the JSON metrics dump found under regression/ into a nested dict.
metrics_path = f"regression/{metrics_file}"
with open(metrics_path) as metrics_fh:
    metrics_dict = json.load(metrics_fh)

## Compute results highlights

In [5]:
# Classifier labels are read off a single reference entry of the metrics dict
# (top-level key '1', quote 'AAPL').
reference_entry = metrics_dict['1']['AAPL']
clf_labels = [*reference_entry]
clf_labels

['DTree', 'RandomForest', 'SVM', 'LR', 'LinearNN', 'LSTM', 'RandomWalk']

In [25]:
# Metric keys to aggregate over; each one is looked up in every classifier's
# metrics dict and gets its own majority table.
metrics_list = ['mse', 'mae', 'mape']

In [28]:
def count_best_per_classifier(quot_metrics, metric):
    """Count, per classifier, on how many quotes it reaches the lowest `metric`.

    Parameters
    ----------
    quot_metrics : dict
        Mapping ``quote -> classifier label -> metrics``, where every innermost
        value is indexable by `metric` (the structure stored under each
        top-level key of ``metrics_dict``).
    metric : str
        Key of the metric to compare; the smallest value in a row wins.

    Returns
    -------
    pandas.Series
        Indexed by classifier label. Each value is the number of quotes for
        which that classifier holds the row minimum. Ties credit every tied
        classifier, and NaN entries never count as best.
    """
    # rows = quotes, columns = classifiers, cells = the selected metric value
    metric_df = pd.DataFrame(
        {
            quot: {clf: clf_metrics[metric] for clf, clf_metrics in per_clf.items()}
            for quot, per_clf in quot_metrics.items()
        }
    ).T.astype(float)

    # Compare each row against its own minimum in one vectorised step. This
    # avoids writing into the rows yielded by iterrows() (which pandas does not
    # guarantee to propagate back to the frame) and needs no sentinel value
    # that could collide with a genuine metric value.
    is_best = metric_df.eq(metric_df.min(axis=1), axis=0)
    return is_best.sum()


# One row per top-level key of metrics_dict, in file order.
horizons = list(metrics_dict.keys())
majority_dicts = {}

for metric in metrics_list:
    # Integer table of "wins": rows follow metrics_dict's keys, columns follow
    # clf_labels.
    majority_df = pd.DataFrame(0, index=horizons, columns=clf_labels, dtype="int64")
    for predict_n, quot_metrics in metrics_dict.items():
        best_counts = count_best_per_classifier(quot_metrics, metric)
        # Align on classifier label; a classifier absent for this key gets 0 wins.
        majority_df.loc[predict_n] = best_counts.reindex(clf_labels, fill_value=0)

    majority_dicts[metric] = majority_df
#majority_df.set_index('N Days')

   DTree RandomForest  SVM   LR LinearNN LSTM RandomWalk
1      0            0    0   13        0    1          0
5    NaN          NaN  NaN  NaN      NaN  NaN        NaN
10   NaN          NaN  NaN  NaN      NaN  NaN        NaN
20   NaN          NaN  NaN  NaN      NaN  NaN        NaN
50   NaN          NaN  NaN  NaN      NaN  NaN        NaN
   DTree RandomForest  SVM   LR LinearNN LSTM RandomWalk
1      0            0    0   13        0    1          0
5      0            0    0   11        0    3          0
10   NaN          NaN  NaN  NaN      NaN  NaN        NaN
20   NaN          NaN  NaN  NaN      NaN  NaN        NaN
50   NaN          NaN  NaN  NaN      NaN  NaN        NaN
   DTree RandomForest  SVM   LR LinearNN LSTM RandomWalk
1      0            0    0   13        0    1          0
5      0            0    0   11        0    3          0
10     0            1    0   11        0    2          0
20   NaN          NaN  NaN  NaN      NaN  NaN        NaN
50   NaN          NaN  NaN  NaN

In [36]:
# Sanity check: the 'mse' majority table must be castable to integer columns.
mse_majority = majority_dicts['mse']
print(mse_majority.astype(int).dtypes)

DTree           int32
RandomForest    int32
SVM             int32
LR              int32
LinearNN        int32
LSTM            int32
RandomWalk      int32
dtype: object


In [39]:
# Write one annotated heatmap per metric to regression/majority_<metric>.png.
for metric, win_counts in majority_dicts.items():
    fig, ax = plt.subplots()
    sns.heatmap(win_counts.astype(int), cmap='mako', linewidths=0.5, annot=True, ax=ax)
    fig.savefig(f"regression/majority_{metric}.png")
    # Close the figure explicitly so figures do not accumulate across iterations.
    plt.close(fig)

In [6]:
# NOTE(review): `majority_df` here is whatever the aggregation loop assigned
# last (the table for the final entry of metrics_list) — confirm that is the
# table intended for the plot below.
majority_df = majority_df.astype(float)
majority_df = majority_df.round(2)

In [7]:
# Render `majority_df` as an annotated heatmap and save it to disk.
# NOTE(review): the target directory is "{N_CLASSES}_classes/", unlike the
# "regression/" directory used by the other cells — confirm this path (and the
# "majority_acc" file name) is still intended for this notebook.
fig, ax = plt.subplots()
acc_heatmap = sns.heatmap(majority_df, cmap='mako', linewidths=0.5, annot=True, ax=ax)
fig.savefig(f"{N_CLASSES}_classes/majority_acc.png")
plt.close(fig)