In [None]:
import datetime
import math
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import zipfile
import requests
import re
import tensorflow as tf

import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'sans-serif'
font = {'fontname': 'Helvetica'}
import matplotlib as mpl

# import and load model architectures as well as decoder
from models.cueva import LSTM_FE
from models.llanes_jurado import LSTM_CNN
from utilities.preprocessors import correct_signals
from utilities.loaders import load_meta_data, load_model, load_lookup_array, charge_raw_data, _combine_data, save_lookup_array

from utilities.visualizers import (
    view_time_frame,
    view_wavelet_coeffs,
    analyze,
    data_split_metric_values,
    view_value_frequency,
    multi_class_heatmap,
    view_metric_values,
    view_classified_labels,
    view_label_freq,
    disp_cat_feat,
    plot_all_features,
    describe_col,
    ModelResults,
    view_all_splits_results)

from utilities.feature_extractors import (
    concur_extract_features_from_all,
    extract_features,
    extract_features_hybrid,
    extract_features_per_hour)

%load_ext autoreload
%autoreload 2

In [None]:
# Hand-written example of the results structure used in this notebook:
# model name -> {'train_results': [...], 'test_results': [...]}, where each
# list holds (subject name, {metric name: score}) tuples. In this sample only
# the ROC AUC varies between subjects; every other metric is 0.9764.
sample_models = {
    'cueva_second_phase-svm': {
        'train_results': [
            (subject, {
                'train_acc': 0.9764,
                'train_prec': 0.9764,
                'train_rec': 0.9764,
                'train_f1': 0.9764,
                'train_roc_auc': roc_auc,
            })
            for subject, roc_auc in (
                ('ahixac_expert1', 0.8764),
                ('akakip_expert1', 0.7764),
            )
        ],
        'test_results': [
            (subject, {
                'test_acc': 0.9764,
                'test_prec': 0.9764,
                'test_rec': 0.9764,
                'test_f1': 0.9764,
                'test_roc_auc': roc_auc,
            })
            for subject, roc_auc in (
                ('pqbqpr_expert2', 0.8764),
                ('oxused_expert2', 0.6764),
                ('subject3', 0.6764),
                ('subject4', 0.6764),
                ('subject5', 0.6764),
                ('subject6', 0.6764),
                ('subject7', 0.6764),
            )
        ],
    },
    # the remaining models are empty placeholders; the trailing comments list
    # the keys each entry is meant to hold later on
    'cueva-lstm-fe': {},    # 'model', 'hyper_params'
    'jurado-lstm-cnn': {},  # 'model', 'hyper_params'
    'taylor-svm': {},       # 'model', 'selected_feats'
    'taylor-lr': {},        # 'model', 'selected_feats'
    'taylor-rf': {},        # 'model', 'selected_feats'
    'hossain-gbt': {},      # 'model', 'selected_feats', 'scaler'
    'hossain-svm': {},      # 'model', 'selected_feats', 'scaler'
    'hossain-lr': {},       # 'model', 'selected_feats', 'scaler'
}

In [None]:
# Load the saved results of all models with the project's `load_meta_data` helper.
# Later cells index the result as models[model_name]['train_results' | 'test_results']
# and iterate each entry as (subject name, metrics dict) pairs.
models = load_meta_data('./results/all_models_results.json')
models

# Visualization
#### Goal: for each model, show the accuracy of every subject in the train and test sets, i.e. a figure will show Taylor's SVM accuracy across all train and test subjects
- have the x ticks be labeled as the subject names
- have the y ticks be labeled as the percentage value of the accuracy or roc  
- the title for the plot will be for instance `taylor svm train accuracy results.png`, and the general title would be `{selector_config} {estimator_name} {data_split} {metric} results.png`

![sample results chart.png](../../sample%20results%20chart.png)

In [None]:
# NOTE(review): self-assignment, which has no effect. Presumably a leftover switch
# between `sample_models` and the loaded results — confirm and remove.
models = models

In [None]:
# Pair every test subject of the cueva second-phase SVM with its test ROC AUC.
data = [
    (subject, scores['test_roc_auc'])
    for subject, scores in models['cueva_second_phase-svm']['test_results']
]
data

In [None]:
# Transpose the (subject, score) pairs into two parallel tuples:
# subject names for the x axis and scores for the bar heights.
x_ticks, y_ticks = zip(*data)
(x_ticks, y_ticks)

In [None]:
# Prototype of the per-subject bar chart: one bar per entry of `x_ticks`,
# colored by sampling the colormap evenly from 0 to 1.
# NOTE(review): 'mako' is presumably registered by seaborn rather than shipped
# with matplotlib — confirm seaborn gets imported by one of the project modules.
cmap = mpl.colormaps['mako']
fig, axis = plt.subplots(figsize=(15, 10))

bar = axis.bar(
    x_ticks,
    y_ticks,
    color=cmap(np.linspace(0, 1, len(data))),
    edgecolor='white',
    linewidth=0.125,
)
axis.bar_label(bar, fmt='{:.4f}')

# the title is a plain string (not an f-string), so the placeholder names in
# braces are drawn literally
axis.set(
    xlabel='subjects',
    ylabel='score',
    title='{selector_config} {estimator_name} {data_split} {metric} results',
)
axis.tick_params(axis='x', labelrotation=45.0)
plt.show()

In [None]:
def view_subject_results(data, selector_config: str, estimator_name: str, data_split: str="train", metric: str="acc", colormap: str="plasma", save_img: bool=True, style: str='default'):
    """
    plots a vertical bar graph of one metric score per subject for a single
    model and data split

    the figure is titled `{selector_config} {estimator_name} {data_split} {metric} results`
    and, when `save_img` is true, is also written to
    `./figures & images/{title}.png` (the directory is created if missing)

    main args:
        data - non-empty list of tuples representing the subject and the
            subjects respective score; subjects label the x axis and scores
            set the bar heights
        selector_config - first part of the title e.g. `taylor`
        estimator_name - second part of the title e.g. `svm`
        data_split - data split named in the title e.g. `train` or `test`
        metric - metric named in the title and in the y axis label e.g. `acc`
        colormap - name of a colormap registered in `mpl.colormaps`; the bars
            are colored by sampling it evenly from 0 to 1
        save_img - whether to also save the figure as a .png file
        style - one of `dark`, `solarized`, `538`, or `ggplot`; any other
            value falls back to matplotlib's `default` style

    raises:
        ValueError - if `data` is empty
    """

    # fail early with a clear message instead of the cryptic
    # "not enough values to unpack" error raised by unpacking an empty zip
    if len(data) == 0:
        raise ValueError('data is empty: expected at least one (subject, score) tuple')

    # compose title based on selector_config, estimator_name, data_split, and metric args
    title = f'{selector_config} {estimator_name} {data_split} {metric} results'

    # extract x and y values from data
    x_ticks, y_ticks = zip(*data)

    styles = {
        'dark': 'dark_background',
        'solarized': 'Solarized_Light2',
        '538': 'fivethirtyeight',
        'ggplot': 'ggplot',
    }

    # apply the style only while this figure is drawn so that it does not
    # leak into plots made by later cells
    with plt.style.context(styles.get(style, 'default')):
        # define figure
        cmap = mpl.colormaps[colormap]
        fig = plt.figure(figsize=(15, 10))
        axis = fig.add_subplot()

        # plot bar graph
        bars = axis.bar(x_ticks, y_ticks, color=cmap(np.linspace(0, 1, len(data))), edgecolor='white', linewidth=0.125)

        # %-style format gives the same label as '{:.4f}' but is also accepted
        # by matplotlib versions older than 3.7
        axis.bar_label(bars, fmt='%.4f')
        axis.set_xlabel('subjects')
        axis.set_ylabel(f'{metric} score')
        axis.tick_params(axis='x', labelrotation=45.0)
        axis.set_title(title)

        if save_img:
            # savefig does not create missing directories
            os.makedirs('./figures & images', exist_ok=True)
            fig.savefig(f'./figures & images/{title}.png')
        plt.show()

        # release the figure; this function is typically called many times in a loop
        plt.close(fig)

In [None]:
# Configuration of the per-subject charts: which models, data splits and
# metrics to plot, and the colormaps the figures may be drawn with.
#
# Earlier candidate list of model names, kept for reference:
#   cueva_second_phase_1-5-weighted-svm, cueva_second_phase_1-9-weighted-svm,
#   cueva_second_phase_1-2p5-weighted-svm, taylor-lr, taylor-rf, taylor-svm,
#   hossain-lr, hossain-gbt, hossain-svm
model_names = [
    'cueva_second_phase_1-5-weighted-svm',
    'taylor-lr',
    'taylor-rf',
    'taylor-svm',
    'hossain-lr',
    'hossain-svm',
    'hossain-gbt',
    'jurado-lstm-cnn',
]
data_splits = [
    "train",
    "test",
]
metrics = [
    "acc",
    "prec",
    "rec",
    "f1",
    "roc_auc",
]
# NOTE(review): 'mako' is presumably registered by seaborn rather than shipped
# with matplotlib — confirm seaborn gets imported by one of the project modules.
colormaps = [
    'mako',
    'GnBu',
    'plasma',
    'magma',
    'twilight',
    'YlOrBr',
]

In [None]:
# Draw one per-subject bar chart for every (model, data split, metric) combination.
# The colormap of each figure is picked by a locally seeded generator so that a
# fresh "Restart & Run All" reproduces exactly the same figures and saved images.
colormap_rng = np.random.default_rng(seed=0)

for model_name in model_names:
    # extract selector config and estimator name from model name by splitting
    # on the first hyphen only
    # NOTE(review): a name with several hyphens such as
    # 'cueva_second_phase_1-5-weighted-svm' yields ('cueva_second_phase_1',
    # '5-weighted-svm') — confirm this is the intended figure title
    selector_config, estimator_name = model_name.split('-', 1)

    for data_split in data_splits:
        # the (subject name, metrics dict) pairs are the same for every metric
        # of this split, so look them up once
        subject_results = models[model_name][f'{data_split}_results']

        for metric in metrics:
            data = [(subject_name, result[f'{data_split}_{metric}']) for (subject_name, result) in subject_results]
            colormap = colormaps[colormap_rng.integers(len(colormaps))]
            view_subject_results(data,
                selector_config=selector_config,
                estimator_name=estimator_name,
                data_split=data_split,
                metric=metric,
                colormap=colormap,
                style='dark'
            )


## From here we create a graph of the mean value of each metric for each model
![sample averaged results chart.png](../../sample%20averaged%20results%20chart.png)

## In this case the `x_ticks` are the `model_names` themselves and the `y_ticks` are the mean value of the chosen metric (e.g. mean accuracy) of each model listed

In [None]:
# Configuration of the averaged charts: which models, data splits and metrics
# to aggregate, and the colormaps the figures may be drawn with.
#
# Earlier candidate list of model names, kept for reference:
#   cueva_second_phase_1-5-weighted-svm, cueva_second_phase_1-9-weighted-svm,
#   cueva_second_phase_1-2p5-weighted-svm, taylor-lr, taylor-rf, taylor-svm,
#   hossain-lr, hossain-gbt, hossain-svm
model_names = [
    'cueva_second_phase-svm',
    'taylor-lr',
    'taylor-rf',
    'taylor-svm',
    'hossain-lr',
    'hossain-gbt',
    'jurado-lstm-cnn',
]
data_splits = [
    "train",
    "test",
]
metrics = [
    "acc",
    "prec",
    "rec",
    "f1",
    "roc_auc",
]
# NOTE(review): 'mako' is presumably registered by seaborn rather than shipped
# with matplotlib — confirm seaborn gets imported by one of the project modules.
colormaps = [
    'mako',
    'GnBu',
    'plasma',
    'magma',
    'twilight',
    'YlOrBr',
]

In [None]:
def view_models_mean_results(data, data_split: str="train", metric: str="acc", colormap: str="plasma", save_img: bool=True, style: str='default'):
    """
    plots a vertical bar graph of one score per model for a single data split
    and metric

    the figure is titled `{data_split} {metric} results` and, when `save_img`
    is true, is also written to `./figures & images/{title}.png` (the
    directory is created if missing)

    main args:
        data - non-empty list of tuples representing the model and the models
            respective score; models label the x axis and scores set the bar
            heights
        data_split - data split named in the title e.g. `train` or `test`
        metric - metric named in the title and in the y axis label e.g. `acc`
        colormap - name of a colormap registered in `mpl.colormaps`; the bars
            are colored by sampling it evenly from 0 to 1
        save_img - whether to also save the figure as a .png file
        style - one of `dark`, `solarized`, `538`, or `ggplot`; any other
            value falls back to matplotlib's `default` style

    raises:
        ValueError - if `data` is empty
    """

    # fail early with a clear message instead of the cryptic
    # "not enough values to unpack" error raised by unpacking an empty zip
    if len(data) == 0:
        raise ValueError('data is empty: expected at least one (model, score) tuple')

    # compose title based on data_split and metric args
    title = f'{data_split} {metric} results'

    # extract x and y values from data
    x_ticks, y_ticks = zip(*data)

    styles = {
        'dark': 'dark_background',
        'solarized': 'Solarized_Light2',
        '538': 'fivethirtyeight',
        'ggplot': 'ggplot',
    }

    # apply the style only while this figure is drawn so that it does not
    # leak into plots made by later cells
    with plt.style.context(styles.get(style, 'default')):
        # define figure
        cmap = mpl.colormaps[colormap]
        fig = plt.figure(figsize=(15, 10))
        axis = fig.add_subplot()

        # plot bar graph
        bars = axis.bar(x_ticks, y_ticks, color=cmap(np.linspace(0, 1, len(data))), edgecolor='white', linewidth=0.125)

        # %-style format gives the same label as '{:.4f}' but is also accepted
        # by matplotlib versions older than 3.7
        axis.bar_label(bars, fmt='%.4f')
        axis.set_xlabel('models')
        axis.set_ylabel(f'{metric} score')
        axis.tick_params(axis='x', labelrotation=45.0)
        axis.set_title(title)

        if save_img:
            # savefig does not create missing directories
            os.makedirs('./figures & images', exist_ok=True)
            fig.savefig(f'./figures & images/{title}.png')
        plt.show()

        # release the figure; this function is typically called many times in a loop
        plt.close(fig)

In [None]:
# Draw one chart per (data split, metric) pair that compares the models by the
# mean of that metric over all subjects of the split.
# The colormap of each figure is picked by a locally seeded generator so that a
# fresh "Restart & Run All" reproduces exactly the same figures and saved images.
colormap_rng = np.random.default_rng(seed=0)

for data_split in data_splits:
    for metric in metrics:
        print(f'data split: {data_split}')
        print(f'metric: {metric}')

        # pair each model name with the mean of its per-subject scores
        ticks = []
        for model_name in model_names:
            subject_scores = [result[f'{data_split}_{metric}'] for (_, result) in models[model_name][f'{data_split}_results']]
            ticks.append((model_name, np.mean(subject_scores)))

        colormap = colormaps[colormap_rng.integers(len(colormaps))]
        view_models_mean_results(ticks, data_split=data_split, metric=metric, colormap=colormap, style='dark')

In [None]:
# Inspect the (model name, mean score) pairs left over from the last
# (data split, metric) iteration of the loop above.
ticks

In [None]:
# Bar chart of the (model name, mean score) pairs currently held in `ticks`,
# i.e. those of the last (data_split, metric) pair that was iterated.
# The x/y values are derived from `ticks` right here instead of relying on
# `x_ticks`, `y_ticks` and `data` left behind by much earlier cells, which
# describe a different chart and need not even have matching lengths.
x_ticks, y_ticks = zip(*ticks)

# NOTE(review): 'mako' is presumably registered by seaborn rather than shipped
# with matplotlib — confirm seaborn gets imported by one of the project modules.
cmap = mpl.colormaps['mako']
fig = plt.figure(figsize=(15, 10))
axis = fig.add_subplot()

# one evenly spaced colormap sample per bar
bar = axis.bar(x_ticks, y_ticks, color=cmap(np.linspace(0, 1, len(x_ticks))), edgecolor='white', linewidth=0.125)

# %-style format gives the same label as '{:.4f}' but is also accepted by
# matplotlib versions older than 3.7
axis.bar_label(bar, fmt='%.4f')
axis.set_xlabel('models')
axis.set_ylabel('score')
axis.tick_params(axis='x', labelrotation=45.0)
axis.set_title(f'{data_split} {metric} results')
plt.show()