In [1]:
# Presumably toggles GPU use somewhere downstream.
# NOTE(review): not read by any cell visible here — confirm it is still needed.
USE_GPU = True
from typing import List, Tuple, Dict, Any, Optional
import seaborn as sns
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score

In [2]:
import pandas as pd
import numpy as np
# NOTE(review): the alias bound by the first import is immediately rebound by the
# second one, so `px` refers to `plotly.express` for the rest of the notebook.
import plotly as px
import plotly.express as px
import os

In [3]:
# Notebook display settings: show up to 500 columns and 500 rows of a frame,
# and render every float with exactly three decimal places.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500
pd.options.display.float_format = lambda value: '%.3f' % value

In [4]:
# Relative directory names. Presumably CORE_AUC_PATH is the folder with the prediction
# dumps used for AUROC evaluation and CORE_CSV_PATH the folder with result CSVs —
# TODO confirm against the code that consumes them.
CORE_AUC_PATH = 'auroc_eval_data'
CORE_CSV_PATH = 'result_csv'

In [61]:
def _score_subset(frame, col, subset):
    """Build one metrics record for score column ``col`` on ``frame``.

    AUROC is computed on the raw scores; accuracy, precision and recall are
    computed on the scores binarised at a 0.5 cutoff (``score > .5`` -> 1).
    """
    labels = frame['label']
    scores = frame[col]
    # Threshold once and reuse the result for all three classification metrics.
    preds = (scores > .5).astype(int)
    return {
        'Model': col,
        'AUROC': roc_auc_score(labels, scores),
        'Acc': accuracy_score(labels, preds),
        'Prec': precision_score(labels, preds),
        'Rec': recall_score(labels, preds),
        'Subset': subset,
    }


def generate_glob_auc(read_path, experiment):
    """Compute per-model AUROC / accuracy / precision / recall on eval and test halves.

    The parquet file at ``read_path`` is loaded and split by row position: the
    first ``ceil(n / 2)`` rows form the ``'eval'`` subset and the remaining rows
    the ``'test'`` subset. Every column whose name contains ``'LSTM'`` is scored
    against the ``'label'`` column (presumably these columns hold predicted
    probabilities — confirm against the code that writes the file).

    Parameters
    ----------
    read_path
        Path passed straight to ``pd.read_parquet``.
    experiment
        Value stored in the ``'Experiment'`` column of every output row.

    Returns
    -------
    pandas.DataFrame
        Two rows per scored column (eval first, then test) with columns
        ``Model``, ``AUROC``, ``Acc``, ``Prec``, ``Rec``, ``Subset`` and
        ``Experiment``.
    """
    print('Loading data')
    df = pd.read_parquet(read_path)

    # Split on row position rather than on index labels, so the halves are
    # correct even when the stored index is not a default 0..n-1 range.
    n_eval = (len(df) + 1) // 2
    subsets = (('eval', df.iloc[:n_eval]), ('test', df.iloc[n_eval:]))

    auc_container = []
    for col in [col for col in df.columns if 'LSTM' in col]:
        print(f'Calculating AUROC for {col}')
        for subset_name, subset_df in subsets:
            auc_container.append(_score_subset(subset_df, col, subset_name))

    out_df = pd.DataFrame(auc_container)
    out_df['Experiment'] = experiment
    return out_df


def plot_subset(df, target_var):
    """Export line plots of ``target_var`` against window size for each subset.

    One figure is produced for rows with ``Subset == 'eval'`` and one for rows
    with ``Subset == 'test'``. Each plots ``Window`` on the x-axis and
    ``target_var`` on the y-axis, with one line per ``Experiment``. The figures
    are written to ``lstm_plots/metric_graphs/{target_var}_eval.png`` and
    ``lstm_plots/metric_graphs/{target_var}_test.png``.

    Parameters
    ----------
    df
        Frame with ``Subset``, ``Window``, ``Experiment`` and ``target_var``
        columns.
    target_var
        Name of the metric column to plot; also used in titles and file names.

    Returns
    -------
    None
    """
    out_dir = 'lstm_plots/metric_graphs'
    # Make sure the target folder exists so the export does not depend on it
    # having been created by hand beforehand.
    os.makedirs(out_dir, exist_ok=True)

    # Build both figures before writing anything, so a failure while plotting
    # does not leave only one of the two images on disk.
    figures = {}
    for subset, title_prefix in (('eval', 'Evaluation'), ('test', 'Test')):
        fig = px.line(
            df[df['Subset'] == subset],
            x='Window',
            y=target_var,
            color='Experiment',
            markers=True,
            title=f'{title_prefix} {target_var} Across Prediction Experiments',
        )
        fig.update_layout(
            xaxis_title='Data Window Size',
            yaxis_title=target_var,
            width=800,
            height=400,
        )
        figures[subset] = fig

    for subset, fig in figures.items():
        fig.write_image(f'{out_dir}/{target_var}_{subset}.png')
    

In [62]:
# Load the eval/test result summary, derive the numeric window size from the
# model name (its last space-separated token, or 30 for names containing 'H'),
# drop the 'LSTM SEQ 30 H' rows, and expand the metric column names.
df = (
    pd.read_csv('result_csv/result_summary_eval_test.csv')
    .assign(
        Window=lambda frame: frame['Model'].apply(
            lambda model_name: 30 if 'H' in model_name else int(model_name.split(' ')[-1])
        )
    )
    .loc[lambda frame: frame['Model'] != 'LSTM SEQ 30 H']
    .rename(columns={'Acc': 'Accuracy', 'Prec': 'Precision', 'Rec': 'Recall'})
)

In [63]:
# Export the eval/test plots for every metric column of the summary frame.
for metric_name in ('AUROC', 'Accuracy', 'Precision', 'Recall'):
    plot_subset(df, metric_name)