In [2]:
# NOTE(review): presumably a switch for GPU execution; none of the visible cells read it — confirm it is still needed.
USE_GPU = True
from typing import List, Tuple, Dict, Any, Optional
import seaborn as sns
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score

In [3]:
import pandas as pd
import numpy as np
# NOTE(review): this binds `px` to the top-level plotly package, but the very next
# line rebinds `px` to plotly.express, so the alias created here is never usable.
import plotly as px
import plotly.express as px
import os
import re

In [4]:
def _fmt_three_decimals(value):
    """Render a number with exactly three digits after the decimal point."""
    return '%.3f' % value


# Float rendering first, then the row/column display caps (the calls are independent).
pd.set_option('display.float_format', _fmt_three_decimals)
for _cap_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_cap_option, 500)

In [5]:
# Location strings shared by the notebook (presumably folder names relative to
# the working directory — confirm against where the notebook is launched).
CORE_AUC_PATH, CORE_CSV_PATH = 'auroc_eval_data', 'result_csv'

In [1]:
def _subset_metrics(frame, col, subset_name, threshold):
    """Score prediction column ``col`` of ``frame`` against its ``label`` column."""
    labels = frame['label']
    scores = frame[col]
    # Binarise once and reuse the result for every threshold-based metric.
    predicted = (scores > threshold).astype(int)
    return {
        'Model': col,
        'AUC': roc_auc_score(labels, scores),
        'Acc': accuracy_score(labels, predicted),
        'Prec': precision_score(labels, predicted),
        'Rec': recall_score(labels, predicted),
        'Subset': subset_name,
    }


def generate_glob_auc(read_path, experiment, threshold=.5):
    """Compute AUC/accuracy/precision/recall for every ``LSTM`` column of a parquet file.

    The frame is split by row position: the first ``ceil(n / 2)`` rows form the
    ``'eval'`` subset and the remaining rows form ``'test'``. The split is
    positional on purpose, so it does not depend on the index labels stored in
    the parquet file (a non-default index would otherwise skew the split).

    Parameters
    ----------
    read_path
        Path handed to ``pd.read_parquet``. The frame must contain a ``label``
        column plus one score column per model whose name contains ``'LSTM'``.
    experiment
        Value written to the ``Experiment`` column of every output row.
    threshold
        Scores strictly greater than this value count as class 1 when computing
        accuracy, precision and recall. Defaults to 0.5.

    Returns
    -------
    pandas.DataFrame
        Two rows per model column (``eval`` first, then ``test``) with the
        columns ``Model``, ``AUC``, ``Acc``, ``Prec``, ``Rec``, ``Subset`` and
        ``Experiment``.
    """
    print('Loading data')
    df = pd.read_parquet(read_path)

    # Number of positions p in [0, n) with p < n * 0.5, i.e. ceil(n / 2).
    n_eval = (len(df) + 1) // 2
    subsets = (('eval', df.iloc[:n_eval]), ('test', df.iloc[n_eval:]))

    auc_container = []
    for col in [name for name in df.columns if 'LSTM' in name]:
        print(f'Calculating AUC for {col}')
        for subset_name, frame in subsets:
            auc_container.append(_subset_metrics(frame, col, subset_name, threshold))

    out_df = pd.DataFrame(auc_container)
    out_df['Experiment'] = experiment
    return out_df


def plot_subset(df, target_var, output_dir='lstm_plots/metric_graphs'):
    """Save one line chart of ``target_var`` vs. window size per subset.

    For each of the ``'eval'`` and ``'test'`` values of ``df['Subset']`` a
    plotly line chart is drawn (x = ``Window``, y = ``target_var``, one line
    per ``Experiment``) and written to
    ``<output_dir>/<target_var>_<subset>.png``.

    Parameters
    ----------
    df
        Frame with at least the columns ``Subset``, ``Window``, ``Experiment``
        and ``target_var``.
    target_var
        Name of the metric column to plot; also used as the y-axis title and
        in the output file names.
    output_dir
        Folder that receives the PNG files. It is created when missing.
        Defaults to the location the notebook has always written to.

    Returns
    -------
    None
    """
    # Make sure the destination exists first; saving into a missing folder fails.
    os.makedirs(output_dir, exist_ok=True)

    for subset_name in ('eval', 'test'):
        subset_rows = df[df['Subset'] == subset_name]
        fig = px.line(subset_rows, x='Window', y=target_var, color='Experiment', markers=True)
        fig.update_layout(
            xaxis_title='Data Window Size',
            yaxis_title=target_var,
            width=800,
            height=400,
        )
        fig.write_image(os.path.join(output_dir, f'{target_var}_{subset_name}.png'))
    

In [12]:
# Load the LSTM result summary from the shared results folder (CORE_CSV_PATH)
# instead of repeating the folder name as a literal.
df = (
    pd.read_csv(os.path.join(CORE_CSV_PATH, 'result_summary_eval_test.csv'))
    # Window is the trailing number of the model name; names containing 'H'
    # have no trailing number and are assigned 30.
    .assign(Window=lambda frame: frame['Model'].apply(
        lambda name: 30 if 'H' in name else int(name.split(' ')[-1])
    ))
    # The 'LSTM SEQ 30 H' variant is excluded from the analysis.
    .loc[lambda frame: frame['Model'] != 'LSTM SEQ 30 H']
    # Spell out the abbreviated metric column names.
    .rename(columns={
        'Acc': 'Accuracy',
        'Prec': 'Precision',
        'Rec': 'Recall',
        'AUROC': 'AUC',
    })
)

In [8]:
# Load the random-forest baseline metrics from the shared results folder and
# split the packed 'Metric Name' string into separate columns.
baseline_df = (
    pd.read_csv(os.path.join(CORE_CSV_PATH, 'baseline_results_rand_f.csv'))
    .assign(
        # Every digit in the name is kept and concatenated, so the name must
        # contain no digits other than the experiment number.
        Experiment=lambda frame: frame['Metric Name'].apply(
            lambda name: int(re.sub('[^0-9]', '', name))
        ),
        # The metric abbreviation is the first space-separated token.
        Metric=lambda frame: frame['Metric Name'].apply(lambda name: name.split(' ')[0]),
        # Names without 'EVAL' are treated as test-subset metrics.
        Subset=lambda frame: frame['Metric Name'].apply(
            lambda name: 'eval' if 'EVAL' in name else 'test'
        ),
    )
    .drop(columns=['Metric Name'])
)
baseline_df

Unnamed: 0,Metric Value,Experiment,Metric,Subset
0,0.669,10,ACC,eval
1,0.861,10,PREC,eval
2,0.683,10,REC,eval
3,0.653,10,AUC,eval
4,0.676,10,ACC,test
5,0.876,10,PREC,test
6,0.69,10,REC,test
7,0.656,10,AUC,test
8,0.669,20,ACC,eval
9,0.641,20,PREC,eval


In [9]:
# Long -> wide: one row per (Experiment, Subset) pair and one column per
# distinct 'Metric' value, then expand the abbreviated metric names.
_metric_labels = {'ACC': 'Accuracy', 'PREC': 'Precision', 'REC': 'Recall'}
baseline_df_pivoted = (
    baseline_df
    .pivot(index=['Experiment', 'Subset'], columns='Metric', values='Metric Value')
    .reset_index()
    .rename(columns=_metric_labels)
)

In [9]:
# Tag every baseline row with a fixed model name and a window size of 1.
baseline_df_pivoted = baseline_df_pivoted.assign(Model='Random Forest', Window=1)

In [10]:
# Copy of `df` in which an 'AUROC' column, if present, is relabelled 'AUC'.
df_appender = df.rename(columns=dict(AUROC='AUC'))

# Show the column labels of the pivoted baseline table.
baseline_df_pivoted.columns

Index(['Experiment', 'Subset', 'Accuracy', 'AUC', 'Precision', 'Recall',
       'Model', 'Window'],
      dtype='object', name='Metric')

In [11]:
# Display df_appender's column labels (presumably to compare them with the
# baseline table's columns before combining the two — confirm).
df_appender.columns

Index(['Model', 'AUC', 'Accuracy', 'Precision', 'Recall', 'Subset',
       'Experiment', 'Window'],
      dtype='object')

In [None]:
# Emit the test-subset rows (without the Window column) as a LaTeX table.
appendix_test_rows = results_appendix_df.loc[results_appendix_df['Subset'] == 'test']
appendix_latex = appendix_test_rows.drop(columns=['Window']).to_latex(
    index=False,
    float_format='%.3f',
)
print(appendix_latex)

\begin{tabular}{lrrrrlr}
\toprule
Model & AUC & Accuracy & Precision & Recall & Subset & Experiment \\
\midrule
LSTM SEQ 1 & 0.699 & 0.670 & 0.691 & 0.856 & test & 10 \\
LSTM SEQ 10 & 0.670 & 0.526 & 0.823 & 0.309 & test & 10 \\
LSTM SEQ 20 & 0.694 & 0.669 & 0.689 & 0.858 & test & 10 \\
LSTM SEQ 30 & 0.696 & 0.668 & 0.691 & 0.852 & test & 10 \\
LSTM SEQ 40 & 0.674 & 0.576 & 0.780 & 0.450 & test & 10 \\
LSTM SEQ 1 & 0.683 & 0.625 & 0.749 & 0.605 & test & 20 \\
LSTM SEQ 10 & 0.686 & 0.642 & 0.736 & 0.669 & test & 20 \\
LSTM SEQ 20 & 0.694 & 0.669 & 0.689 & 0.858 & test & 20 \\
LSTM SEQ 30 & 0.690 & 0.630 & 0.752 & 0.612 & test & 20 \\
LSTM SEQ 40 & 0.690 & 0.632 & 0.749 & 0.622 & test & 20 \\
LSTM SEQ 1 & 0.749 & 0.690 & 0.707 & 0.563 & test & 30 \\
LSTM SEQ 10 & 0.744 & 0.688 & 0.717 & 0.539 & test & 30 \\
LSTM SEQ 20 & 0.749 & 0.690 & 0.719 & 0.541 & test & 30 \\
LSTM SEQ 30 & 0.748 & 0.690 & 0.702 & 0.571 & test & 30 \\
LSTM SEQ 40 & 0.748 & 0.691 & 0.705 & 0.573 & test & 30 \\
Random

In [13]:
# Save the eval/test line charts for the AUC column.
plot_subset(df=df, target_var='AUC')

# Remaining metrics, currently disabled:
# plot_subset(df, 'Accuracy')
# plot_subset(df, 'Precision')
# plot_subset(df, 'Recall')