In [1]:
import pandas as pd
import numpy as np
import plotly as px
import plotly.express as px
from typing import List, Tuple, Dict, Any, Optional
import seaborn as sns
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
import os
import plotly.graph_objects as go
from scipy.spatial import Delaunay

# Bin edges handed to pd.cut over each session's maximum cumulative time.
# Every bin is labelled with its lower edge; the final edge is a very large
# sentinel, so the top bin is effectively open-ended.
SESSION_TIME_BINS = [30, 35, 40, 45, 50, 60, 70, 90, 120, 180, 1_000_000]

# Bin edges handed to pd.cut over `glob_session_time_raw`, labelled the same
# way (a leading edge of 30 is currently disabled).
USER_BINS = [
    40, 60, 80, 100, 120, 160, 200, 240,
    280, 320, 360, 420, 480, 540, 600,
    100_000_000,
]

# Score columns to evaluate. The trailing integer of each name is parsed into
# the `Window` column by the AUROC helpers.
MODEL_LIST = ['LSTM SEQ 1', 'LSTM SEQ 10', 'LSTM SEQ 20', 'LSTM SEQ 30', 'LSTM SEQ 40']

In [2]:
# Directory (relative to the working directory) that holds the per-window
# `data_for_auc_window_*_eval` parquet datasets loaded below.
CORE_PATH = 'auroc_eval_data'

In [3]:
def generate_auc_sess(df, models):
    """Compute AUROC per session-length bin, model and experiment.

    Every row is tagged with the largest `cum_session_time_raw` found in its
    (`user_id`, `session_30_raw`) group. That value is bucketed with
    `SESSION_TIME_BINS` (each bin labelled by its lower edge), the model score
    columns are melted to long format, and `roc_auc_score` is evaluated once
    per observed (bin, model, experiment) combination.

    Args:
        df: Frame containing `user_id`, `session_30_raw`,
            `cum_session_time_raw`, `label`, `experiment` and one score column
            per entry of `models`. The frame itself is not modified.
        models: Names of the score columns. Each name must end in a
            space-separated integer (e.g. 'LSTM SEQ 10'); that integer becomes
            the `Window` column.

    Returns:
        Long-format DataFrame with the columns `session_bin`, `model`,
        `experiment`, `auroc` and `Window`.
    """
    session_keys = ['user_id', 'session_30_raw']
    id_cols = ['label', 'session_bin', 'experiment']

    max_session_time = (
        df.groupby(session_keys)['cum_session_time_raw']
        .max()
        .reset_index()
        .rename(columns={'cum_session_time_raw': 'max_session_time'})
    )
    merged = df.merge(max_session_time, on=session_keys, how='left')

    # Values that fall outside the bin edges become NaN here and are removed
    # by the dropna() below.
    merged['session_bin'] = pd.cut(
        merged['max_session_time'],
        bins=SESSION_TIME_BINS,
        labels=SESSION_TIME_BINS[:-1],
    )

    scores_long = (
        merged[id_cols + list(models)]
        .melt(id_vars=id_cols, var_name='model')
        .dropna()
    )

    auroc = (
        scores_long
        # observed=True scores only the bin/model/experiment combinations that
        # actually occur, regardless of the pandas version's default for
        # categorical groupers. Selecting the two needed columns keeps the
        # grouping columns out of the applied frame.
        .groupby(['session_bin', 'model', 'experiment'], observed=True)[['label', 'value']]
        .apply(lambda grp: roc_auc_score(grp['label'], grp['value']))
        .reset_index()
        .rename(columns={0: 'auroc'})
    )
    auroc['Window'] = auroc['model'].apply(lambda name: int(name.split(' ')[-1]))
    return auroc

def generate_auc_plat(df, models):
    """Compute AUROC per `glob_session_time_raw` bin, model and experiment.

    `glob_session_time_raw` is bucketed with `USER_BINS` (each bin labelled by
    its lower edge), the model score columns are melted to long format, and
    `roc_auc_score` is evaluated once per observed (bin, model, experiment)
    combination.

    Args:
        df: Frame containing `glob_session_time_raw`, `label`, `experiment`
            and one score column per entry of `models`. The frame is not
            modified; the binned column is built on a narrowed copy.
        models: Names of the score columns. Each name must end in a
            space-separated integer (e.g. 'LSTM SEQ 10'); that integer becomes
            the `Window` column.

    Returns:
        Long-format DataFrame with the columns `user_bin`, `model`,
        `experiment`, `auroc` and `Window`.
    """
    id_cols = ['label', 'user_bin', 'experiment']

    # Work on a copy of just the needed columns so the caller's frame does not
    # silently gain a `user_bin` column.
    binned = df[['label', 'experiment'] + list(models)].copy()
    # Values that fall outside the bin edges become NaN here and are removed
    # by the dropna() below.
    binned['user_bin'] = pd.cut(
        df['glob_session_time_raw'],
        bins=USER_BINS,
        labels=USER_BINS[:-1],
    )

    scores_long = binned.melt(id_vars=id_cols, var_name='model').dropna()

    auroc = (
        scores_long
        # observed=True scores only the bin/model/experiment combinations that
        # actually occur, regardless of the pandas version's default for
        # categorical groupers. Selecting the two needed columns keeps the
        # grouping columns out of the applied frame.
        .groupby(['user_bin', 'model', 'experiment'], observed=True)[['label', 'value']]
        .apply(lambda grp: roc_auc_score(grp['label'], grp['value']))
        .reset_index()
        .rename(columns={0: 'auroc'})
    )
    auroc['Window'] = auroc['model'].apply(lambda name: int(name.split(' ')[-1]))
    return auroc

In [4]:
def plot_contours(df, xtitle, ytitle, ztitle, target_var, save_suffix, title,
                  output_dir='lstm_plots/metric_graphs/auroc_granular'):
    """Render a 3-D mesh coloured by `target_var` and save it as a PNG.

    The (x, y) points are triangulated with a 2-D Delaunay triangulation and
    the resulting simplices are used as the faces of a `Mesh3d` trace.

    Args:
        df: Frame holding the columns named by the arguments below.
        xtitle: Column used for the x axis; also the x-axis title.
        ytitle: Column used for the y axis; also the y-axis title.
        ztitle: Column used for the z axis; also the z-axis title.
        target_var: Column whose values set the mesh intensity (colour).
        save_suffix: File name, without extension, of the PNG to write.
        title: Figure title.
        output_dir: Directory the PNG is written to. It is created when
            missing so the notebook also runs on a fresh checkout.

    Returns:
        None. The figure is only written to disk.
    """
    x_vals, y_vals, z_vals = df[xtitle], df[ytitle], df[ztitle]
    intensity = df[target_var]

    # Triangulate in the x/y plane only; z is attached per vertex afterwards.
    xy_points = np.vstack([x_vals, y_vals]).T
    I, J, K = Delaunay(xy_points).simplices.T

    fig = go.Figure(
        go.Mesh3d(
            x=x_vals,
            y=y_vals,
            z=z_vals,
            i=I,
            j=J,
            k=K,
            intensity=intensity,
            colorscale='thermal',
            # NOTE(review): kept from the original call; Plotly's docs describe
            # alphahull as applying when i/j/k are not supplied — confirm
            # whether it is still needed.
            alphahull=.5,
        )
    )
    fig.update_layout(
        title=title,
        height=1000,
        width=1000,
        scene=dict(
            xaxis_title=xtitle,
            yaxis_title=ytitle,
            zaxis_title=ztitle,
        ),
    )

    image_path = os.path.join(output_dir, f'{save_suffix}.png')
    image_dir = os.path.dirname(image_path)
    if image_dir:
        # write_image does not create missing parent directories.
        os.makedirs(image_dir, exist_ok=True)
    fig.write_image(image_path, width=800, height=800, scale=3)
    
    

In [5]:
def _load_auc_frame(window):
    """Read one window's evaluation parquet and tag its rows with that window."""
    frame = pd.read_parquet(os.path.join(CORE_PATH, f'data_for_auc_window_{window}_eval'))
    frame['experiment'] = window
    return frame


def _split_halves(frame):
    """Return (first half, remaining rows) of `frame`, split at one index.

    Splitting at a single midpoint keeps every row: pairing `head(n // 2)`
    with `tail(n // 2)` would drop the middle row of an odd-length frame.
    """
    midpoint = len(frame) // 2
    return frame.iloc[:midpoint], frame.iloc[midpoint:]


df_auc_10 = _load_auc_frame(10)
df_auc_20 = _load_auc_frame(20)
df_auc_30 = _load_auc_frame(30)

# First half of each window's frame feeds the evaluation set, the rest the
# test set.
eval_halves, test_halves = zip(
    *(_split_halves(frame) for frame in (df_auc_10, df_auc_20, df_auc_30))
)

eval_auc_df = pd.concat(list(eval_halves))
test_auc_df = pd.concat(list(test_halves))




In [6]:
# AUROC by session-length bin for the evaluation and test halves.
eval_auc = generate_auc_sess(df=eval_auc_df, models=MODEL_LIST)
test_auc = generate_auc_sess(df=test_auc_df, models=MODEL_LIST)

# One mesh per split: session bin (x) vs. model window (y) vs. experiment (z),
# coloured by AUROC.
plot_contours(
    df=eval_auc,
    xtitle='session_bin',
    ytitle='Window',
    ztitle='experiment',
    target_var='auroc',
    save_suffix='eval_auroc_sess',
    title='Evaluation AUROC | Session Size',
)

plot_contours(
    df=test_auc,
    xtitle='session_bin',
    ytitle='Window',
    ztitle='experiment',
    target_var='auroc',
    save_suffix='test_auroc_sess',
    title='Test AUROC | Session Size',
)

In [7]:
# AUROC by `glob_session_time_raw` bin for the evaluation and test halves.
eval_auc_plat = generate_auc_plat(df=eval_auc_df, models=MODEL_LIST)
test_auc_plat = generate_auc_plat(df=test_auc_df, models=MODEL_LIST)

# One mesh per split: user bin (x) vs. model window (y) vs. experiment (z),
# coloured by AUROC.
plot_contours(
    df=eval_auc_plat,
    xtitle='user_bin',
    ytitle='Window',
    ztitle='experiment',
    target_var='auroc',
    save_suffix='eval_auroc_plat',
    title='Evaluation AUROC | Cumulative Session Time',
)
plot_contours(
    df=test_auc_plat,
    xtitle='user_bin',
    ytitle='Window',
    ztitle='experiment',
    target_var='auroc',
    save_suffix='test_auroc_plat',
    title='Test AUROC | Cumulative Session Time',
)