In [1]:
import pandas as pd
from statsmodels.tsa.filters.hp_filter import hpfilter
import numpy as np
import re
from scipy.spatial import Delaunay
from scipy.special import kl_div
from datetime import datetime
import plotly
import plotly.offline as pyo
import plotly.io as pio
import plotly.graph_objects as go
import random
# Plotly's default colour cycle, kept as a module-level constant.
PLOTLY_COLORS = plotly.colors.DEFAULT_PLOTLY_COLORS

# The renderer setting is `default` (singular). Assigning to `defaults` only
# created an unused attribute, so the requested renderers were never applied.
# NOTE(review): the 'pdf' renderer needs a static-image export backend
# installed in the kernel environment -- confirm before relying on it.
pio.renderers.default = 'notebook+pdf'

# Seed both RNGs so the pandas `.sample()` calls used by the plotting helpers
# are reproducible on a fresh "Restart & Run All".
np.random.seed(42)
random.seed(42)

In [None]:
def session_bucket(x):
    """Map a session size to a coarse size label.

    Args:
        x: Session size (a scalar number).

    Returns:
        'small' when x <= 20, 'mid' when 20 < x <= 40, 'large' when
        40 < x <= 60, and 'max' for anything else (including values for
        which every comparison is false, such as NaN).
    """
    if x <= 20:
        label = 'small'
    elif 20 < x <= 40:
        label = 'mid'
    elif 40 < x <= 60:
        label = 'large'
    else:
        label = 'max'
    return label

def plot_contours(df, xtitle, ytitle, ztitle, target_var):
    """Render a 3-D mesh of three columns of ``df``, coloured by a fourth.

    The (x, y) points are triangulated with ``scipy.spatial.Delaunay`` and the
    resulting simplices are used as the mesh faces.

    Args:
        df: Frame holding the columns named by the other arguments.
        xtitle: Column used for the x coordinate (also the x-axis title).
        ytitle: Column used for the y coordinate (also the y-axis title).
        ztitle: Column used for the z coordinate (also the z-axis title).
        target_var: Column whose values drive the mesh colour intensity.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    xs = df[xtitle]
    ys = df[ytitle]
    zs = df[ztitle]
    intensity = df[target_var]

    # Triangulate in the x/y plane only; z does not influence the faces.
    xy_points = np.vstack([xs, ys]).transpose()
    triangulation = Delaunay(xy_points)
    first_idx, second_idx, third_idx = triangulation.simplices.T

    # NOTE(review): explicit i/j/k faces are supplied, so `alphahull` likely
    # has no effect here -- confirm against the Mesh3d documentation.
    mesh = go.Mesh3d(
        x=xs,
        y=ys,
        z=zs,
        i=first_idx,
        j=second_idx,
        k=third_idx,
        intensity=intensity,
        colorscale='thermal',
        alphahull=.5,
    )
    fig = go.Figure(mesh)

    axis_titles = dict(
        xaxis_title=xtitle,
        yaxis_title=ytitle,
        zaxis_title=ztitle,
    )
    fig.update_layout(
        height=500,
        width=750,
        scene=axis_titles,
        margin=dict(l=0, r=0, b=0, t=10),
    )

    fig.show()
    
    

def plot_contours_kl(baseline_df, sensitivity_df, resample=500_000):
    """Plot elementwise ``kl_div`` of baseline vs. sensitivity ``ended_event``.

    The sensitivity rows are resampled with replacement to the baseline's row
    count, inner-joined to the baseline on ``session_size``, subsampled, scored
    with ``scipy.special.kl_div`` and passed to ``plot_contours`` with axes
    'mid', 'large' and 'window'.

    Args:
        baseline_df: Frame with at least 'session_size', 'ended_event' and
            'normalized_distance'.
        sensitivity_df: Frame with at least 'session_size', 'ended_event',
            'inc_small', 'inc_medium', 'inc_large', 'normalized_distance',
            'window', 'mid' and 'large'.
        resample: Maximum number of joined rows to plot. Capped at the number
            of joined rows actually available.

    Returns:
        None. The figure is displayed by ``plot_contours``.
    """
    # Suffix the sensitivity columns so they do not collide with the baseline
    # columns of the same name after the join.
    sensitivity_df = sensitivity_df.rename(columns={
        'inc_small': 'inc_small_s',
        'inc_medium': 'inc_mid_s',
        'inc_large': 'inc_large_s',
        'ended_event': 'ended_event_s',
        'normalized_distance': 'norm_dist_s'
    })

    sensitivity_df = sensitivity_df.sample(baseline_df.shape[0], replace=True)

    sensitivity_cols = [
        'session_size', 'ended_event_s', 'inc_small_s', 'inc_mid_s',
        'inc_large_s', 'window', 'mid', 'large', 'norm_dist_s',
    ]
    baseline_cols = ['session_size', 'ended_event', 'normalized_distance']

    # Joining on a non-unique key pairs every sensitivity row with every
    # baseline row of the same session_size, so the result can be very large.
    joined = (
        sensitivity_df[sensitivity_cols].set_index('session_size')
        .join(baseline_df[baseline_cols].set_index('session_size'), how='inner')
        .reset_index()
    )

    # Honour the caller's `resample` (previously ignored in favour of a
    # hard-coded 500_000) and cap it, because sampling without replacement
    # raises when more rows are requested than exist.
    sens_joined = joined.sample(min(resample, len(joined)))

    sens_joined['kl_div_ended'] = kl_div(sens_joined['ended_event'], sens_joined['ended_event_s'])

    plot_contours(sens_joined, 'mid', 'large', 'window', 'kl_div_ended')


def plot_contours_kl_2(baseline_df, sensitivity_df, resample=500_000):
    """Plot elementwise ``kl_div`` of baseline vs. sensitivity ``ended_event``.

    Variant of ``plot_contours_kl`` that also carries 'soc_freq', resamples
    the sensitivity rows to four times the baseline's row count, and plots
    against the axes 'large', 'window' and 'soc_freq'.

    Args:
        baseline_df: Frame with at least 'session_size', 'ended_event' and
            'normalized_distance'.
        sensitivity_df: Frame with at least 'session_size', 'ended_event',
            'inc_small', 'inc_medium', 'inc_large', 'normalized_distance',
            'window', 'mid', 'large' and 'soc_freq'.
        resample: Maximum number of joined rows to plot. Capped at the number
            of joined rows actually available.

    Returns:
        None. The figure is displayed by ``plot_contours``.
    """
    # Suffix the sensitivity columns so they do not collide with the baseline
    # columns of the same name after the join.
    sensitivity_df = sensitivity_df.rename(columns={
        'inc_small': 'inc_small_s',
        'inc_medium': 'inc_mid_s',
        'inc_large': 'inc_large_s',
        'ended_event': 'ended_event_s',
        "normalized_distance": "norm_dist_s"
    })

    sensitivity_df = sensitivity_df.sample(baseline_df.shape[0] * 4, replace=True)

    sensitivity_cols = [
        'session_size', 'ended_event_s', 'inc_small_s', 'inc_mid_s',
        'inc_large_s', 'window', 'mid', 'large', 'norm_dist_s', 'soc_freq',
    ]
    baseline_cols = ['session_size', 'ended_event', 'normalized_distance']

    # Joining on a non-unique key pairs every sensitivity row with every
    # baseline row of the same session_size, so the result can be very large.
    joined = (
        sensitivity_df[sensitivity_cols].set_index('session_size')
        .join(baseline_df[baseline_cols].set_index('session_size'), how='inner')
        .reset_index()
    )

    # Cap the request: sampling without replacement raises when more rows are
    # requested than the join produced.
    sess_joined = joined.sample(min(resample, len(joined)))

    sess_joined['kl_div_ended'] = kl_div(sess_joined['ended_event'], sess_joined['ended_event_s'])
    plot_contours(sess_joined, 'large', 'window', 'soc_freq', 'kl_div_ended')
    

def _cutoff(inc, bucket):
    if bucket == 'small':
        return inc >= 10
    
    if bucket == 'medium':
        return inc >= 20

    return inc >= 30


In [3]:
# df_sensitivity = pd.read_parquet('dqn_parquet/sensitivity_analysis/dqn_pred_cnn.parquet')
# df_sensitivity['normalized_distance'] = df_sensitivity['ended_event'] / df_sensitivity['session_size']
# df_sensitivity['session_bucket'] = df_sensitivity['session_size'].apply(session_bucket)

# df_eval = pd.read_parquet('dqn_parquet/eval/dqn_pred_cnn_15000_no_pen.parquet')
# df_eval['normalized_distance'] = df_eval['ended_event'] / df_eval['session_size']
# df_eval['session_bucket'] = df_eval['session_size'].apply(session_bucket)
# df_eval['before_cutoff_small'] = df_eval[['inc_small', 'session_bucket']].apply(lambda x: _cutoff(x['inc_small'], x['session_bucket']), axis=1)
# df_eval['before_cutoff_mid'] = df_eval[['inc_medium', 'session_bucket']].apply(lambda x: _cutoff(x['inc_medium'], x['session_bucket']), axis=1)
# df_eval['before_cutoff_large'] = df_eval[['inc_large', 'session_bucket']].apply(lambda x: _cutoff(x['inc_large'], x['session_bucket']), axis=1)

In [4]:
# plot_contours_kl(df_eval[df_eval['session_bucket'] == 'small'], df_sensitivity[df_sensitivity['session_bucket'] == 'small'])

In [5]:
# plot_contours_kl(df_eval[df_eval['session_bucket'] == 'mid'], df_sensitivity[df_sensitivity['session_bucket'] == 'mid'])

In [6]:
# df_eval

In [7]:
# plot_contours_kl(
#     df_eval[(df_eval['session_bucket'] == 'large') |(df_eval['session_bucket'] == 'max')],
#     df_sensitivity[(df_sensitivity['session_bucket'] == 'large') |(df_sensitivity['session_bucket'] == 'max')],
# )


In [8]:
# df_sensitivity_q2 = pd.read_parquet('dqn_parquet/sensitivity_analysis/q2/dqn_pred_cnn_dist_enc_enforce.parquet')
# df_eval_q2 = pd.read_parquet('dqn_parquet/eval/q2/dqn_pred_cnn_dist_enc_enforce_15000.parquet')

In [9]:
# df_sensitivity_q2 = df_sensitivity_q2.fillna(0)

# df_sensitivity_q2['normalized_distance'] = df_sensitivity_q2['ended_event'] / df_sensitivity_q2['session_size']
# df_eval_q2['normalized_distance'] = df_eval_q2['ended_event'] / df_eval_q2['session_size']

# df_sensitivity_q2['session_bucket'] = df_sensitivity_q2['session_size'].apply(session_bucket)
# df_eval_q2['session_bucket'] = df_eval_q2['session_size'].apply(session_bucket)

In [10]:
# plot_contours_kl_2(df_eval_q2[df_eval_q2['session_bucket'] == 'small'], df_sensitivity_q2[df_sensitivity_q2['session_bucket'] == 'small'])

In [11]:
# plot_contours_kl_2(df_eval_q2[df_eval_q2['session_bucket'] == 'mid'], df_sensitivity_q2[df_sensitivity_q2['session_bucket'] == 'mid'])

In [12]:
# plot_contours_kl_2(df_eval_q2[df_eval_q2['session_bucket'] == 'large'], df_sensitivity_q2[df_sensitivity_q2['session_bucket'] == 'large'])

In [13]:
# plot_contours_kl_2(df_eval_q2[df_eval_q2['session_bucket'] == 'max'], df_sensitivity_q2[df_sensitivity_q2['session_bucket'] == 'max'], 100_000)