In [1]:
USE_GPU = False
from typing import List, Tuple, Dict, Any, Optional
import seaborn as sns
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
import statsmodels.api as sm
import re
import plotly.express as px
import tensorboard
import pandas as pd
import numpy as np
import seaborn as sns
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import matplotlib.pyplot as plt
import os
import glob
from typing import Dict
from statsmodels.tsa.filters.hp_filter import hpfilter
import re
from datetime import datetime
import plotly
import plotly.offline as pyo
import plotly.io as pio

from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [56]:
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_rows', 500)
pd.set_option('mode.chained_assignment', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)


In [3]:
pio.renderers.default = 'notebook+pdf'
# pyo.init_notebook_mode()

In [4]:


TIME_STATS = [
    'sess_time/ended_time',
    'sess_time/session_minutes',
    'sess_time/time_cutoff',
    'sess_time/time_large',
    'sess_time/time_medium',
    'sess_time/time_small',
]

TIME_STATS_GRANULAR = [
    'ended_time', 
    'session_minutes', 
    'time_cutoff', 
    'time_large', 
    'time_medium', 
    'time_small'
]

SIZE_STATS_GRANULAR = [
    'ended_event',
    'session_size',
    'size_cutoff',
    'inc_small',
    'inc_medium',
    'inc_large'
]
    

In [107]:

def plot_vectors(df, x_name, y, title):

    fig = px.line(
        df,
        x=x_name,
        y=y,
        color='model',
        title=f'Training {title}',
    )
    

    return fig
def plot_vectors_multiple(df, y, title, show=True):

    fig = px.line(
        df,
        x='step',
        y=y,
        title=f'Training {title}',
        showlegend=show
    )
    
    # fig.show()
    return fig


def global_perc_to_end(df_matrix, session_size_bound=None):
    move_to_end_matrix = {}
    for df_name, df_path in df_matrix.items():
        print(f'Calculating percentage done for {df_name} {df_path}')
        df = pd.read_csv(df_path)
        df = df[df['exp_runs'] <= 4000]
        if session_size_bound:
            df = df[
                (df['session_size'] >= session_size_bound[0]) & (df['session_size'] <= session_size_bound[1])
            ]
       
        df = df[df['ended_event'] > 0]
        df['perc_to_end'] = df['ended_event'] / df['session_size']
        perc_to_end_summary = df.sort_values(by='exp_runs')
        move_to_end_matrix[df_name] = perc_to_end_summary
        

    for df_name, df in move_to_end_matrix.items():
        
        if 'DQN' in df_name:
            print(f'{df_name} before rollup shape: {df.shape}')
            df = df.rolling(100, center=True) \
                .mean() \
                .dropna() \
                .reset_index(drop=True)
   
            df = df[::100].reset_index(drop=True) 
            df['step'] = df.index.values
            print(f'{df_name} after rollup shape: {df.shape}')
            
        if 'A2C' in df_name:
            print(f'{df_name} before rollup shape: {df.shape}')
            df = df.rolling(5, center=True) \
                .mean() \
                .dropna() \
                .reset_index(drop=True)
            df = df[::5].reset_index(drop=True)
            df['step'] = df.index.values
            print(f'{df_name} after rollup shape: {df.shape}')
            
        df['model'] = df_name
        
        
        move_to_end_matrix[df_name] = df
        
    out_df = pd.concat(move_to_end_matrix.values())
    

    
    return out_df


def global_perc_done(df_matrix, session_size_bound=None):
    done_matrix = {}
    for df_name, df_path in df_matrix.items():
        print(f'Calculating percentage finished for {df_name} from {df_path}')
        df = pd.read_csv(df_path)
        df = df[df['exp_runs'] <= 5000]
        if session_size_bound:
            df = df[
                (df['session_size'] >= session_size_bound[0]) & (df['session_size'] <= session_size_bound[1])]
        
        df = df[df['ended_event'] > 0]
       
       
        df['perc_done'] = df['ended_event'] == df['session_size']
        perc_done_summary = df.groupby(['exp_runs']).agg(
            done_count=('perc_done', 'sum'),
            sess_count=('perc_done', 'count')
        ) \
        .reset_index()
        perc_done_summary['perc_done'] = perc_done_summary['done_count'] / perc_done_summary['sess_count']
        perc_done_summary['model'] = df_name
        done_matrix[df_name] = perc_done_summary
    
        
    
    out_df = pd.concat(done_matrix.values())
    out_df = out_df.rename(columns={'exp_runs': 'episode'})
    return out_df
    
    

    

PLOTLY_COLORS = plotly.colors.DEFAULT_PLOTLY_COLORS

def plot_granular(df, model_list, showlegend=False):
    plot_list = []
    for model, color in zip(model_list, PLOTLY_COLORS):
        plot_list.append(
            go.Scatter(
                x=df['episode'],
                y=df[model],
                name=model,
                line=dict(color=color),
                showlegend=showlegend
            )
        )
    
    
    return plot_list

                

In [108]:
TB_LOGS = 'dqn_tb'
CSV_LOGS = 'rl_results/dqn_csv'

In [109]:
csv_dirs = {
    'DQN PRED CNN': 'dqn_csv/train/q2/dqn_pred_cnn.csv',
    'QRDQN PRED CNN': 'dqn_csv/train/q2/qrdqn_pred_cnn.csv',
    'A2C PRED CNN': 'dqn_csv/train/q2/a2c_pred_cnn.csv',
}



In [110]:
perc_to_end = global_perc_to_end(csv_dirs)
perc_finished = global_perc_done(csv_dirs)

Calculating percentage done for DQN PRED CNN dqn_csv/train/q2/dqn_pred_cnn.csv
Calculating percentage done for QRDQN PRED CNN dqn_csv/train/q2/qrdqn_pred_cnn.csv
Calculating percentage done for A2C PRED CNN dqn_csv/train/q2/a2c_pred_cnn.csv
DQN PRED CNN before rollup shape: (394223, 23)
DQN PRED CNN after rollup shape: (3942, 24)
QRDQN PRED CNN before rollup shape: (400000, 23)
QRDQN PRED CNN after rollup shape: (4000, 24)
A2C PRED CNN before rollup shape: (25780, 23)
A2C PRED CNN after rollup shape: (5156, 24)
Calculating percentage finished for DQN PRED CNN from dqn_csv/train/q2/dqn_pred_cnn.csv
Calculating percentage finished for QRDQN PRED CNN from dqn_csv/train/q2/qrdqn_pred_cnn.csv
Calculating percentage finished for A2C PRED CNN from dqn_csv/train/q2/a2c_pred_cnn.csv


In [111]:
dqn_pred = pd.read_csv(csv_dirs['DQN PRED CNN'])
qrdqn_pred = pd.read_csv(csv_dirs['QRDQN PRED CNN'])
a2c_pred = pd.read_csv(csv_dirs['A2C PRED CNN'])

In [112]:
print(a2c_pred['exp_runs'].count())
print(qrdqn_pred['exp_runs'].count())
print(dqn_pred['exp_runs'].count())

25780
432862
394265


In [113]:
print(perc_to_end[perc_to_end['model'] == 'DQN PRED CNN'].shape)
print(perc_to_end[perc_to_end['model'] == 'QRDQN PRED CNN'].shape)
print(perc_to_end[perc_to_end['model'] == 'A2C PRED CNN'].shape)

(3942, 25)
(4000, 25)
(5156, 25)


In [114]:
mean_dqn_perc_end, std_dqn_perc_end = (
    perc_to_end[perc_to_end['model'] == 'DQN PRED CNN'].sort_values(by='exp_runs').tail(10)['perc_to_end'].mean(),
    perc_to_end[perc_to_end['model'] == 'DQN PRED CNN'].sort_values(by='exp_runs').tail(10)['perc_to_end'].std()
)

mean_qrdqn_perc_end, std_qrdqn_perc_end = (
    perc_to_end[perc_to_end['model'] == 'QRDQN PRED CNN'].sort_values(by='exp_runs').tail(10)['perc_to_end'].mean(),
    perc_to_end[perc_to_end['model'] == 'QRDQN PRED CNN'].sort_values(by='exp_runs').tail(10)['perc_to_end'].std()
)

mean_a2c_perc_end, std_a2c_perc_end = (
    perc_to_end[perc_to_end['model'] == 'A2C PRED CNN'].sort_values(by='exp_runs').tail(10)['perc_to_end'].mean(),
    perc_to_end[perc_to_end['model'] == 'A2C PRED CNN'].sort_values(by='exp_runs').tail(10)['perc_to_end'].std()
)

print(f'DQN mean perc to end: {mean_dqn_perc_end} std: {std_dqn_perc_end}')
print(f'QRDQN mean perc to end: {mean_qrdqn_perc_end} std: {std_qrdqn_perc_end}')
print(f'A2C mean perc to end: {mean_a2c_perc_end} std: {std_a2c_perc_end}')

DQN mean perc to end: 0.6866023532230889 std: 0.014583192479683161
QRDQN mean perc to end: 0.6647917985554489 std: 0.009817008277803005
A2C mean perc to end: 0.7548205943965149 std: 0.0641652706488363


In [85]:
perc_to_end = perc_to_end[(perc_to_end['step'] % 25 == 0) & (perc_to_end['step'] <= 3750)]
perc_comp_line = px.line(perc_to_end, x='step', y='perc_to_end', color='model')
perc_comp_line.update_layout(
    xaxis_title='Step',
    yaxis_title='Percentage of session completed',
    title='Percentage of session completed by training step',
)

perc_comp_line.show()
pio.write_image(perc_comp_line, 'rl_plots/q2/glob_perc_to_end.png')

In [115]:
perc_finished = perc_finished[(perc_finished['episode'] % 25 == 0) & (perc_finished['episode'] <= 3750)]

print(perc_finished[perc_finished['model'] == 'DQN PRED CNN']['episode'].max())
print(perc_finished[perc_finished['model'] == 'QRDQN PRED CNN']['episode'].max())
print(perc_finished[perc_finished['model'] == 'A2C PRED CNN']['episode'].max())

3750
3750
275


In [116]:
a2c_finished, dqn_finished, qrdqn_finished = (
    perc_finished[perc_finished['model'] == 'A2C PRED CNN'],
    perc_finished[perc_finished['model'] == 'DQN PRED CNN'],
    perc_finished[perc_finished['model'] == 'QRDQN PRED CNN']
)

a2c_finished, dqn_finished, qrdqn_finished = (
    a2c_finished.sort_values(by='episode').tail(10),
    dqn_finished.sort_values(by='episode').tail(10),
    qrdqn_finished.sort_values(by='episode').tail(10)
)

In [117]:
a2c_stat = a2c_finished.sort_values(by=['episode'])[['episode', 'perc_done']].agg({'mean', 'std'})
a2c_stat['model'] = 'A2C PRED CNN'

In [118]:
dqn_stat = dqn_finished.sort_values(by=['episode'])[['episode', 'perc_done']].agg({'mean', 'std'})
dqn_stat['model'] = 'DQN PRED CNN'

In [119]:
qrdqn_stat = qrdqn_finished.sort_values(by=['episode'])[['episode', 'perc_done']].agg({'mean', 'std'})
qrdqn_stat['model'] = 'QRDQN PRED CNN'

In [120]:
q2_stat_df = pd.concat([a2c_stat, dqn_stat, qrdqn_stat]).drop(columns=['episode'])

In [121]:
q2_stat_df

Unnamed: 0,perc_done,model
mean,0.195,A2C PRED CNN
std,0.127,A2C PRED CNN
mean,0.168,DQN PRED CNN
std,0.041,DQN PRED CNN
mean,0.045,QRDQN PRED CNN
std,0.021,QRDQN PRED CNN


In [122]:
print(q2_stat_df.pivot_table(index='model', aggfunc={'mean', 'std'}).reset_index().round(3).reset_index(drop=True).to_latex())

\begin{tabular}{llrr}
\toprule
 & model & \multicolumn{2}{r}{perc_done} \\
 &  & mean & std \\
\midrule
0 & A2C PRED CNN & 0.161000 & 0.048000 \\
1 & DQN PRED CNN & 0.104000 & 0.090000 \\
2 & QRDQN PRED CNN & 0.033000 & 0.017000 \\
\bottomrule
\end{tabular}

