In [1]:
# NOTE(review): USE_GPU is not referenced by any cell visible in this notebook — confirm it is still needed.
USE_GPU = False
from typing import List, Tuple, Dict, Any, Optional
import seaborn as sns
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
import statsmodels.api as sm
import re
import plotly.express as px
import tensorboard
import pandas as pd
import numpy as np
import seaborn as sns
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import matplotlib.pyplot as plt
import os
import glob
from typing import Dict
from statsmodels.tsa.filters.hp_filter import hpfilter
import re
from datetime import datetime
import plotly
import plotly.offline as pyo
import plotly.io as pio

from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [2]:
# Notebook-wide pandas settings: wide/long frame display, three-decimal floats,
# and chained-assignment warnings switched off.
_PANDAS_OPTIONS = {
    'display.max_columns': 500,
    'display.width': 1000,
    'display.max_rows': 500,
    'mode.chained_assignment': None,
    'display.float_format': lambda x: '%.3f' % x,
}

for _option_name, _option_value in _PANDAS_OPTIONS.items():
    pd.set_option(_option_name, _option_value)


In [3]:
# Default renderer used by every `fig.show()` call below. The '+'-joined value
# names two renderers (notebook and pdf) — presumably so figures display inline
# and also survive a PDF export; confirm against the export workflow.
pio.renderers.default = 'notebook+pdf'
# pyo.init_notebook_mode()

In [4]:


# Bare names of the time-related session statistics.
TIME_STATS_GRANULAR = [
    'ended_time',
    'session_minutes',
    'time_cutoff',
    'time_large',
    'time_medium',
    'time_small',
]

# The same statistics, prefixed with their 'sess_time/' tag.
TIME_STATS = [f'sess_time/{stat_name}' for stat_name in TIME_STATS_GRANULAR]

# Bare names of the size-related session statistics.
SIZE_STATS_GRANULAR = [
    'ended_event',
    'session_size',
    'size_cutoff',
    'inc_small',
    'inc_medium',
    'inc_large',
]
    

In [5]:

def plot_vectors(df, x_name, y, title):
    """Build a Plotly Express line chart with one coloured line per model.

    Args:
        df: Data frame to plot; must contain a 'model' column, which is used
            for the line colour.
        x_name: Column name used for the x axis.
        y: Column name(s) used for the y axis.
        title: Text appended to 'Training ' to form the figure title.

    Returns:
        The figure produced by ``px.line`` (not shown).
    """
    line_options = dict(
        x=x_name,
        y=y,
        color='model',
        title=f'Training {title}',
    )
    return px.line(df, **line_options)
def plot_vectors_multiple(df, y, title, show=True):
    """Build a line chart of one or more columns against the 'step' column.

    Args:
        df: Data frame to plot; must contain a 'step' column.
        y: Column name(s) used for the y axis.
        title: Text appended to 'Training ' to form the figure title.
        show: Whether the legend is displayed on the figure.

    Returns:
        The figure produced by ``px.line`` (not shown).
    """
    fig = px.line(
        df,
        x='step',
        y=y,
        title=f'Training {title}',
    )

    # `showlegend` is a layout property, not a px.line() keyword: passing it to
    # px.line() raises TypeError, so it is applied on the layout instead.
    fig.update_layout(showlegend=show)

    return fig


def global_perc_to_end(df_matrix, session_size_bound=None, max_episode=4000, window=100):
    """Compute a smoothed 'fraction of session reached' curve for each model.

    For every CSV, rows are kept when ``exp_runs <= max_episode``,
    ``ended_event > 0`` and (optionally) ``session_size`` lies inside
    ``session_size_bound``. ``perc_to_end`` is ``ended_event / session_size``.
    Rows are ordered by ``exp_runs``, smoothed with a centred rolling mean of
    ``window`` rows, and then every ``window``-th smoothed row is kept.

    Args:
        df_matrix: Mapping of model name -> path of the CSV to read. Each CSV
            must provide 'exp_runs', 'session_size' and 'ended_event' columns.
        session_size_bound: Optional ``(low, high)`` inclusive bounds on
            'session_size'. A falsy value disables the filter.
        max_episode: Largest 'exp_runs' value kept (default 4000).
        window: Rolling-mean window, also used as the down-sampling stride
            (default 100).

    Returns:
        One concatenated DataFrame holding the smoothed rows of every model,
        with an added 'step' column (0..n-1 within each model) and a 'model'
        column carrying the model name.

    Raises:
        ValueError: From ``pd.concat`` when ``df_matrix`` is empty.
    """
    # Phase 1: load and filter every CSV (kept as a separate pass so the
    # progress messages appear in the same order as before).
    filtered_by_model = {}
    for df_name, df_path in df_matrix.items():
        print(f'Calculating percentage done for {df_name} {df_path}')
        df = pd.read_csv(df_path)
        print(f'{df_name} before shape: {df.shape}')

        keep = (df['exp_runs'] <= max_episode) & (df['ended_event'] > 0)
        if session_size_bound:
            keep &= (
                (df['session_size'] >= session_size_bound[0])
                & (df['session_size'] <= session_size_bound[1])
            )

        # Copy so the new column is written to an independent frame rather
        # than to a slice of the frame read from disk.
        df = df[keep].copy()
        df['perc_to_end'] = df['ended_event'] / df['session_size']
        filtered_by_model[df_name] = df.sort_values(by='exp_runs')

    # Phase 2: smooth and down-sample each model's rows.
    smoothed_frames = []
    for df_name, df in filtered_by_model.items():
        print(f'{df_name} before rollup shape: {df.shape}')

        # The rolling mean is taken over every column, so all CSV columns are
        # expected to be numeric; edge rows without a full window become NaN
        # and are dropped.
        smoothed = (
            df.rolling(window, center=True)
            .mean()
            .dropna()
            .reset_index(drop=True)
        )
        smoothed = smoothed[::window].reset_index(drop=True)
        smoothed['step'] = smoothed.index.values
        print(f'{df_name} after rollup shape: {smoothed.shape}')

        # Added after smoothing: a string column cannot be averaged.
        smoothed['model'] = df_name
        smoothed_frames.append(smoothed)

    return pd.concat(smoothed_frames)


def global_perc_done(df_matrix, session_size_bound=None, max_episode=4000):
    """Compute, per model and episode, the share of sessions that ran to the end.

    For every CSV, rows are kept when ``exp_runs <= max_episode``,
    ``ended_event > 0`` and (optionally) ``session_size`` lies inside
    ``session_size_bound``. A session counts as done when
    ``ended_event == session_size``. Rows with ``ended_event == 0`` are
    excluded before counting, so they do not appear in the denominator.

    Args:
        df_matrix: Mapping of model name -> path of the CSV to read. Each CSV
            must provide 'exp_runs', 'session_size' and 'ended_event' columns.
        session_size_bound: Optional ``(low, high)`` inclusive bounds on
            'session_size'. A falsy value disables the filter.
        max_episode: Largest 'exp_runs' value kept (default 4000).

    Returns:
        One concatenated DataFrame with columns 'episode' (renamed from
        'exp_runs'), 'done_count', 'sess_count', 'perc_done' (a fraction,
        done_count / sess_count) and 'model'.

    Raises:
        ValueError: From ``pd.concat`` when ``df_matrix`` is empty.
    """
    summaries = []
    for df_name, df_path in df_matrix.items():
        print(f'Calculating percentage finished for {df_name} from {df_path}')
        df = pd.read_csv(df_path)

        keep = (df['exp_runs'] <= max_episode) & (df['ended_event'] > 0)
        if session_size_bound:
            keep &= (
                (df['session_size'] >= session_size_bound[0])
                & (df['session_size'] <= session_size_bound[1])
            )

        # Copy so the new column is written to an independent frame rather
        # than to a slice of the frame read from disk.
        df = df[keep].copy()
        df['perc_done'] = df['ended_event'] == df['session_size']

        summary = (
            df.groupby(['exp_runs'])
            .agg(
                done_count=('perc_done', 'sum'),
                sess_count=('perc_done', 'count'),
            )
            .reset_index()
        )
        summary['perc_done'] = summary['done_count'] / summary['sess_count']
        summary['model'] = df_name
        summaries.append(summary)

    return pd.concat(summaries).rename(columns={'exp_runs': 'episode'})
    
    

    

# Plotly's default colour sequence, reused so each model keeps a fixed colour.
PLOTLY_COLORS = plotly.colors.DEFAULT_PLOTLY_COLORS

def plot_granular(df, model_list, showlegend=False):
    """Create one Scatter trace per model column, plotted against 'episode'.

    Args:
        df: Data frame with an 'episode' column and one column per entry of
            ``model_list``.
        model_list: Column names to plot; each is also used as the trace name.
        showlegend: Whether each trace gets a legend entry.

    Returns:
        A list of ``go.Scatter`` traces, one per model, in ``model_list`` order.
    """
    from itertools import cycle

    # The palette is finite. Zipping against it directly would silently drop
    # every model beyond its length, so colours wrap around instead.
    return [
        go.Scatter(
            x=df['episode'],
            y=df[model],
            name=model,
            line=dict(color=color),
            showlegend=showlegend,
        )
        for model, color in zip(model_list, cycle(PLOTLY_COLORS))
    ]

                

In [6]:
# Log locations (relative paths).
# NOTE(review): neither constant is referenced by the visible cells, and the
# CSV paths used below start with 'dqn_csv/' rather than CSV_LOGS — confirm
# which location is authoritative.
TB_LOGS = 'dqn_tb'
CSV_LOGS = 'rl_results/dqn_csv'

In [7]:
# Display name of each model -> CSV with its q2 training results.
_Q2_TRAIN_DIR = 'dqn_csv/train/q2'
csv_dirs = {
    model_label: f'{_Q2_TRAIN_DIR}/{file_stem}.csv'
    for model_label, file_stem in (
        ('DQN PRED CNN', 'dqn_pred_cnn'),
        ('DQN PRED CNN BASELINE', 'dqn_pred_cnn_baseline'),
        ('QRDQN PRED CNN', 'qrdqn_pred_cnn'),
        ('A2C PRED CNN', 'a2c_pred_cnn'),
    )
}



In [8]:
# Smoothed ended_event / session_size curve per model (reads every CSV in csv_dirs).
perc_to_end = global_perc_to_end(csv_dirs)
# Per-episode share of sessions that reached their final event (reads the CSVs again).
perc_finished = global_perc_done(csv_dirs)

Calculating percentage done for DQN PRED CNN dqn_csv/train/q2/dqn_pred_cnn.csv
DQN PRED CNN before shape: (395608, 22)
Calculating percentage done for DQN PRED CNN BASELINE dqn_csv/train/q2/dqn_pred_cnn_baseline.csv
DQN PRED CNN BASELINE before shape: (471423, 15)
Calculating percentage done for QRDQN PRED CNN dqn_csv/train/q2/qrdqn_pred_cnn.csv
QRDQN PRED CNN before shape: (433515, 22)
Calculating percentage done for A2C PRED CNN dqn_csv/train/q2/a2c_pred_cnn.csv
A2C PRED CNN before shape: (424901, 22)
DQN PRED CNN before rollup shape: (395280, 23)
DQN PRED CNN after rollup shape: (3952, 24)
DQN PRED CNN BASELINE before rollup shape: (400000, 16)
DQN PRED CNN BASELINE after rollup shape: (4000, 17)
QRDQN PRED CNN before rollup shape: (400000, 23)
QRDQN PRED CNN after rollup shape: (4000, 24)
A2C PRED CNN before rollup shape: (400000, 23)
A2C PRED CNN after rollup shape: (4000, 24)
Calculating percentage finished for DQN PRED CNN from dqn_csv/train/q2/dqn_pred_cnn.csv
Calculating perce

In [9]:
def _last_100_mean_std(model_name):
    """Return (mean, std) of 'perc_to_end' over the model's last 100 rows by 'exp_runs'."""
    last_rows = (
        perc_to_end.loc[perc_to_end['model'] == model_name]
        .sort_values(by='exp_runs')
        .tail(100)['perc_to_end']
    )
    return last_rows.mean(), last_rows.std()


mean_dqn_perc_end, std_dqn_perc_end = _last_100_mean_std('DQN PRED CNN')
mean_dqn_base_perc_end, std_dqn_base_perc_end = _last_100_mean_std('DQN PRED CNN BASELINE')
mean_qrdqn_perc_end, std_qrdqn_perc_end = _last_100_mean_std('QRDQN PRED CNN')
mean_a2c_perc_end, std_a2c_perc_end = _last_100_mean_std('A2C PRED CNN')

print(f'DQN mean perc to end: {mean_dqn_perc_end} std: {std_dqn_perc_end}')
print(f'DQN baseline mean perc to end: {mean_dqn_base_perc_end} std: {std_dqn_base_perc_end}')
print(f'QRDQN mean perc to end: {mean_qrdqn_perc_end} std: {std_qrdqn_perc_end}')
print(f'A2C mean perc to end: {mean_a2c_perc_end} std: {std_a2c_perc_end}')

DQN mean perc to end: 0.7605635103340664 std: 0.018912154672918013
DQN baseline mean perc to end: 0.614904603630048 std: 0.011575233536353889
QRDQN mean perc to end: 0.6509472197494739 std: 0.011041031763578405
A2C mean perc to end: 0.6602616353445622 std: 0.01507686521325191


In [17]:
# Keep every 25th smoothed step so the plotted lines stay readable.
# Note: this rebinds `perc_to_end`, so later cells see the thinned frame.
perc_to_end = perc_to_end[perc_to_end['step'] % 25 == 0]

dist_end_line = px.line(perc_to_end, x='step', y='perc_to_end', color='model')
dist_end_line.update_layout(
    xaxis_title='Episode',
    yaxis_title='Simulation Index / Session Size',
    width=800,
    height=500,
    margin=dict(l=50, r=50, b=50, t=50, pad=4))

dist_end_line.show()

# write_image does not create missing parent directories, so make sure the
# output folder exists before exporting (a fresh checkout would otherwise fail).
os.makedirs('rl_plots/q2', exist_ok=True)
pio.write_image(dist_end_line, 'rl_plots/q2/glob_perc_to_end.png')

In [24]:
# perc_finished = perc_finished[(perc_finished['episode'] % 25 == 0) & (perc_finished['episode'] <= 3750)]

# Sanity check: print the last episode available for each of these models.
for checked_model in ('DQN PRED CNN', 'QRDQN PRED CNN', 'A2C PRED CNN'):
    model_episodes = perc_finished.loc[perc_finished['model'] == checked_model, 'episode']
    print(model_episodes.max())

4000
4000
4000


In [25]:
# Order the rows by episode so that .tail(n) below picks the latest episodes.
perc_finished = perc_finished.sort_values(by=['episode'], ascending=True)

In [33]:
# Mean completion share over the last 1000 rows of the DQN model.
# NOTE(review): the execution counts show this cell was last run after the
# every-25th-episode filter further down, so the displayed value may not match
# a top-to-bottom run — confirm after Restart & Run All.
dqn_perc_done = perc_finished.loc[perc_finished['model'] == 'DQN PRED CNN', 'perc_done']
dqn_perc_done.tail(1000).mean()

0.19149057159334842

In [18]:
# Keep every 25th episode and order by episode. The sorted frame must be
# assigned: sort_values() returns a new frame, so calling it on its own line
# has no effect. A stable sort keeps the models' relative order within an episode.
perc_finished = (
    perc_finished[(perc_finished['episode'] % 25 == 0)]
    .sort_values(by='episode', kind='mergesort')
)

perc_finished_line = px.line(perc_finished, x='episode', y='perc_done', color='model')
# NOTE(review): 'perc_done' is a fraction (done_count / sess_count) while the
# axis title says (%) — confirm whether the values should be scaled by 100.
perc_finished_line.update_layout(
    xaxis_title='Episode',
    yaxis_title='Sessions Completed (%)',
    width=800,
    height=500,
    margin=dict(l=50, r=50, b=50, t=50, pad=4)
)

perc_finished_line.show()

# pio.write_image(perc_finished_line, 'rl_plots/q2/glob_perc_finished.png')

# pio.write_image(perc_finished_line, 'rl_plots/q2/glob_perc_finished.png')