In [2]:
!pip install statsmodels plotly --quiet

You should consider upgrading via the '/Users/danielmiller/.pyenv/versions/3.9.15/envs/torch_rl/bin/python3.9 -m pip install --upgrade pip' command.

In [3]:
# NOTE(review): presumably toggles GPU execution, but no cell visible here
# reads this flag — confirm it is still needed.
USE_GPU = False
from typing import List, Tuple, Dict, Any, Optional
import seaborn as sns
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
import statsmodels.api as sm
import re
import plotly.express as px
import tensorboard
import pandas as pd
import numpy as np
import seaborn as sns
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import matplotlib.pyplot as plt
import os
import glob
from typing import Dict
from statsmodels.tsa.filters.hp_filter import hpfilter
import re
from datetime import datetime
import plotly.offline as pyo
import plotly.io as pio



In [4]:
# Set the default Plotly renderer for every fig.show() in this notebook.
# The '+' joins two renderer names ('notebook' and 'pdf') so both are used.
pio.renderers.default = 'notebook+pdf'
# pyo.init_notebook_mode()

In [5]:


# Un-prefixed names of the time statistics; df_by_time_window smooths the
# DataFrame columns with exactly these names.
TIME_STATS_GRANULAR = [
    'ended_time',
    'session_minutes',
    'time_cutoff',
    'time_large',
    'time_medium',
    'time_small',
]

# The same statistics under their 'sess_time/' prefix, in the same order.
TIME_STATS = ['sess_time/' + stat_name for stat_name in TIME_STATS_GRANULAR]

# Un-prefixed names of the size statistics.
SIZE_STATS_GRANULAR = [
    'ended_event', 'session_size', 'size_cutoff',
    'inc_small', 'inc_medium', 'inc_large',
]
    

In [21]:
def tensorboard_results(log_matrix, scalars, stride=20, smooth_window=None):
    """Load scalar series from TensorBoard event logs into one DataFrame.

    For every ``model -> log path`` entry an ``EventAccumulator`` is created
    and reloaded, each requested scalar tag is read, and every ``stride``-th
    logged value is kept.

    Args:
        log_matrix: Mapping of model label to the path handed to
            ``EventAccumulator``.
        scalars: Iterable of scalar tags to extract from each log.
        stride: Keep one value out of every ``stride`` (default 20).
        smooth_window: Optional window size for a centred rolling mean that
            is applied before down-sampling. ``None`` (the default) keeps the
            raw values; an earlier version computed a rolling mean of 20 and
            then discarded it, so raw values are the established output.

    Returns:
        A DataFrame with one column per scalar tag plus ``model`` and
        ``step``. ``step`` is the position of the kept value inside its
        scalar series (0, stride, 2*stride, ...), not the global step stored
        in the event file. When ``log_matrix`` is empty an empty frame with
        those columns is returned.
    """
    scalars = list(scalars)
    frames = []
    for model, log_dir in log_matrix.items():
        print(f'Getting {model} results')
        events = EventAccumulator(log_dir)
        events.Reload()

        columns = {}
        for scalar_key in scalars:
            values = pd.Series(
                [event.value for event in events.Scalars(scalar_key)],
                name=scalar_key,
                dtype='float64',
            )
            if smooth_window is not None:
                # Centred window: the first/last few points become NaN.
                values = values.rolling(smooth_window, center=True).mean()
            # Slicing keeps the original positions as index labels, which is
            # what the 'step' column below is built from.
            columns[scalar_key] = values[::stride]

        df = pd.DataFrame(columns)
        df['model'] = model
        df['step'] = df.index.values
        frames.append(df)

    if not frames:
        # pd.concat raises on an empty list; return an empty, well-formed frame.
        return pd.DataFrame(columns=[*scalars, 'model', 'step'])

    return pd.concat(frames, axis=0).reset_index(drop=True)
    
 

def plot_vectors(df, y, title, max_step=6500):
    """Draw one line per model for column ``y`` against ``step`` and show it.

    Args:
        df: DataFrame containing ``step``, ``model`` and the ``y`` column.
            It is filtered into a new frame; the argument is not modified.
        y: Name of the column plotted on the y-axis.
        title: Text used for both the figure title and the y-axis label, so
            the axis describes the metric that is actually plotted.
        max_step: Only rows whose ``step`` is strictly below this value are
            drawn (default 6500).

    Returns:
        None. The figure is displayed with ``fig.show()``; returning it as
        well would make a notebook cell render it a second time.
    """
    visible = df[df['step'] < max_step]

    fig = px.line(
        visible,
        x='step',
        y=y,
        color='model',
    )
    fig.update_layout(
        title=title,
        width=900,
        height=500,
        xaxis_title='Step',
        yaxis_title=title,
    )
    fig.show()


    
def df_by_time_window(df, model_name, lamb=100):
    """Average results per experiment count and smooth the time statistics.

    Steps: drop the ``'Unnamed: 0'`` column if present, rename ``exp_runs``
    to ``exp_count``, take the mean of every remaining column per
    ``exp_count``, tag the rows with ``model_name``, copy ``exp_count`` into
    ``step``, and replace each column listed in ``TIME_STATS_GRANULAR`` with
    the trend component of a Hodrick-Prescott filter.

    Args:
        df: Input frame; must contain ``exp_runs`` and every column named in
            ``TIME_STATS_GRANULAR``. It is not modified.
            NOTE(review): ``'Unnamed: 0'`` looks like an index column written
            by a CSV round-trip — confirm against the loader.
        model_name: Label stored in the ``model`` column of the result.
        lamb: Smoothing parameter forwarded to ``hpfilter`` (default 100).

    Returns:
        A new DataFrame with one row per ``exp_count``.
    """
    # errors='ignore' lets frames without the stray index column pass through.
    df = df.drop(columns=['Unnamed: 0'], errors='ignore')
    df = df.rename(columns={'exp_runs': 'exp_count'})
    df = df.groupby(['exp_count']).mean().reset_index()

    df['model'] = model_name
    df['step'] = df['exp_count']

    for col in TIME_STATS_GRANULAR:
        # hpfilter returns (cycle, trend); only the trend is kept.
        _cycle, trend = sm.tsa.filters.hpfilter(df[col], lamb=lamb)
        df[col] = trend

    return df




In [22]:
# NOTE(review): neither of these path constants is referenced in the cells
# visible here — confirm they are still used before relying on them.
TB_LOGS = 'dqn_tb'
CSV_LOGS = 'rl_results/dqn_csv'

In [31]:
# Plot label -> TensorBoard log location, passed to tensorboard_results as
# its log_matrix. Every value in this first mapping points at a single
# events.out.tfevents.* file.
log_dirs_q1_dqn = {
    'DQN CNN Label': 's3://dissertation-data-dmiller/experiments/dqn_label_cnn/2023-06-14_07-19-19/training_metrics/DQN_1/events.out.tfevents.1686727170.ng0zvix62l.32.0',
    'DQN CNN Pred': 's3://dissertation-data-dmiller/experiments/dqn_pred_cnn/2023-06-13_16-11-42/training_metrics/DQN_1/events.out.tfevents.1686672715.n98mxro6un.36.0',
    'DQN CNN No Label': 's3://dissertation-data-dmiller/experiments/dqn_None_cnn/2023-06-14_13-40-54/training_metrics/DQN_1/events.out.tfevents.1686750068.njz8jqixct.32.0',
    'DQN MLP Pred': 's3://dissertation-data-dmiller/experiments/dqn_pred_mlp/2023-06-14_20-10-32/training_metrics/DQN_1/events.out.tfevents.1686773443.nch2dyx01u.36.0',
    'DQN CNN Pred NoPen': 's3://dissertation-data-dmiller/experiments/dqn_pred_cnn/2023-06-21_11-40-04/training_metrics/DQN_1/events.out.tfevents.1687347610.n64lfafrco.177.0',
    'DQN CNN None NoPen': 's3://dissertation-data-dmiller/experiments/dqn_None_cnn/2023-06-20_15-27-49/training_metrics/DQN_1/events.out.tfevents.1687274881.nzma5ps5xq.32.0'
}


# Same idea for the 'q2' experiments, but each value is a run directory
# (trailing '/') rather than one event file. Despite the '_dqn' suffix in the
# variable name, the keys also cover QRDQN and A2C runs.
log_dirs_q2_dqn = {
    'QRDQN PRED CNN': 's3://dissertation-data-dmiller/experiments/q2/qr_dqn_pred_cnn/2023-07-16_17-49-24/training_metrics/QRDQN_1/',
    'A2C PRED CNN': 's3://dissertation-data-dmiller/experiments/q2/a2c_pred_cnn/2023-07-17_13-57-52/training_metrics/A2C_1/',
    'DQN PRED CNN': 's3://dissertation-data-dmiller/experiments/q2/dqn_pred_cnn/2023-07-13_15-22-43/training_metrics/DQN_1/',
    'DQN PRED CNN BASELINE': 's3://dissertation-data-dmiller/experiments/q2/dqn_pred_cnn/2023-07-20_09-18-23/training_metrics/DQN_1/'
}
# Make sure the local results directory exists. exist_ok=True keeps the cell
# safe to re-run and avoids the gap between checking for the directory and
# creating it.
results_dir = os.path.join('rl_stats', 'question_1')
os.makedirs(results_dir, exist_ok=True)


In [13]:
# Pull reward, loss and episode-length scalars for every run in log_dirs_q1_dqn.
training_stats = tensorboard_results(
    log_matrix=log_dirs_q1_dqn,
    scalars=['rollout/ep_rew_mean', 'train/loss', 'rollout/ep_len_mean'],
)


Getting DQN CNN Label results
Getting DQN CNN Pred results
Getting DQN CNN No Label results
Getting DQN MLP Pred results
Getting DQN CNN Pred NoPen results
Getting DQN CNN None NoPen results


In [33]:
# Pull only the mean episode reward for every run in log_dirs_q2_dqn.
training_stats_q2 = tensorboard_results(
    log_matrix=log_dirs_q2_dqn,
    scalars=['rollout/ep_rew_mean'],
)

Getting QRDQN PRED CNN results
Getting A2C PRED CNN results
Getting DQN PRED CNN results
Getting DQN PRED CNN BASELINE results


In [24]:
# Mean episode reward per model, from training_stats.
plot_vectors(
    df=training_stats.copy(),
    y='rollout/ep_rew_mean',
    title='Episode Reward Mean',
)

In [34]:
# Mean episode reward per model, from training_stats_q2.
plot_vectors(
    df=training_stats_q2.copy(),
    y='rollout/ep_rew_mean',
    title='Episode Reward Mean',
)

In [14]:
# Mean episode length per model, from training_stats.
plot_vectors(
    df=training_stats.copy(),
    y='rollout/ep_len_mean',
    title='Episode Length Mean',
)

In [15]:
# Training loss per model, from training_stats.
plot_vectors(
    df=training_stats.copy(),
    y='train/loss',
    title='Mean Losses',
)