In [1]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import plotly.express as px

In [2]:
def collectVersionData(env, version):
  """Load every ``result.json`` stored for one training version.

  Scans ``./.checkpoints/<env>/<version>`` and, for each entry (visited in
  sorted name order), parses ``<entry>/result.json`` when that file exists.

  Args:
    env: Name of the environment sub-directory under ``./.checkpoints``.
    version: Name of the version sub-directory under ``<env>``.

  Returns:
    A list with one parsed JSON document per entry that contains a
    ``result.json``, ordered by entry name. Entries without that file
    (including stray regular files next to the run folders) are skipped.

  Raises:
    FileNotFoundError: If the version directory itself does not exist.
    json.JSONDecodeError: If a ``result.json`` is not valid JSON.
  """
  BASE_DIR = "./.checkpoints"
  results_dir = f'{BASE_DIR}/{env}/{version}'

  data = []
  for folder in sorted(os.listdir(results_dir)):
    result_path = os.path.join(results_dir, folder, 'result.json')
    # isfile() is simply False when `folder` is a plain file, whereas listing
    # it with os.listdir() would raise NotADirectoryError.
    if not os.path.isfile(result_path):
      continue
    # Context manager so the handle is closed right after parsing instead of
    # being left for the garbage collector.
    with open(result_path) as result_file:
      data.append(json.load(result_file))

  return data

In [3]:
def getAllFromDict(keys, listOfDicts):
  """Gather, per requested key, that key's value from every dict.

  Args:
    keys: Iterable of keys to extract.
    listOfDicts: Sequence of mappings; each one must contain every key.

  Returns:
    A dict mapping each key (in the order given) to the list of values found
    for it, in the order the dicts appear in ``listOfDicts``.

  Raises:
    KeyError: If any dict lacks one of the requested keys.
  """
  return {
    key: [entry[key] for entry in listOfDicts]
    for key in keys
  }

In [11]:
def collectData(env, versions, stats=()):
  """Build one DataFrame of per-epoch statistics across several versions.

  For every version, the results returned by ``collectVersionData`` are
  flattened into one row each. Every row always carries ``epoch`` (the
  result's ``training_iteration``), ``environment`` and ``version``; further
  columns are added according to ``stats``.

  Args:
    env: Environment name; passed to ``collectVersionData`` and stored in the
      ``environment`` column.
    versions: Iterable of version names to load, in output order.
    stats: Collection of stat-group names to include. Recognised names:
      ``'SAMPLER'`` (from ``sampler_results``), ``'DQN_LEARNER'`` and
      ``'PPO_LEARNER'`` (from
      ``info.learner.default_policy.learner_stats``), ``'TIME'`` (top-level
      keys) and ``'PERF'`` (from ``perf``). Unknown names are ignored.

  Returns:
    A DataFrame with one row per loaded result, indexed 1..N in load order.
    An empty DataFrame when nothing was loaded.

  Raises:
    KeyError: If a result lacks a key belonging to a *requested* stat group
      (or ``training_iteration``). Sections of the result that were not
      requested are never touched.
  """
  SAMPLER_KEYS = [
    'episode_reward_max',
    'episode_reward_min',
    'episode_reward_mean',
    "episode_len_mean"
    ]
  DQN_LEARNER_KEYS = [
    'mean_q',
    'mean_td_error'
  ]
  PPO_LEARNER_KEYS =[
    "cur_lr",
    "total_loss"
  ]
  TIME_KEYS = [
    "time_this_iter_s",
  ]
  PERF_KEYS = [
    "cpu_util_percent",
    "ram_util_percent"
  ]

  # Rows are accumulated as plain dicts and turned into a frame once at the
  # end; concatenating a one-row frame per epoch is quadratic in row count.
  records = []

  for version in versions:
    for epoch in collectVersionData(env, version):
      epoch_data = {
        'epoch': epoch["training_iteration"],
        'environment': env,
        'version': version
      }

      # Each nested section is looked up only when its stat group was asked
      # for, so a result that lacks an unrequested section still loads.
      if 'SAMPLER' in stats:
        sampler = epoch['sampler_results']
        for key in SAMPLER_KEYS:
          epoch_data[key] = sampler[key]
      if 'DQN_LEARNER' in stats or 'PPO_LEARNER' in stats:
        learner = epoch['info']['learner']['default_policy']['learner_stats']
        if 'DQN_LEARNER' in stats:
          for key in DQN_LEARNER_KEYS:
            epoch_data[key] = learner[key]
        if 'PPO_LEARNER' in stats:
          for key in PPO_LEARNER_KEYS:
            epoch_data[key] = learner[key]
      if 'TIME' in stats:
        for key in TIME_KEYS:
          epoch_data[key] = epoch[key]
      if 'PERF' in stats:
        perf = epoch['perf']
        for key in PERF_KEYS:
          epoch_data[key] = perf[key]

      records.append(epoch_data)

  if not records:
    return pd.DataFrame()

  # 1-based running index across all versions, matching the row labels the
  # notebook output shows.
  return pd.DataFrame(records, index=range(1, len(records) + 1))
  


In [12]:
def plotResultsVsEpoch(data, env, versions, keys=[]):
  """Show one plotly line chart per metric, plotted against ``epoch``.

  Args:
    data: Frame handed to ``px.line``; the call references its ``epoch`` and
      ``version`` columns plus one column per entry in ``keys``.
    env: Accepted but not used by this function.
    versions: Accepted but not used by this function.
    keys: Column names to plot, one figure each, in the given order.

  Returns:
    None. Each figure is displayed via ``show()`` as a side effect.
  """
  for metric in keys:
    # One line per value of the 'version' column.
    px.line(data, x='epoch', y=metric, color='version').show()


In [13]:
# Experiment selection: one environment and the training versions to compare.
ENV = 'BreakoutDeterministic-v4'
VERSIONS = ['dqn_v1', 'dqn_v2', 'dqn_v3', 'dqn_v4']

# Load sampler, DQN learner, timing and perf columns for every version.
data = collectData(
    ENV,
    VERSIONS,
    stats=['SAMPLER', 'DQN_LEARNER', 'TIME', 'PERF'],
)
data

Unnamed: 0,epoch,environment,version,episode_reward_max,episode_reward_min,episode_reward_mean,episode_len_mean,mean_q,mean_td_error,time_this_iter_s,cpu_util_percent,ram_util_percent
1,10,BreakoutDeterministic-v4,dqn_v1,5.0,0.0,0.91,180.49,2.92885,-0.12368,11.537833,50.76250,91.79375
2,20,BreakoutDeterministic-v4,dqn_v1,4.0,0.0,0.86,176.17,2.49939,-0.12261,11.467407,51.51176,90.55882
3,30,BreakoutDeterministic-v4,dqn_v1,4.0,0.0,1.01,179.58,1.88376,-0.05798,11.072473,48.85333,90.24667
4,40,BreakoutDeterministic-v4,dqn_v1,5.0,0.0,1.31,192.15,1.92782,-0.07334,11.363265,50.48750,90.45000
5,50,BreakoutDeterministic-v4,dqn_v1,7.0,0.0,1.95,213.93,1.75707,-0.08303,11.049173,49.70625,88.84375
...,...,...,...,...,...,...,...,...,...,...,...,...
91,300,BreakoutDeterministic-v4,dqn_v4,18.0,8.0,13.66,462.85,1.09318,0.00199,9.990787,57.31429,93.26429
92,350,BreakoutDeterministic-v4,dqn_v4,18.0,6.0,14.93,468.70,0.90136,-0.01597,9.645812,55.11429,91.50000
93,400,BreakoutDeterministic-v4,dqn_v4,20.0,7.0,16.44,498.81,0.78947,0.02038,9.675423,55.12857,92.07857
94,450,BreakoutDeterministic-v4,dqn_v4,19.0,6.0,16.61,496.11,0.96394,-0.00252,9.574791,55.64286,92.20000


In [14]:

# Compare mean episode reward and mean episode length across versions.
plotResultsVsEpoch(
    data,
    ENV,
    VERSIONS,
    keys=['episode_reward_mean', 'episode_len_mean'],
)