In [2]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import plotly.express as px

In [3]:
def collectVersionData(env, version):
  """Load every ``result.json`` saved under ``./.checkpoints/<env>/<version>``.

  Each immediate sub-folder of the version directory is visited in sorted
  name order; a sub-folder contributes one entry when it contains a
  ``result.json`` file.

  Args:
    env: Name of the environment directory below ``./.checkpoints``.
    version: Name of the version directory below the environment directory.

  Returns:
    A list with the parsed JSON content of each ``result.json`` found,
    ordered by sub-folder name.

  Raises:
    FileNotFoundError: If the version directory does not exist.
    json.JSONDecodeError: If a ``result.json`` is not valid JSON.
  """
  BASE_DIR = "./.checkpoints"
  results_dir = f'{BASE_DIR}/{env}/{version}'

  data = []
  for folder in sorted(os.listdir(results_dir)):
    result_path = os.path.join(results_dir, folder, 'result.json')
    # isfile() is False both for folders without a result.json and for stray
    # regular files in results_dir (which previously crashed os.listdir).
    if not os.path.isfile(result_path):
      continue
    # Context manager so the handle is closed even if parsing fails.
    with open(result_path) as result_file:
      data.append(json.load(result_file))

  return data

def collectFromRayResults(results_path,folder_name):
  """Read a line-delimited ``result.json`` from a results folder.

  The file at ``<results_path>/<folder_name>/result.json`` is parsed one line
  at a time, each non-blank line being an independent JSON document.

  Args:
    results_path: Directory that contains the run folders.
    folder_name: Name of the run folder holding ``result.json``.

  Returns:
    A list with one parsed JSON value per non-blank line, in file order.

  Raises:
    FileNotFoundError: If the ``result.json`` file does not exist.
    json.JSONDecodeError: If a non-blank line is not valid JSON.
  """
  data = []
  # Context manager guarantees the file is closed even when a line fails to parse.
  with open(f"{results_path}/{folder_name}/result.json", "r") as file:
    for line in file:
      # Blank lines (e.g. a trailing newline) are not JSON documents; skip them.
      if line.strip():
        data.append(json.loads(line))
  return data

In [4]:
def getAllFromDict(keys, listOfDicts):
    """Pivot a list of dicts into a dict of lists for the selected keys.

    Args:
        keys: Iterable of keys to extract.
        listOfDicts: Sequence of mappings; each must contain every key in ``keys``.

    Returns:
        A dict mapping each key to the list of that key's values, in the
        order the mappings appear in ``listOfDicts``.

    Raises:
        KeyError: If any mapping lacks one of the requested keys.
    """
    return {
        name: [record[name] for record in listOfDicts]
        for name in keys
    }

In [22]:
def collectData(env, versions, stats=[], ray_results=False):
  """Build one DataFrame row per training iteration for the given versions.

  Args:
    env: Environment name; stored in the ``environment`` column and used to
      locate results via ``collectVersionData`` when ``ray_results`` is falsy.
    versions: Iterable of version names; each becomes the ``version`` value
      of the rows loaded for it.
    stats: Names of the stat groups to include. Recognised values are
      ``'SAMPLER'``, ``'DQN_LEARNER'``, ``'PPO_LEARNER'``, ``'TIME'`` and
      ``'PERF'``; anything else is ignored.
    ray_results: Falsy to read from the checkpoint folders, otherwise a dict
      with ``'results_dir'`` and ``'folder_name'`` passed to
      ``collectFromRayResults``. Note that the same folder is then read for
      every entry of ``versions``.

  Returns:
    A DataFrame indexed 1..N with the columns ``epoch``, ``environment``,
    ``version`` followed by the keys of every requested stat group. An empty
    DataFrame is returned when no rows were collected.

  Raises:
    KeyError: If a result entry lacks ``training_iteration`` or a key that
      belongs to a requested stat group.
  """
  SAMPLER_KEYS = [
    'episode_reward_max',
    'episode_reward_min',
    'episode_reward_mean',
    "episode_len_mean"
    ]
  DQN_LEARNER_KEYS = [
    'mean_q',
    'mean_td_error'
  ]
  PPO_LEARNER_KEYS =[
    "cur_lr",
    "total_loss"
  ]
  TIME_KEYS = [
    "episodes_total",
    "time_this_iter_s",
    "time_total_s"
  ]
  PERF_KEYS = [
    "cpu_util_percent",
    "ram_util_percent"
  ]

  # Rows are accumulated as plain dicts and turned into a DataFrame once at
  # the end; concatenating a one-row frame per iteration is quadratic.
  records = []

  for version in versions:
    if ray_results:
      data = collectFromRayResults(ray_results['results_dir'], ray_results['folder_name'])
    else:
      data = collectVersionData(env, version)

    for epoch in data:
      epoch_data = {
        'epoch': epoch["training_iteration"],
        'environment': env,
        'version': version
      }

      # Each nested section is only looked up when its stat group was
      # requested, so results lacking an unused section no longer fail.
      if 'SAMPLER' in stats:
        sampler = epoch['sampler_results']
        for key in SAMPLER_KEYS:
          epoch_data[key] = sampler[key]
      if 'DQN_LEARNER' in stats or 'PPO_LEARNER' in stats:
        learner = epoch['info']['learner']['default_policy']['learner_stats']
        if 'DQN_LEARNER' in stats:
          for key in DQN_LEARNER_KEYS:
            epoch_data[key] = learner[key]
        if 'PPO_LEARNER' in stats:
          for key in PPO_LEARNER_KEYS:
            epoch_data[key] = learner[key]
      if 'TIME' in stats:
        for key in TIME_KEYS:
          epoch_data[key] = epoch[key]
      if 'PERF' in stats:
        perf = epoch['perf']
        for key in PERF_KEYS:
          epoch_data[key] = perf[key]

      records.append(epoch_data)

  if not records:
    return pd.DataFrame()

  # Keep the original 1-based running row index.
  return pd.DataFrame(records, index=pd.RangeIndex(1, len(records) + 1))
  


In [6]:
def plotResults(data, env, versions,keys=[],x_value = 'epoch'):
  """Show one Plotly line chart per requested column, coloured by ``version``.

  Args:
    data: Table passed to ``px.line``; it must provide the ``x_value``
      column, a ``version`` column and every column named in ``keys``.
    env: Not used by this function.
    versions: Not used by this function.
    keys: Column names to plot on the y axis, one figure each.
    x_value: Column name plotted on the x axis.

  Returns:
    None. Each figure is displayed with ``.show()``.
  """
  for column in keys:
    px.line(data, x=x_value, y=column, color='version').show()
    


In [7]:
# Collect the PPO training history for every run listed in VERSIONS
ENV = 'CustomOffWorldDockerMonolithDiscreteSim-v0'
VERSIONS = ['depth_only_v2', 'RGB_only_v1', 'rgbd_v1']

ppo_data = collectData(
    ENV,
    VERSIONS,
    stats=['SAMPLER', 'PPO_LEARNER', 'TIME', 'PERF'],
)
ppo_data

Unnamed: 0,epoch,environment,version,episode_reward_max,episode_reward_min,episode_reward_mean,episode_len_mean,cur_lr,total_loss,episodes_total,time_this_iter_s,time_total_s,cpu_util_percent,ram_util_percent
1,1,CustomOffWorldDockerMonolithDiscreteSim-v0,depth_only_v2,1.0,0.0,0.16667,36.62500,0.00005,-0.00128,24,198.378625,198.378625,84.88028,77.04613
2,2,CustomOffWorldDockerMonolithDiscreteSim-v0,depth_only_v2,1.0,0.0,0.21739,40.04348,0.00005,-0.00070,46,201.156595,399.535220,84.80488,79.42648
3,3,CustomOffWorldDockerMonolithDiscreteSim-v0,depth_only_v2,1.0,0.0,0.13636,44.18182,0.00005,-0.01210,66,279.636126,849.790208,91.60576,89.78471
4,4,CustomOffWorldDockerMonolithDiscreteSim-v0,depth_only_v2,1.0,0.0,0.15730,44.56180,0.00005,0.00074,89,289.799797,1139.590004,91.59807,90.52126
5,5,CustomOffWorldDockerMonolithDiscreteSim-v0,depth_only_v2,1.0,0.0,0.18000,43.23000,0.00005,0.00121,113,285.488732,1425.078737,91.21744,89.59582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223,96,CustomOffWorldDockerMonolithDiscreteSim-v0,rgbd_v1,1.0,0.0,0.56000,46.58000,0.00005,-0.01660,1601,208.451929,19545.250422,85.15034,70.92416
224,97,CustomOffWorldDockerMonolithDiscreteSim-v0,rgbd_v1,1.0,0.0,0.49000,48.44000,0.00005,-0.02461,1623,210.436869,19755.687291,84.96833,71.22333
225,98,CustomOffWorldDockerMonolithDiscreteSim-v0,rgbd_v1,1.0,0.0,0.49000,46.40000,0.00005,-0.02717,1638,207.933633,19963.620924,85.19226,71.35926
226,99,CustomOffWorldDockerMonolithDiscreteSim-v0,rgbd_v1,1.0,0.0,0.42000,52.26000,0.00005,-0.03035,1653,206.753668,20170.374592,85.05831,71.77966


In [8]:
# Plot the PPO mean episode reward once per x-axis column.
for x_axis in ('epoch', 'episodes_total', 'time_total_s'):
  plotResults(ppo_data, ENV, VERSIONS, ['episode_reward_mean'], x_value=x_axis)


In [24]:
# Location of the results for the DQN run. The directory can be overridden with
# the RAY_RESULTS_DIR environment variable so the notebook is not tied to one
# machine; the fallback is the path that was originally hard-coded here.
RAY_RESULTS_DIR = os.environ.get("RAY_RESULTS_DIR", "/home/sam/ray_results")
DQN_RUN_FOLDER = "DQN_CustomOffWorldDockerMonolithDiscreteSim-v0_2022-09-01_00-18-07lr1piv6r"

dqn_data = collectData(
    ENV,
    ["dqn_depth_v1"],
    ["SAMPLER", "DQN_LEARNER", "TIME", "PERF"],
    ray_results={'results_dir': RAY_RESULTS_DIR, 'folder_name': DQN_RUN_FOLDER},
)
dqn_data

Unnamed: 0,epoch,environment,version,episode_reward_max,episode_reward_min,episode_reward_mean,episode_len_mean,mean_q,mean_td_error,episodes_total,time_this_iter_s,time_total_s,cpu_util_percent,ram_util_percent
1,1,CustomOffWorldDockerMonolithDiscreteSim-v0,dqn_depth_v1,1.0,0.0,0.183857,44.035874,0.044934,0.020861,223,2026.473218,2026.473218,76.679382,68.704514
2,2,CustomOffWorldDockerMonolithDiscreteSim-v0,dqn_depth_v1,1.0,0.0,0.165138,45.995413,0.04486,-0.08627,441,2039.213573,4065.686792,76.918789,85.809859
3,3,CustomOffWorldDockerMonolithDiscreteSim-v0,dqn_depth_v1,1.0,0.0,0.185841,44.553097,0.069326,-0.018382,667,2059.494106,6125.180897,76.888196,86.503537
4,4,CustomOffWorldDockerMonolithDiscreteSim-v0,dqn_depth_v1,1.0,0.0,0.248889,44.826667,0.091298,-0.010901,892,2109.345118,8234.526015,76.883138,86.764624
5,5,CustomOffWorldDockerMonolithDiscreteSim-v0,dqn_depth_v1,1.0,0.0,0.236111,46.199074,0.132311,-0.078898,1108,2066.673707,10301.199723,76.941875,86.852545
6,6,CustomOffWorldDockerMonolithDiscreteSim-v0,dqn_depth_v1,1.0,0.0,0.192825,44.578475,0.17928,-0.014487,1331,2078.003396,12379.203119,76.947521,86.91264
7,7,CustomOffWorldDockerMonolithDiscreteSim-v0,dqn_depth_v1,1.0,0.0,0.226087,43.834783,0.194226,-0.016513,1561,2054.313837,14433.516956,77.089923,86.827962
8,8,CustomOffWorldDockerMonolithDiscreteSim-v0,dqn_depth_v1,1.0,0.0,0.219178,45.511416,0.238254,-0.03239,1780,2081.850764,16515.367719,76.977453,86.819982
9,9,CustomOffWorldDockerMonolithDiscreteSim-v0,dqn_depth_v1,1.0,0.0,0.18552,45.113122,0.301548,-0.026588,2001,2066.619357,18581.987076,76.976342,86.724568
10,10,CustomOffWorldDockerMonolithDiscreteSim-v0,dqn_depth_v1,1.0,0.0,0.252033,40.747967,0.310082,-0.008423,2247,2073.266448,20655.253524,77.097632,86.557787


In [29]:
# Plot the DQN mean episode reward once per x-axis column.
for x_axis in ("epoch", "episodes_total"):
  plotResults(dqn_data, ENV, ['dqn_depth_v1'], keys=['episode_reward_mean'], x_value=x_axis)