# Evaluation of DQN Models
This notebook compares the rewards achieved by each DQN model implementation (baseline vs. tuned).

In [94]:
import numpy as np
import pandas as pd
import os
import altair as alt

## Reward Convergence
Compare how the total reward per episode converges in each city for the baseline and tuned models.

In [27]:
# Collect the averaged training rewards: one record per saved reward file.
path = 'rewards/averaged'
# display names for cities whose file-name token has no space
city_labels = {'LosAngeles': 'Los Angeles', 'NewYork': 'New York'}
# os.listdir returns entries in arbitrary order, so sort them to get a
# reproducible row order. Test results are excluded, as are hidden entries
# and anything that is not a regular file, which np.load could not read.
rewards = sorted(
  f for f in os.listdir(path)
  if 'test' not in f
  and not f.startswith('.')
  and os.path.isfile(os.path.join(path, f))
)
result_lst = []
for f in rewards:
  # the first two '_'-separated tokens of the file name are the model type
  # and the city
  model_type, city = f.split('_')[:2]
  result_lst.append({'model_type' : model_type,
                     'city'       : city_labels.get(city, city),
                     'results'    : np.load(os.path.join(path, f)).tolist()})

In [37]:
# Build a long-format frame with one row per reward value.
# explode() leaves the exploded column as object dtype and repeats the source
# row's index label, so renumber the rows and cast the rewards to float.
result_df = (
  pd.DataFrame(result_lst)
  .explode('results', ignore_index = True)
  .astype({'results' : float})
)
# number the rewards 1..n within each model/city pair, in stored order
result_df['episode'] = result_df.groupby(['model_type','city']).cumcount() + 1
result_df.head()

Unnamed: 0,model_type,city,results,episode
0,base,Chicago,-244.0,1
0,base,Chicago,-228.0,2
0,base,Chicago,-228.0,3
0,base,Chicago,-269.0,4
0,base,Chicago,-247.0,5


In [87]:
# Line chart of reward against episode number, coloured by model type and
# stacked into one facet row per city.
legend_top_right = alt.Legend(
  orient = 'none', legendX = 580, legendY = -18, direction = 'horizontal')

reward_lines = alt.Chart(result_df).mark_line().encode(
  x = alt.X('episode', title = 'Episode Number'),
  y = alt.Y('results', title = 'Total Reward'),
  color = alt.Color('model_type', title = None, legend = legend_top_right)
).properties(width = 700, height = 100)

reward_conv = reward_lines.facet(
  alt.Facet('city:N', title = None),
  columns = 1
).properties(title = 'Episodic Reward - Baseline vs. Tuned Model')

# configure_title returns a new chart, which is what the cell displays
reward_conv.configure_title(fontSize = 15, anchor = 'start', fontWeight = 500)

## Test Reward
Compare total test reward of baseline vs. tuned models.

In [115]:
# Load the test results and derive model type and city from each record's key.
# display names for cities whose key token has no space
city_labels = {'LosAngeles': 'Los Angeles', 'NewYork': 'New York'}
# after the transpose each JSON key becomes a row; reset_index exposes the
# key as an 'index' column
test_raw = pd.read_json('rewards/test_results.json').T.reset_index()
# the key's first two '_'-separated tokens are the model type and the city
key_parts = test_raw['index'].str.split('_')
test_reward_df = (
  test_raw
  .assign(model_type = key_parts.str[0],
          # rename only the city values, leaving every other column untouched
          city = key_parts.str[1].map(lambda c: city_labels.get(c, c)))
  .drop(columns = 'index')
  # the transpose leaves every column as object dtype; make the reward numeric
  .astype({'total_reward' : float})
)
test_reward_df

Unnamed: 0,total_reward,reward_lst,window_size,model_type,city
0,-28.0,"[-0.0, -0.0, -2.0, -2.0, -2.0, -2.0, -1.0, -0....",3,base,Chicago
1,-20.0,"[-0.0, -1.0, -1.0, -0.0, -0.0, -0.0, -0.0, -0....",3,tuned,Dallas
2,-29.0,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -2....",3,tuned,Chicago
3,-13.0,"[-1.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0....",3,base,Dallas
4,-20.0,"[-2.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0....",3,base,Los Angeles
5,-35.0,"[-0.0, -0.0, -1.0, -2.0, -2.0, -2.0, -2.0, -1....",3,base,New York
6,-30.0,"[-0.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2....",3,tuned,Los Angeles
7,-46.0,"[-2.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0....",3,tuned,Seattle
8,-47.0,"[-1.0, -1.0, -1.0, -2.0, -2.0, -2.0, -2.0, -2....",3,base,Seattle
9,-28.0,"[-1.0, -1.0, -0.0, -1.0, -1.0, -1.0, -1.0, -1....",3,tuned,New York


In [132]:
# Bar chart of total test reward per model type, one column of bars per city.
bar_encoding = dict(
  x = alt.X('model_type:N').title(None),
  y = alt.Y('total_reward:Q').title('Total Reward'),
  # colour duplicates the x axis, so the legend is suppressed
  color = alt.Color('model_type:N').legend(None),
  column = alt.Column('city:N').title(None),
)
test_reward = (
  alt.Chart(test_reward_df)
  .mark_bar()
  .encode(**bar_encoding)
  .properties(title = 'Test Reward - Baseline vs. Tuned Model')
)

# configure_title returns a new chart, which is what the cell displays
test_reward.configure_title(fontSize = 15, anchor = 'start', fontWeight = 500)

## Combined Train and Test Rewards
A visualization showing the reward at each step averaged over all training episodes, followed by the per-step reward achieved when the trained model was run against the test data.

In [160]:
# Collect the training rewards averaged over axis 0 of each saved array:
# one record per reward file.
path = 'rewards/episodic'
# display names for cities whose file-name token has no space
city_labels = {'LosAngeles': 'Los Angeles', 'NewYork': 'New York'}
# os.listdir returns entries in arbitrary order, so sort them to get a
# reproducible row order. Test results are excluded, as are hidden entries
# and anything that is not a regular file, which np.load could not read.
rewards = sorted(
  f for f in os.listdir(path)
  if 'test' not in f
  and not f.startswith('.')
  and os.path.isfile(os.path.join(path, f))
)
step_result_lst = []
for f in rewards:
  ary = np.load(os.path.join(path, f))
  # the first two '_'-separated tokens of the file name are the model type
  # and the city
  model_type, city = f.split('_')[:2]
  step_result_lst.append({'model_type' : model_type,
                          'city'       : city_labels.get(city, city),
                          # .tolist() yields plain Python floats, not numpy scalars
                          'results'    : np.mean(ary, axis = 0).tolist()})

In [161]:
# Build a long-format frame with one row per averaged reward value.
# explode() leaves the exploded column as object dtype and repeats the source
# row's index label, so renumber the rows and cast the rewards to float.
step_result_df = (
  pd.DataFrame(step_result_lst)
  .explode('results', ignore_index = True)
  .astype({'results' : float})
)
# number the values 1..n within each model/city pair, in stored order
step_result_df['episode'] = step_result_df.groupby(['model_type','city']).cumcount() + 1
step_result_df['stage'] = 'train'
step_result_df.head()

Unnamed: 0,model_type,city,results,episode,stage
0,base,Chicago,-1.26,1,train
0,base,Chicago,-1.42,2,train
0,base,Chicago,-3.37,3,train
0,base,Chicago,-3.82,4,train
0,base,Chicago,-3.63,5,train


In [162]:
# Reshape the test rewards to long format: one row per entry of reward_lst.
# Built as a single chain so the cell creates a fresh frame on every run;
# explode() leaves object dtype, so the rewards are cast to float.
test_reward_explode_df = (
  test_reward_df[['model_type', 'city', 'reward_lst']]
  .explode('reward_lst', ignore_index = True)
  .rename(columns = {'reward_lst' : 'results'})
  .astype({'results' : float})
)
# continue the numbering where the training data ends; this uses the largest
# training position across all model/city pairs
cumcount_start = step_result_df.episode.max() + 1
test_reward_explode_df['episode'] = test_reward_explode_df.groupby(['model_type','city']).cumcount() + cumcount_start
test_reward_explode_df['stage'] = 'test'
test_reward_explode_df.head()

Unnamed: 0,model_type,city,results,episode,stage
0,base,Chicago,-0.0,121,test
0,base,Chicago,-0.0,122,test
0,base,Chicago,-2.0,123,test
0,base,Chicago,-2.0,124,test
0,base,Chicago,-2.0,125,test


In [163]:
# Stack the training and test rows into one long frame for plotting.
# ignore_index gives every row a unique label instead of carrying over the
# overlapping labels of the two inputs.
concat_df = pd.concat([step_result_df, test_reward_explode_df], ignore_index = True)

In [183]:
# Line chart of the training rewards followed by the test rewards, coloured by
# model type, dashed by stage, and stacked into one facet row per city.
# The 'episode' column holds positions along the reward sequence (training
# values are means over axis 0 of the episodic arrays, test values are the
# individual entries of reward_lst), so the axes are labelled per step rather
# than as total reward per episode.
concat_line = alt.Chart(concat_df).mark_line().encode(
  # explicit :Q / :N types so the chart does not depend on dtype inference
  x = alt.X('episode:Q', title = 'Step Number'),
  y = alt.Y('results:Q', title = 'Reward per Step'),
  color = alt.Color('model_type:N',
                    legend = alt.Legend(title = 'Model Type')),
  # descending sort places 'train' ahead of 'test' in the dash scale
  strokeDash = alt.StrokeDash('stage:N', sort = 'descending',
                              legend = alt.Legend(title = 'Stage'))
).properties(width = 700, height = 100).facet(
  alt.Facet('city:N', title = None),
  columns = 1
).properties(title = 'Train and Test Reward per Step - Baseline vs. Tuned Model')

# configure_title returns a new chart, which is what the cell displays
concat_line.configure_title(
  fontSize = 15,
  anchor = 'start',
  fontWeight = 500)