# Base DQN
This notebook uses the classes and functions defined in `dqn.py` to train and evaluate a baseline DQN model for each of the five cities.

In [10]:
# import all from dqn
from dqn import *

In [11]:
# Cities for which a separate base model is trained and evaluated
cities = [
    'Chicago',
    'Dallas',
    'Los Angeles',
    'New York',
    'Seattle',
]

# Settings held constant across every city's run
window_size = 3     # handed to the split, train, test and save helpers
num_layers = 1      # handed to episode_loop and the save helpers
pct_split = 0.8     # handed to train_test_split
num_episodes = 100  # handed to episode_loop

## Collect and Train for Each City
Loop through each city, collect its data, train a baseline deep reinforcement learning model, and evaluate total reward on the test data.

In [12]:
# Run the full pipeline (download -> split -> train -> save -> test -> save)
# once per city, using the shared settings from the configuration cell.
for city_nm in cities:
    print(f'Training for {city_nm} over {num_episodes} episodes...')

    # Pull the city's data, then let the dqn helper define the action space on it
    merged = data_download(city_nm)
    merged = action_space(merged)

    # Partition into train/test inputs and targets
    train_X, test_X, train_y, test_y = train_test_split(merged, pct_split, window_size)

    # Fit the agent on the training portion
    all_rewards, all_rewards_each_step, agent = episode_loop(
        train_X,
        train_y,
        window_size = window_size,
        num_layers = num_layers,
        num_episodes = num_episodes,
    )

    # Show the network that was trained
    print(f'DQN Architecture for {city_nm}: ')
    print(agent.dqn)

    # Persist the trained network together with its training rewards
    artifact_save(
        'base', city_nm, num_layers, agent.dqn,
        all_rewards, all_rewards_each_step, window_size,
    )

    # Evaluate the trained network on the held-out portion
    rewards, total_reward, steps = test_loop(
        test_X, test_y, agent.dqn, window_size,
    )

    # Persist the evaluation rewards
    save_test_results(
        'base', city_nm, num_layers, rewards, total_reward, window_size,
    )

    # Report the total and per-step test reward for this city
    print(f'Total test reward for {city_nm}: {total_reward}')
    print(f'Average test reward per step for {city_nm}: {total_reward/steps}')
    print('--------------------------------------------------')

Training for Chicago over 100 episodes...
DQN Architecture for Chicago: 
QNetwork(
  (lstm): LSTM(83, 50, batch_first=True)
  (linear): Linear(in_features=50, out_features=5, bias=True)
)
Total test reward for Chicago: -28.0
Average test reward per step for Chicago: -0.9032258064516129
--------------------------------------------------
Training for Dallas over 100 episodes...
DQN Architecture for Dallas: 
QNetwork(
  (lstm): LSTM(83, 50, batch_first=True)
  (linear): Linear(in_features=50, out_features=5, bias=True)
)
Total test reward for Dallas: -13.0
Average test reward per step for Dallas: -0.41935483870967744
--------------------------------------------------
Training for Los Angeles over 100 episodes...
DQN Architecture for Los Angeles: 
QNetwork(
  (lstm): LSTM(83, 50, batch_first=True)
  (linear): Linear(in_features=50, out_features=5, bias=True)
)
Total test reward for Los Angeles: -20.0
Average test reward per step for Los Angeles: -0.6451612903225806
------------------------