# Tuned DQN (LA)
This notebook uses the classes and functions defined in `dqn.py` to train and evaluate a hyperparameter-tuned DQN model for Los Angeles. The hyperparameters were determined in `LSTM/LSTM_LosAngeles.ipynb`.

In [11]:
# import all from dqn
from dqn import *

In [12]:
# notebook-wide settings, grouped by what they control
# -- target city (display name used for the data download and log messages)
city_nm = 'Los Angeles'
# -- data preparation: look-back window length and train fraction of the split
window_size = 3
pct_split = 0.8
# -- network shape: hidden sizes of the two LSTM stages, and the layer count
#    that is recorded alongside the saved artifacts
hidden_dims = [40, 80]
num_layers = 2
# -- training length
num_episodes = 100

## Create LSTM
Define the tuned LSTM architecture and use it as the Q-network for Q-learning.

In [13]:
class TunedQNetwork(torch.nn.Module):
  """Two-stage LSTM Q-network with a linear read-out over the last time step.

  The input sequence is passed through ``lstm1`` and then ``lstm2`` (each a
  single-layer, batch-first LSTM), dropout is applied, and the features at the
  final time step are projected to ``output_dim`` values.

  Args:
    input_dim: Feature size of each time step fed to ``lstm1``.
    output_dim: Number of values produced by the final linear layer.
    hidden_dims: Indexable whose first two entries are the hidden sizes of
      ``lstm1`` and ``lstm2`` respectively.
    num_layers: Accepted only so the constructor signature matches what the
      rest of the functions and classes pass in. The depth is fixed by the two
      explicit LSTM stages, so the value itself is not used.
  """

  def __init__(self, input_dim, output_dim, hidden_dims, num_layers):
    super().__init__()
    # Each stage is always exactly one LSTM layer. This used to be spelled
    # int(num_layers/num_layers), which evaluates to 1 for every non-zero
    # value and raised ZeroDivisionError for num_layers == 0.
    layers_per_stage = 1
    self.lstm1 = nn.LSTM(input_size = input_dim, hidden_size = hidden_dims[0],
                         num_layers = layers_per_stage, batch_first = True,
                         bias=True)
    self.lstm2 = nn.LSTM(input_size = hidden_dims[0], hidden_size = hidden_dims[1],
                         num_layers = layers_per_stage, batch_first = True,
                         bias=True)
    self.dropout = nn.Dropout(0.5)
    self.linear = nn.Linear(in_features = hidden_dims[1], out_features = output_dim)
    # NOTE: registered as a parameter but never read in forward(), so on its
    # own it applies no regularisation. Kept so the module's parameter list and
    # state_dict keys stay exactly as before.
    self.l2_reg = nn.Parameter(torch.tensor(0.001))

  def forward(self, state):
    """Map a batch of sequences to one output vector per sequence.

    Args:
      state: Batch-first sequence tensor, i.e. (batch, seq_len, input_dim).

    Returns:
      Tensor of shape (batch, output_dim) computed from the last time step.
    """
    x, _ = self.lstm1(state)
    x, _ = self.lstm2(x)
    x = self.dropout(x)
    # keep only the final time step of every sequence
    x = x[:, -1, :]
    x = self.linear(x)
    return x

## Preprocess Data
Load the features chosen by the feature-importance analysis, then split the data into training and test sets.

In [14]:
# read the list of selected features saved by the LSTM notebook
file_city_nm = 'LosAngeles'
feature_path = f'../LSTM/Results/{file_city_nm}/feature_selection.json'
with open(feature_path, 'r') as feature_file:
  features = json.load(feature_file)
# download the city's data restricted to those features, then attach the action space
merged = action_space(data_download(city_nm, features = features))
# split into train / test inputs and targets
train_X, test_X, train_y, test_y = train_test_split(merged, pct_split, window_size)

## Use Subsetted Data to Train
Train the agent with the selected features and tuned architecture, save the results, and evaluate the reward on the test set.

In [15]:
print(f'Training for {city_nm} over {num_episodes} episodes...')
# train agent
# num_layers is taken from the notebook-level setting (not a repeated literal)
# so the value used for training always matches the one recorded when saving
all_rewards, all_rewards_each_step, agent = episode_loop(train_X,
  train_y, window_size = window_size, num_layers = num_layers, hidden_dim = hidden_dims,
  num_episodes = num_episodes, QNetwork = TunedQNetwork)
# print model
print(f'Tuned DQN Architecture for {city_nm}: ')
print(agent.dqn)
# save train results
model_type = 'tuned'
artifact_save(model_type, file_city_nm, num_layers, agent.dqn, all_rewards,
              all_rewards_each_step, window_size)
# test model
rewards, total_reward, steps = test_loop(test_X, test_y, agent.dqn, window_size)
# save test results
save_test_results(model_type, file_city_nm, num_layers, rewards, total_reward, window_size)
# print total test reward
print(f'Total test reward for {city_nm}: {total_reward}')
# an empty test run (zero steps) reports nan instead of raising ZeroDivisionError
avg_test_reward = total_reward/steps if steps else float('nan')
print(f'Average test reward per step for {city_nm}: {avg_test_reward}')

Training for Los Angeles over 100 episodes...
Tuned DQN Architecture for Los Angeles: 
TunedQNetwork(
  (lstm1): LSTM(5, 40, batch_first=True)
  (lstm2): LSTM(40, 80, batch_first=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (linear): Linear(in_features=80, out_features=5, bias=True)
)
Total test reward for Los Angeles: -30.0
Average test reward per step for Los Angeles: -0.967741935483871
