# Training v1.0 with a Deep Q Network (DQN) #

Import statements

In [1]:
import json
import os

import numpy as np

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc
rc('text', usetex=True)
%matplotlib inline

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam, SGD

In [3]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory  # For experience replay!

In [4]:
from gym_environment_ncml import *
from learning import *

Useful numbers

In [5]:
MILLION = 1000000
HTHOUSAND = 100000
THOUSAND = 1000

## 1. Create environment ##

In [6]:
env = GridworldMultiAgentv1()

In [7]:
states = env.observation_space.shape[0]
actions = env.action_space.n

In [8]:
states, actions

(8, 25)

## 2. Create a Deep Learning Model with Keras ##

In [9]:
model = build_model(states, actions, [32, 16], ['relu', 'relu'])

In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 8)                 0         
                                                                 
 dense (Dense)               (None, 32)                288       
                                                                 
 dense_1 (Dense)             (None, 16)                528       
                                                                 
 dense_2 (Dense)             (None, 25)                425       
                                                                 
Total params: 1,241
Trainable params: 1,241
Non-trainable params: 0
_________________________________________________________________


## 3. Build Agent with Keras-RL ##

In [11]:
dqn = build_agent(model, actions, 0.01, EpsGreedyQPolicy(), 5000)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
# dqn.compile(Adam(lr=1e-2), metrics=['mse'])

  super(Adam, self).__init__(name, **kwargs)
2023-09-17 14:51:02.879517: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2023-09-17 14:51:02.881417: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [12]:
name = 'dqn1_5b5_3216_adam_lr0.001_tmu0.01_ml50K_ns5M_eps0.1'

In [13]:
history = dqn.fit(env, nb_steps=0.05*MILLION, visualize=False, verbose=1)

Training for 50000.0 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 3:48 - reward: -1.0000

  updates=self.state_updates,


200 episodes - episode_reward: -12.150 [-50.000, 120.000] - loss: 7.399 - mae: 15.169 - mean_q: 18.102

Interval 2 (10000 steps performed)
200 episodes - episode_reward: 2.000 [-50.000, 110.000] - loss: 27.060 - mae: 41.553 - mean_q: 46.567

Interval 3 (20000 steps performed)
200 episodes - episode_reward: 6.850 [-50.000, 110.000] - loss: 27.073 - mae: 43.093 - mean_q: 47.957

Interval 4 (30000 steps performed)
200 episodes - episode_reward: 8.300 [-50.000, 160.000] - loss: 33.115 - mae: 49.227 - mean_q: 54.349

Interval 5 (40000 steps performed)
   71/10000 [..............................] - ETA: 21s - reward: 1.8169

InvalidArgumentError: Expected tensor of type int64 but got type float
	 [[{{node training/Adam/add}}]]

In [None]:
data = history.history
data['episode_reward'] = [float(v) for v in data['episode_reward']]
data['nb_episode_steps'] = [int(v) for v in data['nb_episode_steps']]
data['nb_steps'] = [int(v) for v in data['nb_steps']]

In [None]:
os.mkdir('agents/{}'.format(name))  # If the directory does not exist we cannot write the file
with open(get_training_path(name), 'w') as f:
    json.dump(data, f)

Save agent to memory

In [None]:
dqn.save_weights(get_agent_path(name), overwrite=True)

## 4. Reloading Agent from memory and test ##

In [None]:
env = GridworldMultiAgentv1(seed=2)

In [None]:
states = env.observation_space.shape[0]
actions = env.action_space.n
model = build_model(states, actions, [32, 16], ['relu', 'relu'])
print(model.summary())
dqn = build_agent(model, actions, 0.01, EpsGreedyQPolicy(eps=0), 50000)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Load weights
dqn.load_weights(get_agent_path(name))

In [None]:
nb_episodes = 10*THOUSAND

In [None]:
scores = dqn.test(env, nb_episodes=nb_episodes, visualize=False, verbose=0)

In [None]:
rewards = np.array(scores.history['episode_reward'])

In [None]:
np.savetxt(get_test_path(name, nb_episodes), rewards)