In [2]:
import tensorflow as tf
from keras import __version__
tf.keras.__version__ = __version__
from imp import reload

import time
import random
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Embedding, Reshape

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

from env_class import BatteryManagementEnv

# --- Notebook-wide configuration -------------------------------------------
plt.style.use("ggplot")

# Seed every random number generator imported above so that weight
# initialisation, epsilon-greedy exploration and replay sampling are
# repeatable across "Restart Kernel -> Run All".
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)




In [3]:
# Training environment. The meaning of the positional argument is defined in
# env_class.BatteryManagementEnv (NOTE(review): looks like a start offset into
# the data — confirm).
env = BatteryManagementEnv(0)

# Reset once and display the initial observation as the cell output.
initial_observation = env.reset()
initial_observation

array([  10.  ,    0.  ,    0.  ,    0.  ,    0.  ,    0.  ,    0.  ,
          0.  ,    0.  ,   79.25,  655.25, 1460.75, 2027.25, 2212.5 ,
       1915.75, 1184.25,  367.25,   19.25,    0.  ,    0.  ,    0.  ,
          0.  ,    0.  ,    0.  ,    0.  ])

In [4]:
from keras.layers import Dense, Input
from keras.models import Model

# Build the neural network used as the Q-function approximator for DQN
def build_model(states, actions, window_length=1, hidden_units=16):
    """Create a small fully connected Q-network.

    Args:
        states: Size of one observation vector (last axis of the input).
            Assumes this is a plain int — TODO confirm what
            ``env.observation_space`` holds.
        actions: Number of discrete actions; the network emits one linear
            output per action.
        window_length: Number of stacked observations per sample (leading
            axis of the input). Must agree with the ``window_length`` of the
            replay memory feeding the agent; defaults to 1, the value that
            was previously hard-coded.
        hidden_units: Width of the single hidden ReLU layer; defaults to 16,
            the value that was previously hard-coded.

    Returns:
        An uncompiled Keras functional ``Model`` mapping inputs of shape
        ``(window_length, states)`` to ``actions`` linear outputs.
    """
    # Named ``observations`` rather than ``input`` to avoid shadowing the builtin.
    observations = Input(shape=(window_length, states))
    # Collapse the (window_length, states) axes into one feature vector.
    features = Flatten()(observations)
    hidden = Dense(hidden_units, activation='relu')(features)
    # Output layer: one unbounded (linear) value per action.
    q_values = Dense(actions, activation='linear')(hidden)

    return Model(inputs=observations, outputs=q_values)

# Instantiate the Q-network sized from the environment's observation and action attributes.
model = build_model(
    states=env.observation_space,
    actions=env.nA,
)




In [5]:
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy, EpsGreedyQPolicy, LinearAnnealedPolicy
from rl.memory import SequentialMemory
# NOTE(review): this switches off Keras' safe-mode deserialization for the
# whole kernel (safe mode is what blocks e.g. lambda deserialization).
# Presumably required so keras-rl can rebuild/clone the model when the agent
# is created — TODO confirm. While enabled, do not load model files from
# untrusted sources in this session.
from keras.src.saving import serialization_lib
serialization_lib.enable_unsafe_deserialization()
from tensorflow.keras.optimizers.legacy import Adam

# Assemble the keras-rl DQN agent: replay memory, exploration policy, agent, optimizer.

# Replay buffer of up to 20000 transitions; window_length=1 matches the
# leading axis of the model input built above.
memory = SequentialMemory(limit=20000, window_length=1)

# Epsilon-greedy exploration with eps annealed between value_max and value_min
# over nb_steps; value_test is the eps used by dqn.test(). The eps=0 passed to
# the inner policy is only a placeholder that the annealer overrides via attr.
# NOTE(review): annealing spans 10000 steps but the training cell below runs
# only 5000, so eps is still around 0.55 when training stops — confirm intended.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(eps=0),
    attr='eps',
    value_max=1.,
    value_min=.1,
    value_test=.05,
    nb_steps=10000,
)

# Double + dueling DQN; the first 500 steps are warm-up before learning starts.
dqn = DQNAgent(
    model=model,
    nb_actions=env.nA,
    memory=memory,
    policy=policy,
    nb_steps_warmup=500,
    target_model_update=1e-2,
    enable_double_dqn=True,
    enable_dueling_network=True,
)

dqn.compile(
    optimizer=Adam(learning_rate=1e-3),
    metrics=['mae'],
)




In [6]:
# Train for 5000 steps; each episode is capped at 365 steps and a progress
# summary is logged every 365 steps.
dqn.fit(
    env,
    nb_steps=5000,
    nb_max_episode_steps=365,
    log_interval=365,
    visualize=False,
    verbose=1,
)

Training for 5000 steps ...
Interval 1 (0 steps performed)



  updates=self.state_updates,


1 episodes - episode_reward: -2644.750 [-2644.750, -2644.750]

Interval 2 (365 steps performed)
1 episodes - episode_reward: -7546.050 [-7546.050, -7546.050] - loss: 5088628.513 - mae: 6032.303 - mean_q: 8929.632 - mean_eps: 0.945

Interval 3 (730 steps performed)
1 episodes - episode_reward: 2573.150 [2573.150, 2573.150] - loss: 3169536.984 - mae: 6262.835 - mean_q: 8350.224 - mean_eps: 0.918

Interval 4 (1095 steps performed)
1 episodes - episode_reward: -8491.700 [-8491.700, -8491.700] - loss: 4065077.925 - mae: 7218.654 - mean_q: 9489.335 - mean_eps: 0.885

Interval 5 (1460 steps performed)
1 episodes - episode_reward: -8976.250 [-8976.250, -8976.250] - loss: 4955540.913 - mae: 8127.511 - mean_q: 10560.412 - mean_eps: 0.852

Interval 6 (1825 steps performed)
1 episodes - episode_reward: -24969.750 [-24969.750, -24969.750] - loss: 5935057.754 - mae: 8804.125 - mean_q: 11371.567 - mean_eps: 0.819

Interval 7 (2190 steps performed)
1 episodes - episode_reward: -10804.200 [-10804.200, 

<keras.src.callbacks.History at 0x2da5a5febd0>

In [8]:
# Evaluation environment, built with a different constructor argument than
# the training one (NOTE(review): 470 presumably selects a different start
# point in the data — confirm in env_class).
env_test = BatteryManagementEnv(470)
env_test.reset()

# Run the trained agent for 15 evaluation episodes. The 1000-step cap is not
# reached in the recorded output, where every episode ends after 365 steps.
dqn.test(
    env_test,
    nb_episodes=15,
    nb_max_episode_steps=1000,
    visualize=False,
)

Testing for 15 episodes ...
Episode 1: reward: 49260.750, steps: 365
Episode 2: reward: 49260.750, steps: 365
Episode 3: reward: 49260.750, steps: 365
Episode 4: reward: 49260.750, steps: 365
Episode 5: reward: 49260.750, steps: 365
Episode 6: reward: 49260.750, steps: 365
Episode 7: reward: 49260.750, steps: 365
Episode 8: reward: 49260.750, steps: 365
Episode 9: reward: 49260.750, steps: 365
Episode 10: reward: 49260.750, steps: 365
Episode 11: reward: 49260.750, steps: 365
Episode 12: reward: 49260.750, steps: 365
Episode 13: reward: 49260.750, steps: 365
Episode 14: reward: 49260.750, steps: 365
Episode 15: reward: 49260.750, steps: 365


<keras.src.callbacks.History at 0x2da5b1d19d0>