In [10]:
import tensorflow as tf
from keras import __version__
# NOTE(review): copies the standalone keras version string onto tf.keras.
# Presumably a compatibility shim for a library imported further down that
# reads tf.keras.__version__ (keras-rl?) — confirm it is still required.
tf.keras.__version__ = __version__

import time
import random
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Embedding, Reshape

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

import gym

from env_class import BatteryManagementEnv

# Use matplotlib's "ggplot" style sheet for figures drawn in this notebook.
plt.style.use("ggplot")

In [11]:
# Instantiate the project's battery-management environment (from env_class).
env = BatteryManagementEnv()
# Left as a bare expression on purpose: the value returned by reset()
# (the initial observation) is what this cell displays.
env.reset()

array([50.], dtype=float32)

In [12]:
# Report the environment's action and observation space descriptors,
# one per line.
print(
    "Action Space {}".format(env.action_space),
    "State Space {}".format(env.observation_space),
    sep="\n",
)

Action Space 1
State Space 1


In [13]:
# env.nA is passed below as the network's output width and as the agent's
# nb_actions, i.e. it is the number of actions the agent chooses between.
print(env.nA)

2


In [14]:
from keras.layers import Dense, Input
from keras.models import Model

#build neural network for DQN
def build_model(states, actions, window_length=1, hidden_units=16):
    """Build the Q-network used by the DQN agent.

    The network flattens a ``(window_length, states)`` observation window,
    feeds it through a single ReLU hidden layer and emits one linear output
    per action.

    Args:
        states: Size of a single observation vector (last input dimension).
        actions: Number of output units, one per action.
        window_length: Number of stacked observations per sample. It should
            match the ``window_length`` of the replay memory that feeds the
            agent. Defaults to 1, the value that used to be hard-coded.
        hidden_units: Width of the hidden ``Dense`` layer. Defaults to 16,
            the value that used to be hard-coded.

    Returns:
        An uncompiled keras ``Model`` mapping the observation window to
        ``actions`` linear outputs.
    """
    # Named `observations` (not `input`) so the Python builtin is not shadowed.
    observations = Input(shape=(window_length, states))
    flat = Flatten()(observations)
    hidden = Dense(hidden_units, activation='relu')(flat)
    # Output layer: linear activation, one unbounded value per action.
    q_values = Dense(actions, activation='linear')(hidden)

    return Model(inputs=observations, outputs=q_values)

# Size the network from the environment: input width from the observation
# space descriptor, output width from the number of actions.
model = build_model(
    states=env.observation_space,
    actions=env.nA,
)

In [15]:
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy, EpsGreedyQPolicy, LinearAnnealedPolicy
from rl.memory import SequentialMemory
from keras.src.saving import serialization_lib
# SECURITY: disables Keras' safe-mode deserialization for the whole process,
# so lambdas embedded in model configs/files will be deserialized and can run
# arbitrary code. Do not load untrusted models in this kernel.
# NOTE(review): presumably needed so the agent can clone a model containing a
# Lambda layer (dueling head?) — confirm, and scope it more narrowly if possible.
serialization_lib.enable_unsafe_deserialization()
from tensorflow.keras.optimizers.legacy import Adam

# Configure the keras-rl DQN agent.

# Replay buffer: holds up to 20000 entries, with a window of one observation.
memory = SequentialMemory(window_length=1, limit=20000)

# Exploration: epsilon-greedy whose `eps` attribute is annealed linearly from
# 1.0 down to 0.1 over 10000 steps; 0.05 is the value configured for testing.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.,
    value_min=.1,
    value_test=.05,
    nb_steps=10000,
)

# Double + dueling DQN on top of the network built above.
dqn = DQNAgent(
    model=model,
    nb_actions=env.nA,
    memory=memory,
    policy=policy,
    nb_steps_warmup=500,
    target_model_update=1e-2,
    enable_double_dqn=True,
    enable_dueling_network=True,
)
dqn.compile(optimizer=Adam(learning_rate=1e-3), metrics=['mae'])

In [16]:
# Train the agent. The call is the cell's last expression, so the History
# object it returns is shown as the cell output.
dqn.fit(
    env,
    nb_steps=100000,
    visualize=False,
    verbose=1,
    nb_max_episode_steps=200,
    log_interval=1000,
)

Training for 100000 steps ...
Interval 1 (0 steps performed)
  17/1000 [..............................] - ETA: 3s - reward: -17.6471   

  updates=self.state_updates,


 113/1000 [==>...........................] - ETA: 2s - reward: -9.7345 

1000 episodes - episode_reward: 0.400 [-100.000, 100.000] - loss: 648.844 - mae: 50.016 - mean_q: 78.116 - mean_eps: 0.932

Interval 2 (1000 steps performed)
1000 episodes - episode_reward: 11.400 [-100.000, 100.000] - loss: 0.000 - mae: 50.000 - mean_q: 100.000 - mean_eps: 0.865

Interval 3 (2000 steps performed)
1000 episodes - episode_reward: 20.400 [-100.000, 100.000] - loss: 0.000 - mae: 50.000 - mean_q: 100.000 - mean_eps: 0.775

Interval 4 (3000 steps performed)
1000 episodes - episode_reward: 29.600 [-100.000, 100.000] - loss: 0.000 - mae: 50.000 - mean_q: 100.000 - mean_eps: 0.685

Interval 5 (4000 steps performed)
1000 episodes - episode_reward: 44.200 [-100.000, 100.000] - loss: 0.000 - mae: 50.000 - mean_q: 100.000 - mean_eps: 0.595

Interval 6 (5000 steps performed)
1000 episodes - episode_reward: 46.000 [-100.000, 100.000] - loss: 0.000 - mae: 50.000 - mean_q: 100.000 - mean_eps: 0.505

Interval 7 (6000 steps performed)
1000 episodes - episode_reward: 58.200 [-100.000, 10

<keras.src.callbacks.History at 0x182301ae610>