In [39]:
import tensorflow as tf
from keras import __version__
# Copy the standalone Keras version string onto tf.keras.
# NOTE(review): presumably a compatibility shim for keras-rl (imported below),
# which is expected to read tf.keras.__version__ - confirm it is still needed.
tf.keras.__version__ = __version__
# `imp` has been deprecated since Python 3.4 and was removed in Python 3.12;
# importlib.reload is the drop-in replacement for imp.reload.
from importlib import reload

import time
import random
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Embedding, Reshape

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

from env_class import BatteryManagementEnv

# Apply matplotlib's "ggplot" style sheet to all figures created after this point.
plt.style.use("ggplot")

In [40]:
# Build the training environment.
# NOTE(review): the meaning of the argument 24 is defined in env_class and is
# not visible here - confirm and consider replacing it with a named constant.
env = BatteryManagementEnv(24)
# Reset the environment; as the cell's last expression, the array it returns
# is what gets displayed as the cell output.
env.reset()

array([  10.  ,    0.  ,    0.  ,    0.  ,    0.  ,    0.  ,    0.  ,
          0.  ,    0.  ,   79.25,  655.25, 1460.75, 2027.25, 2212.5 ,
       1915.75, 1184.25,  367.25,   19.25,    0.  ,    0.  ,    0.  ,
          0.  ,    0.  ,    0.  ,    0.  ])

In [28]:
# from keras.layers import Dense, Input
# from keras.models import Model

# #build neural network for DQN
# def build_model(states, actions):
#     input = Input(shape=(1,states))
#     x = Flatten()(input)
#     x = Dense(16, activation='relu')(x)
#     #output layer
#     output = Dense(actions, activation='linear')(x)

#     model = Model(inputs=input, outputs=output)

#     return model

# model = build_model(env.observation_space, env.nA)

from keras.layers import Dense, Input, Flatten, Dropout, BatchNormalization
from keras.models import Model
from keras.optimizers import Adam

def build_enhanced_model(states, actions, window_length=None):
    """Build and compile a fully connected Q-value network.

    The network stacks three Dense/ReLU hidden layers (32 -> 64 -> 32 units),
    each followed by batch normalization and 20% dropout, and ends with a
    linear output layer holding one unit per action.

    Args:
        states: Size of one flat observation vector (last input dimension).
        actions: Number of discrete actions (width of the output layer).
        window_length: Optional number of stacked observations per sample.
            When given, the model input has shape ``(window_length, states)``
            and is flattened before the first Dense layer. keras-rl agents
            feed batches shaped ``(batch, window_length, states)``, so this
            must equal the ``window_length`` of the agent's memory. When
            ``None`` (default) the model takes a flat ``(states,)`` input.

    Returns:
        A Keras ``Model`` compiled with Adam (learning rate 1e-3) and MSE loss.
    """
    if window_length is None:
        input_shape = (states,)
    else:
        input_shape = (window_length, states)
    input_layer = Input(shape=input_shape)

    # Collapse the extra window axis (if any) so the Dense layers see a vector.
    if len(input_layer.shape) > 2:
        x = Flatten()(input_layer)
    else:
        x = input_layer

    # Hidden stack: Dense -> BatchNorm -> Dropout for each layer width.
    for units in (32, 64, 32):
        x = Dense(units, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)

    # Linear output: one Q-value estimate per action.
    output = Dense(actions, activation='linear')(x)

    model = Model(inputs=input_layer, outputs=output)
    # `lr` is a deprecated alias; `learning_rate` is the supported argument
    # name for both the current and the legacy Keras Adam optimizers.
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    return model


# Call the builder that is actually defined in this cell (`build_model` only
# exists as commented-out code, so a fresh kernel would raise NameError).
# window_length=1 must match SequentialMemory(window_length=1) used by the agent.
model = build_enhanced_model(env.observation_space, env.nA, window_length=1)


In [29]:
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy, EpsGreedyQPolicy, LinearAnnealedPolicy
from rl.memory import SequentialMemory
from keras.src.saving import serialization_lib
# SECURITY: this globally disables Keras' safe-mode deserialization, which
# allows lambdas (arbitrary Python code) to be deserialized when a model is
# loaded. Only load model files from sources you trust.
# NOTE(review): presumably needed because the dueling DQN head involves a
# Lambda layer - confirm it is still required.
serialization_lib.enable_unsafe_deserialization()
from tensorflow.keras.optimizers.legacy import Adam

# Then, define DQN agent in Keras-RL
# Replay buffer: keeps up to 20000 transitions, one observation per state.
memory = SequentialMemory(window_length=1, limit=20000)

# Epsilon-greedy exploration whose `eps` attribute is annealed linearly from
# value_max down to value_min over nb_steps; value_test is used when testing.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(eps=0),
    attr='eps',
    value_max=1.,
    value_min=.1,
    value_test=.05,
    nb_steps=10000,
)

# Double + dueling DQN agent wrapped around the Q-network built above.
dqn = DQNAgent(
    model=model,
    nb_actions=env.nA,
    memory=memory,
    policy=policy,
    nb_steps_warmup=500,
    target_model_update=1e-2,
    enable_double_dqn=True,
    enable_dueling_network=True,
)

# Compile the agent with the legacy Adam optimizer imported in this cell.
dqn.compile(optimizer=Adam(learning_rate=1e-3), metrics=['mae'])

In [44]:
# Train the agent on the training environment, logging every 1000 steps.
# NOTE(review): nb_max_episode_steps=1 cuts every training episode off after a
# single step (the log reports 1000 episodes per 1000 steps), whereas the
# evaluation cell runs 365-step episodes - confirm this is intended.
dqn.fit(
    env,
    nb_steps=10000,
    visualize=False,
    verbose=1,
    nb_max_episode_steps=1,
    log_interval=1000,
)

Training for 5000 steps ...
Interval 1 (0 steps performed)
1000 episodes - episode_reward: -3.932 [-5.372, 0.000] - loss: 4.634 - mae: 3.716 - mean_q: -0.070 - mean_eps: 0.932

Interval 2 (1000 steps performed)
1000 episodes - episode_reward: -3.584 [-5.372, 0.000] - loss: 3.825 - mae: 3.661 - mean_q: -0.081 - mean_eps: 0.865

Interval 3 (2000 steps performed)
1000 episodes - episode_reward: -3.085 [-5.372, 0.000] - loss: 3.174 - mae: 3.624 - mean_q: -0.101 - mean_eps: 0.775

Interval 4 (3000 steps performed)
1000 episodes - episode_reward: -2.623 [-5.372, 0.000] - loss: 2.848 - mae: 3.566 - mean_q: -0.058 - mean_eps: 0.685

Interval 5 (4000 steps performed)
done, took 77.202 seconds


  saving_api.save_model(


In [None]:
# Separate environment instance used only for evaluation.
# NOTE(review): the meaning of the argument 1000 is defined in env_class - confirm.
env_test = BatteryManagementEnv(1000)
env_test.reset()

# Evaluate the trained agent for 15 episodes, each capped at 1000 steps.
dqn.test(
    env_test,
    nb_episodes=15,
    visualize=False,
    nb_max_episode_steps=1000,
)

Testing for 15 episodes ...
Episode 1: reward: 4666.227, steps: 365
Episode 2: reward: 4666.227, steps: 365
Episode 3: reward: 4666.227, steps: 365
Episode 4: reward: 4666.227, steps: 365
Episode 5: reward: 4666.227, steps: 365
Episode 6: reward: 4666.227, steps: 365
Episode 7: reward: 4666.227, steps: 365
Episode 8: reward: 4666.227, steps: 365
Episode 9: reward: 4666.227, steps: 365
Episode 10: reward: 4666.227, steps: 365
Episode 11: reward: 4666.227, steps: 365
Episode 12: reward: 4666.227, steps: 365
Episode 13: reward: 4666.227, steps: 365
Episode 14: reward: 4666.227, steps: 365
Episode 15: reward: 4666.227, steps: 365


<keras.src.callbacks.History at 0x23753397bd0>