In [1]:
import numpy as np
import gym
import random
import sys

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Embedding, Reshape, Input, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

from Engine import Blockudoku

# Record the interpreter and library versions this notebook was executed with.
print("Python: ", sys.version, sep="")
print("Tensorflow version: ", tf.__version__, sep="")
print("Keras version: ", tf.keras.__version__, sep="")


pygame 2.0.2 (SDL 2.0.16, Python 3.9.6)
Hello from the pygame community. https://www.pygame.org/contribute.html
Python: 3.9.6 (tags/v3.9.6:db3ff76, Jun 28 2021, 15:26:21) [MSC v.1929 64 bit (AMD64)]
Tensorflow version: 2.6.2
Keras version: 2.6.0


In [2]:
from rl.core import Processor
class CustomProcessor(Processor):
    """Adapter that sits between the agent and the environment.

    Only the state-batch hook is overridden; every other hook keeps the
    behaviour inherited from ``Processor``.
    """

    def process_state_batch(self, batch):
        """Return ``batch`` with its length-1 axis 1 removed.

        The extra axis stops the batch from being fed to the convolutional
        network, so it is dropped here.
        NOTE(review): axis 1 is presumably the replay-memory window axis
        (the notebook uses ``window_length=1``) -- confirm.

        Raises:
            ValueError: raised by ``np.squeeze`` when axis 1 does not have
                length 1.
        """
        without_window_axis = np.squeeze(batch, axis=1)
        return without_window_axis

In [3]:
# Instantiate the Blockudoku environment (imported from the local Engine module);
# every later cell in this notebook uses this single `env` instance.
env = Blockudoku()
# Draw the current board as a quick visual check that the environment was created.
env.render()

+-----+-----+-----+
| :[0;30;42m [0;0m: | : : | : : |
|[0;30;42m [0;0m:[0;30;42m [0;0m:[0;30;42m [0;0m| : : | : : |
| :[0;30;42m [0;0m: | : : | : : |
+-----+-----+-----+
| : : | : : | : : |
| : : | : : | : : |
| : : | : : | : : |
+-----+-----+-----+
| : : | : : | : : |
| : : | : : | : : |
| : : | : : | : : |
+-----+-----+-----+


In [4]:
# Report the size of the discrete action and observation spaces.
for template, space in (
    ("Number of actions: %d", env.action_space),
    ("Number of states: %d", env.observation_space),
):
    print(template % space.n)

Number of actions: 5
Number of states: 512


In [5]:
# Cache both space sizes; `action_size` sets the width of the Q-network output layer.
action_size, state_size = env.action_space.n, env.observation_space.n

In [6]:
# Seed every random number generator this notebook imports, not only NumPy,
# so that a "Restart Kernel -> Run All" is repeatable.
# NOTE(review): keras-rl's replay sampling is believed to draw from Python's
# `random` module, and Keras weight initialisation from TensorFlow's RNG --
# confirm against the installed versions.
# NOTE(review): `env` was constructed in an earlier cell, so anything it
# randomised at construction time happened before this seed -- confirm
# whether that matters.
SEED = 123
random.seed(SEED)
tf.random.set_seed(SEED)
np.random.seed(SEED)
env.seed(SEED)

In [7]:
# Start a fresh episode, take one random action and display the resulting
# observation with its third axis moved to the front.
env.reset()
sampled_action = env.action_space.sample()
step_result = env.step(sampled_action)
np.moveaxis(step_result[0], source=2, destination=0)

array([[[1., 1.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[1., 0.],
        [0., 1.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.]

In [8]:
# Q-network: three same-padded 3x3 convolutions over the environment state,
# then a dense hidden layer and one linear output per action (the Q-values).
layers = [
    Input(shape=env.state.shape),
    Conv2D(32, 3, activation="relu", padding="same", name="Conv2D_layer1"),
    Conv2D(64, 3, activation="relu", padding="same", name="Conv2D_layer2"),
    Conv2D(64, 3, activation="relu", padding="same", name="Conv2D_layer3"),
    Flatten(),
    Dense(200, activation="relu", name="Dense_layer1"),
    Dense(action_size, activation="linear", name="output"),
]

# Alternative fully connected architecture on the flattened state, kept for reference:
# layers = [Input(shape=(len(env.state.flatten()),)),
#                       Dense(69, activation="relu", name="Dense_layer1"),
#                       Dense(69, activation="relu", name="Dense_layer2"),
#                       Dense(action_size, activation="linear", name="output")]

model = Sequential(layers)

# NOTE(review): the DQN agent compiles the model again with its own optimizer
# and loss when `dqn.compile(...)` runs, so this compile is probably
# overridden -- confirm before relying on the Huber loss here.
model.compile(loss=tf.keras.losses.Huber(), optimizer="adam")

# `summary()` prints the table itself and returns None; wrapping it in
# `print()` would add a stray "None" line after the table.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Conv2D_layer1 (Conv2D)       (None, 9, 9, 32)          608       
_________________________________________________________________
Conv2D_layer2 (Conv2D)       (None, 9, 9, 64)          18496     
_________________________________________________________________
Conv2D_layer3 (Conv2D)       (None, 9, 9, 64)          36928     
_________________________________________________________________
flatten (Flatten)            (None, 5184)              0         
_________________________________________________________________
Dense_layer1 (Dense)         (None, 200)               1037000   
_________________________________________________________________
output (Dense)               (None, 5)                 1005      
Total params: 1,094,037
Trainable params: 1,094,037
Non-trainable params: 0
______________________________________________

In [10]:
# Replay buffer and exploration policy used by the agent.
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()

# Double-DQN agent wrapped around the Q-network; CustomProcessor reshapes the
# state batches before they reach the network.
dqn = DQNAgent(
    model=model,
    nb_actions=action_size,
    memory=memory,
    nb_steps_warmup=500,
    target_model_update=1e-2,
    policy=policy,
    enable_double_dqn=True,
    processor=CustomProcessor(),
)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Train; each episode is cut off after 99 steps.
dqn.fit(
    env,
    nb_steps=1000000,
    visualize=False,
    verbose=1,
    nb_max_episode_steps=99,
    log_interval=100000,
)

Training for 1000000 steps ...
Interval 1 (0 steps performed)






<keras.callbacks.History at 0x28d95742760>

In [11]:
# Evaluate the trained agent for five rendered episodes, each capped at 100 steps.
dqn.test(
    env,
    nb_episodes=5,
    visualize=True,
    nb_max_episode_steps=100,
)

Testing for 5 episodes ...
Episode 1: reward: -100.000, steps: 100
Episode 2: reward: -967.000, steps: 100
Episode 3: reward: -916.000, steps: 100
Episode 4: reward: -100.000, steps: 100
Episode 5: reward: -991.000, steps: 100


<keras.callbacks.History at 0x28d95306f70>

In [None]:
# Persist the trained agent's weights, replacing any earlier file of the same name.
dqn.save_weights(
    'dqn_{}_weights.h5f'.format("BlockudokuTest"),
    overwrite=True,
)

In [None]:
# Build the reloaded agent around its OWN network, replay memory and policy.
# Reusing `model`, `memory` and `policy` from the trained agent would alias the
# two agents: the network would already hold the trained weights (so a broken
# or missing weights file could go unnoticed), and loading would mutate the
# original agent's network as well.
# `clone_model` copies the architecture only; the weights are freshly
# initialised until `load_weights` restores them from disk.
model_loaded = keras.models.clone_model(model)

dqn_loaded = DQNAgent(
    model=model_loaded,
    nb_actions=action_size,
    memory=SequentialMemory(limit=50000, window_length=1),
    nb_steps_warmup=500,
    target_model_update=1e-2,
    policy=EpsGreedyQPolicy(),
    enable_double_dqn=True,
    processor=CustomProcessor(),
)
dqn_loaded.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Restore the weights written by the save cell (same file name).
dqn_loaded.load_weights('dqn_{}_weights.h5f'.format("BlockudokuTest"))

In [None]:
# Evaluate the reloaded agent for five episodes without rendering, each capped at 100 steps.
dqn_loaded.test(
    env,
    nb_episodes=5,
    visualize=False,
    nb_max_episode_steps=100,
)