In [1]:
import numpy as np
import gym
import random
import sys
import pygame as pg

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Embedding, Reshape, Input, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

from Engine import Blockudoku


pygame 2.0.2 (SDL 2.0.16, Python 3.9.6)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
from rl.core import Processor
class CustomProcessor(Processor):
    """Processor that removes axis 1 from every state batch."""

    def process_state_batch(self, batch):
        """Return ``batch`` with its size-1 axis at index 1 squeezed out.

        ``np.squeeze`` raises ``ValueError`` if axis 1 does not have
        length 1.
        """
        # NOTE(review): axis 1 is presumably the window axis that the
        # replay memory adds (window_length=1) -- confirm against keras-rl.
        squeezed = np.squeeze(batch, axis=1)
        return squeezed

In [3]:
# One seed shared by every random number generator the notebook touches,
# so a Restart & Run All gives the same rollout.
SEED = 123

env = Blockudoku()

# Seed Python's and TensorFlow's global RNGs as well as NumPy's; the
# original seeded only NumPy.
# NOTE(review): keras-rl's replay memory appears to sample with the stdlib
# `random` module -- confirm for the installed version.
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
env.seed(SEED)

In [5]:
# Q-network: three same-padded 3x3 convolutions over the environment state,
# flattened into a dense layer, ending in one linear output per action.
layers = [
    Input(shape=env.state.shape),
    Conv2D(32, 3, activation="relu", padding="same", name="Conv2D_layer1"),
    Conv2D(64, 3, activation="relu", padding="same", name="Conv2D_layer2"),
    Conv2D(64, 3, activation="relu", padding="same", name="Conv2D_layer3"),
    Flatten(),
    Dense(200, activation="relu", name="Dense_layer1"),
    Dense(env.action_space.n, activation="linear", name="output"),
]

model = Sequential(layers)

# NOTE(review): DQNAgent.compile() is called on this model later and may
# replace this loss/optimizer -- confirm whether this compile is needed.
model.compile(loss=tf.keras.losses.Huber(), optimizer="adam")

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Conv2D_layer1 (Conv2D)       (None, 9, 9, 32)          608       
_________________________________________________________________
Conv2D_layer2 (Conv2D)       (None, 9, 9, 64)          18496     
_________________________________________________________________
Conv2D_layer3 (Conv2D)       (None, 9, 9, 64)          36928     
_________________________________________________________________
flatten_1 (Flatten)          (None, 5184)              0         
_________________________________________________________________
Dense_layer1 (Dense)         (None, 200)               1037000   
_________________________________________________________________
output (Dense)               (None, 5)                 1005      
Total params: 1,094,037
Trainable params: 1,094,037
Non-trainable params: 0
______________________________________________

In [8]:
# Recreate the agent with the configuration below, then restore the saved
# weights from disk.
policy = EpsGreedyQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)

dqn_loaded = DQNAgent(
    model=model,
    nb_actions=env.action_space.n,
    memory=memory,
    policy=policy,
    processor=CustomProcessor(),
    nb_steps_warmup=500,
    target_model_update=1e-2,
    enable_double_dqn=True,
)

# The agent has to be compiled before its weights can be loaded.
# NOTE(review): confirm this ordering requirement against keras-rl.
dqn_loaded.compile(
    Adam(learning_rate=1e-3),
    metrics=['mae'],
)
dqn_loaded.load_weights(
    'dqn_{}_weights.h5f'.format("Blockudoku")
)

In [11]:
# Run one short, visualised test episode in a pygame window.
pg.init()
try:
    # Window sized from the environment's own dimensions.
    screen = pg.display.set_mode([env.window_size.x, env.window_size.y])
    # One episode, capped at 10 steps, with rendering enabled.
    dqn_loaded.test(env, nb_episodes=1, visualize=True, nb_max_episode_steps=10)
finally:
    # Always shut pygame down, even if the run raises or is interrupted;
    # otherwise the window stays open in the kernel.
    pg.quit()

Testing for 1 episodes ...
+-----+-----+-----+
| :[0;30;42m [0;0m: | : : | : : |
| : :[0;30;42m [0;0m| : : | : : |
| : : |[0;30;42m [0;0m: : | : : |
+-----+-----+-----+
| : : | : : | : : |
| : : | : : | : : |
| : : | : : | : : |
+-----+-----+-----+
| : : | : : | : : |
| : : | : : | : : |
| : : | : : | : : |
+-----+-----+-----+
+-----+-----+-----+
| : :[0;30;42m [0;0m| : : | : : |
| : : |[0;30;42m [0;0m: : | : : |
| : : | :[0;30;42m [0;0m: | : : |
+-----+-----+-----+
| : : | : : | : : |
| : : | : : | : : |
| : : | : : | : : |
+-----+-----+-----+
| : : | : : | : : |
| : : | : : | : : |
| : : | : : | : : |
+-----+-----+-----+
+-----+-----+-----+
| : : | : : | : : |
| : :[0;30;42m [0;0m| : : | : : |
| : : |[0;30;42m [0;0m: : | : : |
+-----+-----+-----+
| : : | :[0;30;42m [0;0m: | : : |
| : : | : : | : : |
| : : | : : | : : |
+-----+-----+-----+
| : : | : : | : : |
| : : | : : | : : |
| : : | : : | : : |
+-----+-----+-----+
+-----+-----+-----+
| : : | : : | : : |
| : : | :