In [1]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Conv1D, Activation, MaxPooling2D, MaxPooling1D, Dropout
from tensorflow.keras.optimizers import Adam
import multiprocessing
import time

In [2]:
from pygametest import GameWindow

pygame 2.1.2 (SDL 2.0.18, Python 3.8.8)
Hello from the pygame community. https://www.pygame.org/contribute.html
Python version 3.8.8 (default, Apr 13 2021, 15:08:03) [MSC v.1916 64 bit (AMD64)]


In [3]:
class Warehouse(Env):
    """Gym environment that forwards every call to a pygame ``GameWindow``.

    The action space has 5 discrete actions and the observation space is
    declared as a 16x9 box with values between 0 and 5.
    """
    # metadata = {'render.modes' : ['human']}

    @staticmethod
    def _spawn_game():
        """Create the game window with the arguments this environment always uses.

        NOTE(review): the meaning of ``(0, 3)`` is defined by ``GameWindow``
        and is not visible here.
        """
        return GameWindow(0, 3)

    def __init__(self):
        self.pygame = self._spawn_game()
        self.action_space = Discrete(5)
        self.observation_space = Box(low=0, high=5, shape=(16, 9))

    def reset(self):
        """Replace the current game with a new one and return its first observation."""
        # Drop the old window before the replacement is constructed.
        del self.pygame
        self.pygame = self._spawn_game()
        return self.pygame.observe()

    def step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``.

        ``info`` is always an empty dict.
        """
        game = self.pygame
        game.action(action)
        # Evaluated left to right: observe, then evaluate, then is_done.
        return game.observe(), game.evaluate(), game.is_done(), {}

    def render(self, mode="human", close=False):
        """Draw the game via ``GameWindow.view1``; ``mode`` and ``close`` are unused."""
        self.pygame.view1()

In [4]:
# Instantiate the custom Warehouse environment defined above.
env = Warehouse()

In [5]:
# Read the observation shape and the number of discrete actions from the
# environment's declared spaces.
states = env.observation_space.shape
actions = env.action_space.n

In [6]:
# Draw one random sample from the observation space as a quick check of its
# shape and value range.
env.observation_space.sample()

array([[2.010287  , 0.5951203 , 3.413927  , 3.4624372 , 0.1866569 ,
        0.8811262 , 1.6807746 , 0.77554065, 1.7540894 ],
       [1.4924256 , 2.6860094 , 1.6215149 , 3.8299472 , 2.243309  ,
        0.30695233, 1.9747847 , 2.3260152 , 0.9534102 ],
       [4.6776314 , 2.4378057 , 1.5803722 , 3.9174738 , 4.4931297 ,
        2.5865722 , 2.9531877 , 0.8898067 , 4.8804255 ],
       [0.26014128, 4.1305194 , 4.5621777 , 1.8241673 , 3.9403489 ,
        2.2161758 , 2.6133175 , 1.2872525 , 0.48277915],
       [2.0532186 , 4.604703  , 0.84613186, 2.5214376 , 4.431688  ,
        4.03385   , 2.9571095 , 3.0689363 , 0.8004635 ],
       [2.3951945 , 3.8669455 , 1.2552848 , 1.4767684 , 0.43971702,
        1.4007096 , 3.208812  , 0.12159426, 2.6338673 ],
       [3.2500222 , 1.2650602 , 2.0015616 , 4.158668  , 2.6754534 ,
        2.7009432 , 3.1520097 , 0.02169772, 3.4315615 ],
       [4.659401  , 3.8325126 , 4.4450145 , 3.73297   , 1.177495  ,
        4.110231  , 4.829602  , 3.5069647 , 1.1715169 ],


In [7]:
def build_model():
    """Build a fully connected Q-network.

    The (1, 16, 9) input is flattened, passed through two ReLU hidden layers
    (512 and 256 units) and mapped to 5 linear outputs.

    NOTE: a later cell defines another ``build_model``; whichever cell ran
    last is the definition in effect.
    """
    stack = [
        Flatten(input_shape=(1, 16, 9)),
        Dense(512, activation='relu'),
        Dense(256, activation='relu'),
        Dense(5, activation='linear'),
        # Kept from the original; the Dense output above is already one flat
        # vector per sample.
        Flatten(),
    ]
    return Sequential(stack)

In [21]:
def build_model(obs_shape=(16, 9), nb_actions=5):
    """Build the convolutional Q-network.

    Args:
        obs_shape: Shape of a single 2-D observation grid. Defaults to
            ``(16, 9)``, matching the environment's observation space.
        nb_actions: Number of Q-values to output, one per action. Defaults
            to 5, matching the environment's ``Discrete(5)`` action space.

    Returns:
        An uncompiled ``Sequential`` model taking input of shape
        ``(1, *obs_shape)`` and producing ``nb_actions`` linear outputs.
    """
    obs_shape = tuple(obs_shape)

    model = Sequential()
    # The input carries a leading axis of size 1 (the agent's memory is built
    # with window_length=1). Reshape drops it and appends a single channel
    # axis so Conv2D sees (height, width, 1).
    model.add(tf.keras.layers.Reshape(obs_shape + (1,), input_shape=(1,) + obs_shape))

    # First conv block: 256 3x3 filters, ReLU, 2x2 max pooling.
    model.add(Conv2D(256, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Second conv block: 256 3x3 filters, ReLU, no pooling.
    model.add(Conv2D(256, (3, 3)))
    model.add(Activation('relu'))

    # Collapse the feature maps into one vector per sample.
    model.add(Flatten())
    # NOTE(review): this Dense layer has no activation, so together with the
    # linear output layer it is a single linear map; consider adding 'relu'.
    model.add(Dense(128))

    # One linear output per action (the Q-value estimates).
    model.add(Dense(nb_actions, activation='linear'))
    return model

In [22]:
# Discard a model left over from an earlier run before it is rebuilt.
# A bare `del model` raises NameError on a fresh kernel (no earlier cell
# assigns `model`), so only delete the name when it exists.
if "model" in globals():
    del model

In [23]:
# Build the network with whichever build_model definition was executed most
# recently in this kernel.
model = build_model()

In [24]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_2 (Reshape)         (None, 16, 9, 1)          0         
                                                                 
 conv2d_4 (Conv2D)           (None, 14, 7, 256)        2560      
                                                                 
 activation_4 (Activation)   (None, 14, 7, 256)        0         
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 7, 3, 256)        0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 5, 1, 256)         590080    
                                                                 
 activation_5 (Activation)   (None, 5, 1, 256)         0         
                                                      

In [25]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [26]:
def build_agent(model, actions):
    """Assemble an uncompiled keras-rl ``DQNAgent`` around ``model``.

    Args:
        model: Keras model that outputs one value per action.
        actions: Number of discrete actions, passed as ``nb_actions``.

    Returns:
        The ``DQNAgent``; the caller is expected to compile it.
    """
    # Epsilon-greedy exploration: `eps` is annealed from 1.0 down to 0.1 over
    # 10000 steps, and 0.2 is used when testing.
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=10000,
    )
    # Replay buffer holding up to 500000 entries, with a window of one observation.
    replay = SequentialMemory(limit=500000, window_length=1)
    return DQNAgent(
        model=model,
        memory=replay,
        policy=exploration,
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        enable_double_dqn=False,
    )

In [27]:
# Wrap the Q-network in a DQN agent and compile it for training.
dqn = build_agent(model, actions)
# Use `learning_rate`; `lr` is a deprecated alias in tf.keras optimizers.
dqn.compile(Adam(learning_rate=0.001), metrics=['mse'])

In [19]:
# Train the agent on the environment with on-screen rendering enabled
# (visualize=True) and keep the returned training history.
history = dqn.fit(
    env,
    nb_steps=1_000_000,
    visualize=True,
    verbose=1,
)

Training for 1000000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 12:30 - reward: -0.1000

  updates=self.state_updates,


 1530/10000 [===>..........................] - ETA: 1:57 - reward: -0.0978done, took 21.311 seconds


In [46]:
# Run 10 evaluation episodes with rendering; the return value is assigned to
# `_` and not used further.
_ = dqn.test(env, nb_episodes=10, visualize=True)

Testing for 10 episodes ...
Episode 1: reward: -38.400, steps: 1000
Episode 2: reward: -38.400, steps: 1000
Episode 3: reward: -38.400, steps: 1000
Episode 4: reward: -38.400, steps: 1000


KeyboardInterrupt: 

In [33]:
# Save the agent's current weights to 'dqn_weights.h5f'; overwrite=True is
# passed so an existing file of that name does not block the save.
dqn.save_weights('dqn_weights.h5f', overwrite=True)

In [26]:
# Restore weights from 'dqn_weights.h5f'.
# NOTE(review): the current model presumably has to match the architecture
# that wrote this file -- confirm which build_model produced it.
dqn.load_weights('dqn_weights.h5f')

In [None]:
# Save the agent's current weights to 'dqn_weightsCNN.h5f', kept separate
# from 'dqn_weights.h5f'.
dqn.save_weights('dqn_weightsCNN.h5f', overwrite=True)

In [28]:
# Restore weights from 'dqn_weightsCNN.h5f'.
# NOTE(review): the recorded output of this cell is a ValueError: the layer
# expects a kernel of shape (3, 3, 1, 256) but the file provides
# (3, 3, 1, 32). The checkpoint appears to come from a network with 32
# filters; re-save it from the current model, or rebuild the matching
# architecture, before loading.
dqn.load_weights('dqn_weightsCNN.h5f')


Two checkpoint references resolved to different objects (<keras.layers.convolutional.Conv2D object at 0x000001F9DB8814C0> and <keras.layers.core.activation.Activation object at 0x000001F9DBCB0CD0>).

Two checkpoint references resolved to different objects (<keras.layers.core.dense.Dense object at 0x000001F9DB8B5280> and <keras.layers.pooling.MaxPooling2D object at 0x000001F9DBDB0940>).

Two checkpoint references resolved to different objects (<keras.layers.core.dense.Dense object at 0x000001F9DB8CEE50> and <keras.layers.core.activation.Activation object at 0x000001F9DB85F100>).


ValueError: Layer weight shape (3, 3, 1, 256) not compatible with provided weight shape (3, 3, 1, 32)