In [1]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Conv1D, Activation, MaxPooling2D, MaxPooling1D, Dropout
from tensorflow.keras.optimizers import Adam
import time

In [2]:
from pygameMultibot import GameWindow

pygame 2.1.2 (SDL 2.0.16, Python 3.8.10)
Hello from the pygame community. https://www.pygame.org/contribute.html
Python version 3.8.10 (default, Nov 26 2021, 20:14:08) 
[GCC 9.3.0]


In [3]:
class Warehouse(Env):
    """Gym environment that delegates all game logic to a ``GameWindow``.

    The action space is ``Discrete(64)`` and observations live in a
    ``Box`` with bounds [0, 6] and shape ``(11, 11, 2)``.
    """

    def __init__(self):
        self.pygame = GameWindow(0, 3)
        self.action_space = Discrete(64)
        self.observation_space = Box(low=0, high=6, shape=(11, 11, 2))

    def reset(self):
        """Discard the current game window, create a new one, and return its first observation."""
        # The old window is released before the replacement is constructed.
        del self.pygame
        self.pygame = GameWindow(0, 3)
        return self.pygame.observe()

    def step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``.

        ``info`` is always an empty dict.
        """
        self.pygame.action(action)
        # Observation, reward and termination flag are queried in this order.
        observation = self.pygame.observe()
        reward = self.pygame.evaluate()
        finished = self.pygame.is_done()
        return observation, reward, finished, {}

    def render(self, mode="human", close=False):
        """Draw the current game state; ``mode`` and ``close`` are accepted but unused."""
        self.pygame.view1()

In [4]:
# Create the single Warehouse environment instance used by the cells below.
env = Warehouse()

In [5]:
# Observation shape tuple and number of discrete actions, read from the env.
states, actions = env.observation_space.shape, env.action_space.n

In [6]:
# Draw one random point from the observation space to eyeball its shape and value range.
env.observation_space.sample()

array([[[5.528211  , 2.7120852 ],
        [2.0416236 , 5.897837  ],
        [3.2325325 , 4.4592347 ],
        [2.8037658 , 3.9484258 ],
        [4.174781  , 5.2554154 ],
        [1.4468397 , 3.6038167 ],
        [5.3853016 , 5.0322666 ],
        [5.824172  , 1.0838099 ],
        [3.210542  , 0.00829318],
        [4.9332533 , 1.6022402 ],
        [2.1761384 , 2.8757365 ]],

       [[2.2778568 , 2.718314  ],
        [0.864502  , 5.1638656 ],
        [2.3480248 , 4.864171  ],
        [4.348325  , 1.4914045 ],
        [0.7242826 , 1.4250048 ],
        [1.4884032 , 1.9505359 ],
        [4.4506693 , 1.6876997 ],
        [0.2832982 , 3.0311847 ],
        [1.3480563 , 0.5170131 ],
        [0.8937338 , 0.17308237],
        [2.657525  , 4.6981    ]],

       [[1.5719843 , 0.02782865],
        [4.1780925 , 3.2914183 ],
        [4.6448436 , 1.0951991 ],
        [4.747745  , 1.3294277 ],
        [0.2593452 , 0.6669947 ],
        [5.6168413 , 4.5335593 ],
        [5.6027946 , 2.3946118 ],
        [5

In [7]:
def build_model(input_shape=(11, 11, 2), nb_actions=None):
    """Build the convolutional Q-network used by the DQN agent.

    Args:
        input_shape: Shape of a single observation as
            ``(height, width, channels)``. Defaults to ``(11, 11, 2)``,
            the shape declared by the Warehouse observation space.
        nb_actions: Number of Q-values the network outputs. When ``None``
            the notebook-level ``actions`` variable is used, so the
            original zero-argument call ``build_model()`` keeps working.

    Returns:
        An uncompiled ``Sequential`` model taking inputs of shape
        ``(batch, 1, *input_shape)`` and producing ``(batch, nb_actions)``
        linear Q-value outputs.
    """
    if nb_actions is None:
        # Fall back to the notebook global derived from env.action_space.n.
        nb_actions = actions

    input_shape = tuple(input_shape)

    model = Sequential()
    # The network input carries a leading axis of length 1 (the agent's
    # replay memory is created with window_length=1); strip it so the
    # convolutions see a plain (height, width, channels) tensor.
    model.add(tf.keras.layers.Reshape(input_shape, input_shape=(1,) + input_shape))

    # Three unpadded 3x3 convolutions: each one shrinks both spatial
    # dimensions by 2 (11 -> 9 -> 7 -> 5 for the default input).
    model.add(Conv2D(32, (3, 3), strides=(1, 1), activation='relu'))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))

    # Collapse the feature maps into one vector per sample.
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))

    # One linear output per action: the estimated Q-value.
    model.add(Dense(nb_actions, activation='linear'))
    return model

In [13]:
# Drop any model left over from a previous run so the next cell rebuilds it
# from scratch. On a fresh kernel `model` does not exist yet, so tolerate that
# instead of aborting a Restart & Run All with a NameError.
try:
    del model
except NameError:
    pass

In [14]:
# Build a fresh, uncompiled Q-network with the default settings.
model = build_model()

In [15]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_1 (Reshape)          (None, 11, 11, 2)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 9, 9, 32)          608       
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 7, 7, 64)          18496     
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 5, 5, 64)          36928     
_________________________________________________________________
flatten_1 (Flatten)          (None, 1600)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               819712    
_________________________________________________________________
dense_3 (Dense)              (None, 64)               

In [16]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [17]:
def build_agent(model, actions, anneal_steps=10000, warmup_steps=10000,
                memory_limit=500000):
    """Create a double-DQN agent around ``model``.

    Args:
        model: Keras model that outputs one Q-value per action.
        actions: Number of discrete actions.
        anneal_steps: Number of steps over which epsilon is linearly
            annealed from 1.0 down to 0.1.
        warmup_steps: Value passed to the agent as ``nb_steps_warmup``.
        memory_limit: Maximum number of transitions kept in replay memory.

    Returns:
        An uncompiled ``DQNAgent``; call ``compile`` on it before use.

    Note:
        With the default values the epsilon schedule ends at the same step
        as the warm-up, so by the time loss values are reported epsilon is
        already at its minimum (the training log shows ``mean_eps: 0.100``
        from the first interval that reports a loss). Pass a larger
        ``anneal_steps`` to keep exploring after the warm-up.
    """
    # Epsilon-greedy exploration: 1.0 -> 0.1 during training, fixed 0.2 in test.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=anneal_steps,
    )
    # window_length stays at 1 because the model's input layer expects a
    # leading axis of length 1.
    memory = SequentialMemory(limit=memory_limit, window_length=1)
    dqn = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        nb_actions=actions,
        nb_steps_warmup=warmup_steps,
        target_model_update=1e-2,
        enable_double_dqn=True,
    )
    return dqn

In [18]:
# Wrap the Q-network in a DQN agent and compile it.
dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
dqn.compile(Adam(learning_rate=0.001), metrics=['mae'])

2022-03-16 15:07:07.893180: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-16 15:07:07.893499: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-16 15:07:07.893722: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-16 15:07:07.893994: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-16 15:07:07.894214: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from S

In [None]:
# Compile the agent with the learning rate used for this training run.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
dqn.compile(Adam(learning_rate=0.00007), metrics=['mae'])
# Train for one million environment steps; visualize=True renders the
# environment while training, and verbose=1 prints interval summaries.
history = dqn.fit(env, nb_steps=1000000, visualize=True, verbose=1)

Training for 1000000 steps ...
Interval 1 (0 steps performed)


2022-03-16 15:07:44.806799: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8100


10 episodes - episode_reward: -437.829 [-822.245, -164.530]

Interval 2 (10000 steps performed)
10 episodes - episode_reward: -266.747 [-464.480, -124.120] - loss: 0.258 - mae: 11.590 - mean_q: 12.701 - mean_eps: 0.100

Interval 3 (20000 steps performed)
10 episodes - episode_reward: -463.429 [-893.790, -173.715] - loss: 0.297 - mae: 11.277 - mean_q: 12.628 - mean_eps: 0.100

Interval 4 (30000 steps performed)
10 episodes - episode_reward: -811.631 [-990.275, -692.810] - loss: 0.753 - mae: 21.600 - mean_q: 23.632 - mean_eps: 0.100

Interval 5 (40000 steps performed)
10 episodes - episode_reward: -574.347 [-783.870, -276.165] - loss: 1.257 - mae: 35.379 - mean_q: 37.897 - mean_eps: 0.100

Interval 6 (50000 steps performed)
 1319/10000 [==>...........................] - ETA: 3:14 - reward: -0.5683

In [38]:
# Run one rendered evaluation episode; the return value is discarded.
_ = dqn.test(env, nb_episodes=1, visualize=True)

Testing for 1 episodes ...
Episode 1: reward: 87.100, steps: 1000


In [33]:
# Write the agent's current weights to 'dqn_weights.h5f', replacing any existing file.
dqn.save_weights('dqn_weights.h5f', overwrite=True)

In [26]:
# Restore the agent's weights from 'dqn_weights.h5f'.
dqn.load_weights('dqn_weights.h5f')

In [25]:
# Write the agent's current weights to 'dqn_weightsCNN.h5f', replacing any existing file.
dqn.save_weights('dqn_weightsCNN.h5f', overwrite=True)

In [19]:
# Restore weights from the training_weights/ directory (note the space in the file name).
dqn.load_weights('training_weights/five parcel2.h5f')

In [23]:
# Write the agent's current weights to 'dqn_weightsTemp.h5f', replacing any existing file.
dqn.save_weights('dqn_weightsTemp.h5f', overwrite=True)

In [19]:
# Restore the agent's weights from 'dqn_weightsTemp.h5f'.
dqn.load_weights('dqn_weightsTemp.h5f')

In [23]:
# Write the agent's current weights to 'dqn_weightsTemp1.h5f', replacing any existing file.
dqn.save_weights('dqn_weightsTemp1.h5f', overwrite=True)

In [20]:
# Restore the agent's weights from 'dqn_weightsTemp1.h5f'.
dqn.load_weights('dqn_weightsTemp1.h5f')