In [1]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Conv1D, Activation, MaxPooling2D, MaxPooling1D, Dropout
from tensorflow.keras.optimizers import Adam
import time

In [2]:
from pygameMultibot1 import GameWindow

pygame 2.1.2 (SDL 2.0.18, Python 3.8.8)
Hello from the pygame community. https://www.pygame.org/contribute.html
Python version 3.8.8 (default, Apr 13 2021, 15:08:03) [MSC v.1916 64 bit (AMD64)]


In [38]:
class Warehouse(Env):
    """Gym environment that delegates all simulation work to a ``GameWindow``.

    Every episode runs against a freshly constructed ``GameWindow``; actions
    are forwarded to it and observations, rewards and the termination flag
    are read back from it.

    Args:
        window_args: Positional arguments passed to ``GameWindow`` each time
            one is created (in ``__init__`` and on every ``reset``).
            Defaults to ``(0, 5, 0)``, the values previously hard-coded in
            both places.
            NOTE(review): the meaning of the three values is defined by
            ``pygameMultibot1.GameWindow`` -- confirm there.
    """

    def __init__(self, window_args=(0, 5, 0)):
        # Keep the constructor arguments in one place so __init__ and reset
        # can never drift apart.
        self._window_args = tuple(window_args)
        self.pygame = self._new_window()
        # 64 discrete actions; observations have shape (11, 11, 2) with
        # every entry bounded to [0, 6].
        self.action_space = Discrete(64)
        self.observation_space = Box(low=0, high=6, shape=(11, 11, 2))

    def _new_window(self):
        """Create a ``GameWindow`` from the stored constructor arguments."""
        return GameWindow(*self._window_args)

    def reset(self):
        """Discard the current game, start a new one and return its first observation."""
        # Drop the old window *before* building the replacement, exactly as
        # the original code did.
        # NOTE(review): presumably this lets the old window release its
        # pygame resources first -- confirm against GameWindow.
        del self.pygame
        self.pygame = self._new_window()
        return self.pygame.observe()

    def step(self, action):
        """Apply ``action`` to the game and report the outcome.

        Returns:
            A ``(obs, reward, done, info)`` tuple, where ``obs``, ``reward``
            and ``done`` come from the window's ``observe()``, ``evaluate()``
            and ``is_done()`` (called in that order after the action is
            applied) and ``info`` is always an empty dict.
        """
        self.pygame.action(action)
        obs = self.pygame.observe()
        reward = self.pygame.evaluate()
        done = self.pygame.is_done()
        return obs, reward, done, {}

    def render(self, mode="human", close=False):
        """Sleep for 0.05 s, then call the window's ``view1()``.

        ``mode`` and ``close`` are accepted for signature compatibility but
        are not used.
        """
        time.sleep(0.05)
        self.pygame.view1()

In [39]:
# Create the environment instance shared by the training and evaluation cells.
env = Warehouse()

In [5]:
# Observation shape and number of discrete actions, read from the
# environment's declared spaces.
states = env.observation_space.shape
actions = env.action_space.n

In [6]:
# Draw one random sample from the declared observation space and display it.
env.observation_space.sample()

array([[[0.0789023 , 3.811914  ],
        [4.6018434 , 2.2909834 ],
        [5.6149526 , 4.0245204 ],
        [2.2161117 , 2.8083827 ],
        [2.5733173 , 5.030529  ],
        [5.4144006 , 0.9324109 ],
        [2.4334998 , 4.751833  ],
        [3.4453847 , 1.7505913 ],
        [2.911506  , 4.726684  ],
        [2.1155038 , 2.480137  ],
        [1.9422618 , 0.69904906]],

       [[2.6417649 , 5.5493026 ],
        [0.21703127, 0.44551903],
        [4.563553  , 0.6252595 ],
        [5.409165  , 4.7729125 ],
        [1.478918  , 5.3872733 ],
        [1.9799873 , 2.3694534 ],
        [5.980968  , 1.2028096 ],
        [5.426635  , 2.348487  ],
        [2.8981748 , 0.7032949 ],
        [3.462932  , 3.2890723 ],
        [2.203434  , 1.0648615 ]],

       [[5.938228  , 3.2477052 ],
        [4.778246  , 0.89458025],
        [3.8270166 , 0.4230178 ],
        [2.2365866 , 5.197233  ],
        [4.6698933 , 2.9473565 ],
        [1.6832515 , 0.6644926 ],
        [2.704645  , 1.7601866 ],
        [1

In [7]:
def build_model(input_shape=(11, 11, 2), nb_actions=None):
    """Build the convolutional Q-network used by the DQN agent.

    Args:
        input_shape: Shape of a single observation. Defaults to
            ``(11, 11, 2)``, matching the environment's observation space.
        nb_actions: Size of the output layer (one linear unit per action).
            When ``None``, the notebook-level ``actions`` variable is used,
            which keeps the original zero-argument call working.

    Returns:
        An uncompiled ``Sequential`` model: three 3x3 ReLU convolutions
        (32, 64, 64 filters), a 512-unit ReLU dense layer and a linear
        output layer with ``nb_actions`` units.
    """
    if nb_actions is None:
        # Backward-compatible fallback to the global set from
        # env.action_space.n in an earlier cell.
        nb_actions = actions
    input_shape = tuple(input_shape)

    model = Sequential()
    # The model is fed inputs with an extra leading axis of size 1 (the
    # agent's memory is created with window_length=1); strip that axis so
    # the convolutions see a plain observation.
    model.add(tf.keras.layers.Reshape(input_shape, input_shape=(1,) + input_shape))

    # No padding and stride 1: each 3x3 convolution shrinks both spatial
    # dimensions by 2 (11 -> 9 -> 7 -> 5 for the default input).
    model.add(Conv2D(32, (3, 3), strides=(1, 1), activation='relu'))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))

    model.add(Flatten())  # 3D feature maps -> 1D feature vector
    model.add(Dense(512, activation='relu'))

    # Linear head: one unbounded output per action.
    model.add(Dense(nb_actions, activation='linear'))
    return model

In [13]:
# Discard a model left over from an earlier run of this notebook, if any.
# An unconditional `del model` raises NameError on a fresh kernel, because
# `model` is not created until the next cell.
if 'model' in globals():
    del model

In [14]:
# Instantiate a fresh Q-network from build_model.
model = build_model()

In [15]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_1 (Reshape)         (None, 11, 11, 2)         0         
                                                                 
 conv2d_3 (Conv2D)           (None, 9, 9, 32)          608       
                                                                 
 conv2d_4 (Conv2D)           (None, 7, 7, 64)          18496     
                                                                 
 conv2d_5 (Conv2D)           (None, 5, 5, 64)          36928     
                                                                 
 flatten_1 (Flatten)         (None, 1600)              0         
                                                                 
 dense_2 (Dense)             (None, 512)               819712    
                                                                 
 dense_3 (Dense)             (None, 64)               

In [16]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [17]:
def build_agent(model, actions, anneal_steps=10000, warmup_steps=10000, memory_limit=500000):
    """Assemble a double-DQN agent around ``model``.

    Args:
        model: Keras model that outputs one value per action.
        actions: Number of discrete actions (passed as ``nb_actions``).
        anneal_steps: Number of steps over which the epsilon-greedy policy's
            ``eps`` is linearly annealed from 1.0 to 0.1.
        warmup_steps: Value passed to the agent as ``nb_steps_warmup``.
        memory_limit: Capacity of the sequential replay memory.

    Returns:
        An uncompiled ``DQNAgent`` with double DQN enabled.
    """
    # Epsilon-greedy exploration: eps goes 1.0 -> 0.1 during training and is
    # fixed at 0.2 when testing.
    # NOTE(review): with the defaults, annealing spans the same 10000 steps
    # as warm-up; the training log already reports mean_eps 0.100 in the
    # first interval that has a loss. Confirm this overlap is intended.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=anneal_steps,
    )
    # window_length=1 must stay in step with the leading size-1 axis that
    # build_model's Reshape layer expects.
    memory = SequentialMemory(limit=memory_limit, window_length=1)
    dqn = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        nb_actions=actions,
        nb_steps_warmup=warmup_steps,
        target_model_update=1e-2,
        enable_double_dqn=True,
    )
    return dqn

In [18]:
# Build the agent around the current model and compile it with Adam.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=0.001), metrics=['mae'])

In [28]:
# Re-compile with a much smaller learning rate (1e-5), then train for
# 1,000,000 environment steps without rendering.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
dqn.compile(Adam(learning_rate=0.00001), metrics=['mae'])
history = dqn.fit(env, nb_steps=1000000, visualize=False, verbose=1)

Training for 1000000 steps ...
Interval 1 (0 steps performed)
10 episodes - episode_reward: -411.343 [-966.370, 39.985]

Interval 2 (10000 steps performed)
10 episodes - episode_reward: 49.522 [13.260, 79.575] - loss: 0.184 - mae: 9.980 - mean_q: 10.799 - mean_eps: 0.100

Interval 3 (20000 steps performed)
10 episodes - episode_reward: 47.625 [7.450, 79.185] - loss: 0.188 - mae: 10.171 - mean_q: 11.000 - mean_eps: 0.100

Interval 4 (30000 steps performed)
10 episodes - episode_reward: 56.770 [18.060, 80.455] - loss: 0.182 - mae: 10.328 - mean_q: 11.168 - mean_eps: 0.100

Interval 5 (40000 steps performed)
10 episodes - episode_reward: 45.962 [12.610, 89.915] - loss: 0.184 - mae: 10.454 - mean_q: 11.295 - mean_eps: 0.100

Interval 6 (50000 steps performed)


In [43]:
# Run a single evaluation episode with rendering enabled; the returned
# history is discarded.
_ = dqn.test(env, nb_episodes=1, visualize=True)

Testing for 1 episodes ...


KeyboardInterrupt: 

In [33]:
# Write the agent's current weights to the default checkpoint, replacing any
# existing file.
dqn.save_weights('dqn_weights.h5f', overwrite=True)

In [26]:
# Restore the agent's weights from the default checkpoint.
dqn.load_weights('dqn_weights.h5f')

In [27]:
# Write the current weights to a descriptively named checkpoint under
# training_weights/, replacing any existing file.
dqn.save_weights('training_weights/collision 3 person ten parcel seven boulder directed.h5f', overwrite=True)

In [42]:
# Restore the weights from the descriptively named checkpoint under
# training_weights/.
dqn.load_weights('training_weights/collision 3 person ten parcel seven boulder directed.h5f')

In [23]:
# Write the current weights to the first temporary checkpoint, replacing any
# existing file.
dqn.save_weights('dqn_weightsTemp.h5f', overwrite=True)

In [19]:
# Restore the weights from the first temporary checkpoint.
dqn.load_weights('dqn_weightsTemp.h5f')

In [21]:
# Write the current weights to the second temporary checkpoint, replacing
# any existing file.
dqn.save_weights('dqn_weightsTemp1.h5f', overwrite=True)

In [20]:
# Restore the weights from the second temporary checkpoint.
dqn.load_weights('dqn_weightsTemp1.h5f')