In [1]:
import gym
from PIL import Image
import numpy as np
from gym.utils import play

pygame 2.0.1 (SDL 2.0.14, Python 3.8.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Activation,Flatten,Conv2D,Permute
from tensorflow.keras.optimizers import Adam

In [3]:
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy,EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger,ModelIntervalCheckpoint

In [4]:
# Create the Pong environment that every later cell trains and tests on.
env = gym.make("Pong-v0")

In [5]:
# Size of the environment's action space; used as the width of the
# network's output layer and as the agent's `nb_actions`.
num_actions = env.action_space.n

In [6]:
# Display the number of actions read from the environment.
num_actions

6

In [7]:
# Manual play-through of the environment via gym's `play` utility.
# NOTE(review): this looks interactive (pygame is loaded by the `gym.utils.play`
# import) and may block a Restart & Run All until the window is closed —
# confirm, and consider skipping this cell for unattended runs.
play.play(env)

In [7]:
# Size that every frame is resized to before it reaches the network.
IMG_SHAPE = (84, 84)

# Number of consecutive frames grouped into one network input.
WINDOW_LENGTH = 4

In [8]:
class ImageProcessor(Processor):
    """Preprocess environment frames for the DQN.

    Single observations are resized to ``IMG_SHAPE`` and converted to 8-bit
    grayscale; batches are rescaled to ``float32`` values in ``[0, 1]`` only
    at the point where they are handed over as network input.
    """

    def process_observation(self, observation):
        """Return ``observation`` as a grayscale ``uint8`` array.

        Args:
            observation: Array accepted by ``PIL.Image.fromarray``
                (presumably the raw RGB frame from the environment —
                TODO confirm).

        Returns:
            ``numpy.ndarray`` of dtype ``uint8`` with shape ``IMG_SHAPE``
            interpreted as ``(rows, cols)``.
        """
        rows, cols = IMG_SHAPE
        img = Image.fromarray(observation)
        # PIL's resize() expects (width, height), while IMG_SHAPE is used as
        # (rows, cols) when the network input shape is built. Passing the
        # size explicitly keeps the two consistent for non-square shapes too.
        img = img.resize((cols, rows))
        img = img.convert('L')
        img = np.array(img)
        return img.astype('uint8')

    def process_state_batch(self, batch):
        """Scale a batch of ``uint8`` frames to ``float32`` in ``[0, 1]``.

        Args:
            batch: ``numpy.ndarray`` of pixel values in ``0..255``.

        Returns:
            A new ``float32`` array equal to ``batch / 255.0``.
        """
        processed_batch = batch.astype('float32') / 255.0
        return processed_batch

In [10]:
# Network input: WINDOW_LENGTH stacked frames, each IMG_SHAPE in size
# (frames-first layout; the model's first layer permutes it).
input_shape = (WINDOW_LENGTH, *IMG_SHAPE[:2])

In [11]:
# Convolutional Q-network: three conv stages followed by a 512-unit dense
# layer and one linear output per action.
S_Net = Sequential([
    # Move the frame axis last: (frames, rows, cols) -> (rows, cols, frames).
    Permute((2, 3, 1), input_shape=input_shape),
    Conv2D(32, (8, 8), strides=(4, 4), kernel_initializer='he_normal'),
    Activation('relu'),
    Conv2D(64, (4, 4), strides=(2, 2), kernel_initializer='he_normal'),
    Activation('relu'),
    Conv2D(64, (3, 3), strides=(1, 1), kernel_initializer='he_normal'),
    Activation('relu'),
    Flatten(),
    Dense(512),
    Activation('relu'),
    # One unbounded Q-value per available action.
    Dense(num_actions),
    Activation('linear'),
])

In [12]:
# Print the layer-by-layer summary (output shapes and parameter counts).
S_Net.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute (Permute)            (None, 84, 84, 4)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 20, 20, 32)        8224      
_________________________________________________________________
activation (Activation)      (None, 20, 20, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 9, 9, 64)          32832     
_________________________________________________________________
activation_1 (Activation)    (None, 9, 9, 64)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 64)          36928     
_________________________________________________________________
activation_2 (Activation)    (None, 7, 7, 64)          0

In [13]:
# Replay memory holding up to one million entries, read back in windows of
# WINDOW_LENGTH consecutive observations.
memory = SequentialMemory(
    limit=1_000_000,
    window_length=WINDOW_LENGTH,
)

In [15]:
# Frame preprocessor (resize + grayscale + batch scaling) passed to the agent.
processor = ImageProcessor()

In [16]:
# Epsilon-greedy exploration whose `eps` attribute is annealed linearly
# between value_max and value_min over nb_steps; value_test is the setting
# supplied for test runs.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.0,
    value_min=0.1,
    value_test=0.05,
    nb_steps=1_000_000,
)

In [17]:
# Assemble the DQN agent from the network, exploration policy, replay memory
# and frame processor defined in the cells above.
dqn = DQNAgent(
    model=S_Net,
    nb_actions=num_actions,
    policy=policy,
    memory=memory,
    processor=processor,
    nb_steps_warmup=50_000,
    gamma=0.99,
    target_model_update=10_000,
    train_interval=4,
    delta_clip=1,
)

In [18]:
# Compile the agent with Adam (learning rate 2.5e-4), tracking mean absolute error.
dqn.compile(Adam(learning_rate=2.5e-4), metrics=['mae'])

In [19]:
# Output file names for the final weights and for periodic checkpoints.
weights_filename = 'dqn_pong_weights.h5f'
checkpoint_filename = 'dqn_checkpoint_pong_weights.h5f'

# Callback configured with a checkpoint interval of 100,000.
checkpoint_callback = ModelIntervalCheckpoint(
    checkpoint_filename,
    interval=100_000,
)


In [20]:
# Short demonstration run of the training loop.
# NOTE(review): nb_steps (1000) is smaller than both the agent's
# nb_steps_warmup (50000) and the checkpoint interval (100000), so this run
# presumably never leaves warm-up and never writes a checkpoint — confirm,
# and raise nb_steps for real training.
dqn.fit(
    env,
    nb_steps=1000,
    callbacks=[checkpoint_callback],
    log_interval=500,
    visualize=False,
)

Training for 1000 steps ...
Interval 1 (0 steps performed)




Interval 2 (500 steps performed)
done, took 7.594 seconds


<tensorflow.python.keras.callbacks.History at 0x7fd9c44cb880>

In [20]:
# Disabled on purpose: uncomment to write the agent's current weights to
# `weights_filename`, replacing any existing file.
#dqn.save_weights(weights_filename,overwrite=True)

In [21]:
# Run one rendered test episode with the agent built above.
dqn.test(
    env,
    nb_episodes=1,
    visualize=True,
)

Testing for 1 episodes ...
Episode 1: reward: -21.000, steps: 1021


<tensorflow.python.keras.callbacks.History at 0x7fd783691d60>

In [21]:
"""A trained agent on the virtual environment"""
# Load previously trained weights into the network.
S_Net.load_weights("0weights/dqn_PONG_weights_1500000.h5f")

# Rebuild the agent's supporting objects around the pretrained network.
memory = SequentialMemory(
    limit=1_000_000,
    window_length=WINDOW_LENGTH,
)

# value_max equals value_min here, so the annealed `eps` stays at 0.1.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=0.1,
    value_min=0.1,
    value_test=0.05,
    nb_steps=1_000_000,
)

processor = ImageProcessor()

dqn = DQNAgent(
    model=S_Net,
    nb_actions=num_actions,
    policy=policy,
    memory=memory,
    processor=processor,
    nb_steps_warmup=50_000,
    gamma=0.99,
    target_model_update=10_000,
    train_interval=4,
    delta_clip=1,
)
dqn.compile(Adam(learning_rate=2.5e-4), metrics=['mae'])

In [23]:
# Run two rendered test episodes with the pretrained agent.
dqn.test(
    env,
    nb_episodes=2,
    visualize=True,
)

Testing for 2 episodes ...
Episode 1: reward: -15.000, steps: 3342
Episode 2: reward: -15.000, steps: 4107


<tensorflow.python.keras.callbacks.History at 0x7fd9a4257fa0>