In [1]:
!pip install tensorflow==2.5.1 gym keras-rl2 gym[atari]

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import gym
import random
import time

In [3]:
env = gym.make('ALE/SpaceInvaders-v5', render_mode='human')
height, width, channels = env.observation_space.shape
actions = env.action_space.n

In [4]:
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [5]:
# episodes = 5
# for episode in range(1, episodes+1):
#     state = env.reset()
#     done = False
#     score = 0
    
#     while not done:
#         time.sleep(0.1)
#         action = random.choice([0,1,2,3,4,5])
#         n_state, reward, done, info = env.step(action)
#         score+=reward
#     print('Episode:{} score:{}'.format(episode, score))
# env.close()

In [6]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

In [7]:
def build_model(height, width, channels, actions):
    model = Sequential()
    model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu', padding='same', input_shape=(3,height, width, channels)))
    model.add(Convolution2D(64, (4,4), strides=(2,22), padding='same', activation='relu'))
    model.add(Convolution2D(64, (3,3), padding='same', activation='relu'))
    model.add(Flatten())
    model.add(Dense(512,activation='relu'))
    model.add(Dense(256,activation='relu'))
    model.add(Dense(actions,activation='linear'))
    return model


In [15]:
del model

In [16]:
model = build_model(height, width, channels, actions)

In [17]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 3, 53, 40, 32)     6176      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 3, 27, 2, 64)      32832     
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 3, 27, 2, 64)      36928     
_________________________________________________________________
flatten_1 (Flatten)          (None, 10368)             0         
_________________________________________________________________
dense_4 (Dense)              (None, 512)               5308928   
_________________________________________________________________
dense_5 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_6 (Dense)              (None, 6)                

In [18]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [19]:
from tensorflow.python.framework.ops import disable_eager_execution

In [20]:
disable_eager_execution()

In [21]:
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1, value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                  enable_dueling_network=True, dueling_type='avg',
                  nb_actions=actions, nb_steps_warmup=10000)
    return dqn

In [22]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-4) )
dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)



Training for 10000 steps ...




  515/10000: episode: 1, duration: 48.469s, episode steps: 515, steps per second:  11, episode reward: 165.000, mean reward:  0.320 [ 0.000, 30.000], mean action: 2.452 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
 1171/10000: episode: 2, duration: 47.626s, episode steps: 656, steps per second:  14, episode reward: 220.000, mean reward:  0.335 [ 0.000, 25.000], mean action: 2.363 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
 1448/10000: episode: 3, duration: 19.646s, episode steps: 277, steps per second:  14, episode reward: 35.000, mean reward:  0.126 [ 0.000, 15.000], mean action: 2.350 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
 1976/10000: episode: 4, duration: 36.625s, episode steps: 528, steps per second:  14, episode reward: 215.000, mean reward:  0.407 [ 0.000, 30.000], mean action: 2.360 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
 2800/10000: episode: 5, duration: 56.460s, episode steps: 824, steps per second:  15, episode reward: 325.000, mean 

<tensorflow.python.keras.callbacks.History at 0x1874527ad30>

In [23]:
scores = dqn.test(env, nb_episodes=10, visualize=True)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...


Error: render(mode='human') is deprecated. Please supply `render_mode` when constructing your environment, e.g., gym.make(ID, render_mode='human'). The new `render_mode` keyword argument supports DPI scaling, audio, and native framerates.

In [None]:
dqn.save_weights('SaveWeights/10k-Fast/dqn_weights.h5f')

In [None]:
del model, dqn

In [None]:
dqn.load_weights('SaveWeights/10k-Fast/dqn_weights.h5f')