In [53]:
import gym

In [54]:
# Create the 'SpaceInvaders-v0' environment; `env` is reused by every later cell.
env = gym.make('SpaceInvaders-v0')

In [55]:
# Baseline: play a handful of episodes with randomly sampled actions and
# print the total reward collected in each one.
episodes = 10

for episode in range(episodes):
    state = env.reset()
    done, score = False, 0

    while not done:
        env.render()
        # Draw an action from the action space, then advance the game one step.
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        score = score + reward
    print('Episode: {}\nScore: {}'.format(episode + 1, score))

env.close()

Episode: 1
Score: 35.0
Episode: 2
Score: 105.0
Episode: 3
Score: 210.0
Episode: 4
Score: 225.0
Episode: 5
Score: 150.0
Episode: 6
Score: 55.0
Episode: 7
Score: 30.0
Episode: 8
Score: 60.0
Episode: 9
Score: 125.0
Episode: 10
Score: 105.0


In [56]:
# Import Neural Network Packages
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam

In [57]:
def build_model(height, width, channels, actions, window_length=3):
    """Build the convolutional Q-network used by the DQN agent.

    The network is two strided Conv2D layers, a Flatten, two fully connected
    ReLU layers (512 and 256 units) and a linear output layer with one unit
    per action.

    Args:
        height: Height of a single observation frame.
        width: Width of a single observation frame.
        channels: Number of channels in a single observation frame.
        actions: Number of output units (one per discrete action).
        window_length: Number of stacked frames per network input; this is
            the leading dimension of ``input_shape``. It should match the
            ``window_length`` given to the agent's replay memory. Defaults to
            3, the value that was previously hard-coded.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = Sequential()
    # Input is a window of frames: (window_length, height, width, channels).
    model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu',
                     input_shape=(window_length, height, width, channels)))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    # Linear head: one unbounded output per action.
    model.add(Dense(actions, activation='linear'))
    return model

In [58]:
# Size of the action space, used as the network's output width.
actions = env.action_space.n
# Observation shape unpacked into the three values build_model expects.
(height, width, channels) = env.observation_space.shape

In [59]:
# Instantiate the Q-network for this environment's frame size and action count.
model = build_model(
    height=height,
    width=width,
    channels=channels,
    actions=actions,
)

In [60]:
# Importing keras-rl2 reinforcement learning functions
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [61]:
def build_agent(model, actions):
    """Assemble a dueling DQN agent around ``model``.

    Args:
        model: The Q-network passed to ``DQNAgent``.
        actions: Number of discrete actions (``nb_actions``).

    Returns:
        A ``DQNAgent`` (not yet compiled) configured with:
          * an epsilon-greedy policy wrapped in ``LinearAnnealedPolicy``
            (``eps`` from 1.0 down to 0.1 over 10000 steps, 0.2 for testing),
          * a ``SequentialMemory`` with ``limit=2000`` and ``window_length=3``,
          * the 'avg' dueling network enabled and 1000 warm-up steps.
    """
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=10000,
    )
    # window_length here matches the leading dimension of the model's input_shape.
    replay = SequentialMemory(limit=2000, window_length=3)
    return DQNAgent(
        model=model,
        memory=replay,
        policy=exploration,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=1000,
    )

In [62]:
# Build the agent around the Q-network created above.
dqn = build_agent(model=model, actions=actions)

In [63]:
# Compile the agent with the Adam optimizer.
# `learning_rate` is the documented keyword in tf.keras; `lr` is a deprecated alias.
dqn.compile(Adam(learning_rate=0.0001))

In [64]:
# Train the agent on `env` for 40000 steps with rendering and progress logging on.
# Kept as the cell's last expression so the returned History object is displayed.
dqn.fit(
    env,
    nb_steps=40000,
    verbose=1,
    visualize=True,
)

Training for 40000 steps ...
Interval 1 (0 steps performed)
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
16 episodes - episode_reward: 109.688 [45.000, 185.000] - loss: 1.044 - mean_q: 3.163 - mean_eps: 0.505 - ale.lives: 2.128

Interval 2 (10000 steps performed)
14 episodes - episode_reward: 100.357 [35.000, 135.000] - loss: 0.256 - mean_q: 3.142 - mean_eps: 0.100 - ale.lives: 1.924

Interval 3 (20000 steps performed)
14 episodes - episode_reward: 140.000 [10.000, 400.000] - loss: 0.223 - mean_q: 3.542 - mean_eps: 0.100 - ale.lives: 1.996

Interval 4 (30000 steps performed)
done, took 11137.928 seconds


<tensorflow.python.keras.callbacks.History at 0x7ffaf67216d0>

In [None]:
# Run 10 rendered test episodes, then print the mean of the per-episode rewards.
scores = dqn.test(env, visualize=True, nb_episodes=10)
mean_reward = np.mean(scores.history['episode_reward'])
print(mean_reward)

Testing for 10 episodes ...
Episode 1: reward: 215.000, steps: 1030
Episode 2: reward: 115.000, steps: 537
Episode 3: reward: 85.000, steps: 703
Episode 4: reward: 30.000, steps: 361
Episode 5: reward: 240.000, steps: 854
Episode 6: reward: 30.000, steps: 351
Episode 7: reward: 50.000, steps: 452
Episode 8: reward: 30.000, steps: 363
Episode 9: reward: 90.000, steps: 600


In [65]:
# Persist the agent's weights to 'models/dqn.h5f'.
# NOTE(review): presumably the 'models/' directory must already exist — confirm.
# NOTE(review): re-running may prompt before overwriting an existing file — verify
# against the keras-rl save_weights signature.
dqn.save_weights('models/dqn.h5f')

In [None]:
# Load weights into the agent from 'models/dqn.h5f', the same path used by save_weights.
dqn.load_weights('models/dqn.h5f')