In [36]:
import tensorflow
import gym
import random
import atari_py
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [37]:
# Create the Space Invaders environment, then record the number of discrete
# actions and the dimensions of a single observation for the cells that
# build the model and the agent.
env = gym.make('SpaceInvaders-v4')
actions = env.action_space.n
(height, width, channels) = env.observation_space.shape

In [38]:
# Display the list of action names reported by the underlying environment.
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [39]:
# Baseline: play a few episodes with uniformly random actions and print the
# total reward of each one, as a reference point for the trained agent.
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        # Draw from however many actions the environment exposes rather than
        # a hard-coded [0..5] list, so this cell keeps working if the game
        # (and therefore the action count) is changed.
        action = random.randrange(env.action_space.n)
        n_state, reward, done, info = env.step(action)
        score += reward
    print('Episode:{} score:{}'.format(episode, score))
env.close()


Episode:1 score:105.0
Episode:2 score:110.0
Episode:3 score:215.0
Episode:4 score:150.0
Episode:5 score:245.0


In [45]:
def build_model(height, width, channels, actions, window_length=3):
    """Build the Q-network: one convolution, a flatten, and two dense layers.

    Args:
        height: Height of a single observation frame.
        width: Width of a single observation frame.
        channels: Number of channels in a single observation frame.
        actions: Size of the output layer (one linear unit per action).
        window_length: Number of frames stacked into one network input.
            Defaults to 3, the value that used to be hard-coded here. It
            should agree with the ``window_length`` given to the agent's
            replay memory.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    # Each network input is a stack of `window_length` frames.
    stacked_input_shape = (window_length, height, width, channels)

    model = Sequential()
    model.add(Convolution2D(32, (8,8), strides=(6,6), activation='relu',
                            input_shape=stacked_input_shape))

    model.add(Flatten())

    model.add(Dense(32, activation = 'relu'))
    # Linear output: one unbounded value per action.
    model.add(Dense(actions, activation = 'linear'))
    return model


In [46]:
# Instantiate the network for this environment's frame size and action count.
model = build_model(
    height=height,
    width=width,
    channels=channels,
    actions=actions,
)



In [55]:
def build_agent(model, actions):
    """Assemble a dueling DQN agent around ``model``.

    Args:
        model: The Keras model used as the Q-network.
        actions: Number of discrete actions, passed as ``nb_actions``.

    Returns:
        The configured (not yet compiled) ``DQNAgent``.
    """
    # Epsilon-greedy exploration, annealed linearly from 1.0 down to 0.1 over
    # 10000 steps. `value_test` is the epsilon used outside training; it is a
    # probability, so it must lie in [0, 1]. The previous value of 2 made the
    # agent choose a random action on every evaluation step.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr="eps",
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.2,
                                  nb_steps=10000)

    # NOTE(review): the replay limit (100) is much smaller than
    # nb_steps_warmup (1000) below, so most warm-up transitions are discarded
    # before learning starts -- confirm this is intended.
    memory = SequentialMemory(limit=100, window_length=3)

    dqn = DQNAgent(model=model,
                   memory=memory,
                   policy=policy,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   nb_actions=actions,
                   nb_steps_warmup=1000)
    return dqn

    
    

In [56]:
# Build the agent, compile it with Adam, and train it on the environment.
dqn = build_agent(model, actions)
# `lr` is a deprecated alias that newer Keras releases reject;
# `learning_rate` is the supported keyword.
dqn.compile(Adam(learning_rate=1e-4))
dqn.fit(env, nb_steps = 10000, visualize=False, verbose =1)

Training for 10000 steps ...
Interval 1 (0 steps performed)
 1074/10000 [==>...........................] - ETA: 8:29 - reward: 0.2095done, took 61.914 seconds


<tensorflow.python.keras.callbacks.History at 0x28809912988>