In [1]:
import gym

In [2]:
env = gym.make('SpaceInvaders-v0')

### Random Agent

In [39]:
episodes = 10

for episode in range(1, episodes):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        env.render()
        state, reward, done, info=env.step(env.action_space.sample())
        score += reward
    print('Episode : {}\nScore: {}\nInfo: {}'.format(episode,score,info))
    
env.close()

Episode : 1
Score: 180.0
Info: {'ale.lives': 0}
Episode : 2
Score: 270.0
Info: {'ale.lives': 0}
Episode : 3
Score: 60.0
Info: {'ale.lives': 0}
Episode : 4
Score: 65.0
Info: {'ale.lives': 0}
Episode : 5
Score: 35.0
Info: {'ale.lives': 0}
Episode : 6
Score: 80.0
Info: {'ale.lives': 0}
Episode : 7
Score: 110.0
Info: {'ale.lives': 0}
Episode : 8
Score: 35.0
Info: {'ale.lives': 0}
Episode : 9
Score: 105.0
Info: {'ale.lives': 0}


### DQNA

In [40]:
#### Import NN Packs
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,Conv2D
from tensorflow.keras.optimizers import Adam
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

In [41]:
def Bmodel(height, width, channels, actions):
    model = Sequential()
    model.add(Conv2D(32, (8,8), strides=(4,4), activation='relu', input_shape=(3, height, width, channels)))
    model.add(Conv2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Conv2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Conv2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model
    

In [42]:
height, width, channels = env.observation_space.shape
actions = env.action_space.n

In [52]:
model = Bmodel(height, width, channels, actions)
print(model.summary())

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_57 (Conv2D)           (None, 3, 51, 39, 32)     6176      
_________________________________________________________________
conv2d_58 (Conv2D)           (None, 3, 24, 18, 64)     32832     
_________________________________________________________________
conv2d_59 (Conv2D)           (None, 3, 11, 8, 64)      65600     
_________________________________________________________________
conv2d_60 (Conv2D)           (None, 3, 4, 3, 64)       65600     
_________________________________________________________________
flatten_7 (Flatten)          (None, 2304)              0         
_________________________________________________________________
dense_30 (Dense)             (None, 512)               1180160   
_________________________________________________________________
dense_31 (Dense)             (None, 256)             

In [44]:
#### Importing Keras-rl2

from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [45]:
def Bagent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2 ,nb_steps=9999)
    memory = SequentialMemory(limit=99999, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   nb_actions = actions, enable_dueling_network=True, 
                   nb_steps_warmup=9999)
    return dqn

In [46]:
dqn = Bagent(model, actions)

In [47]:
dqn.compile(Adam(learning_rate=0.001))

In [48]:
dqn.fit(env, nb_steps=99999, visualize=True ,verbose=1)

Training for 99999 steps ...
Interval 1 (0 steps performed)
12 episodes - episode_reward: 177.083 [5.000, 365.000] - ale.lives: 2.091

Interval 2 (10000 steps performed)
  336/10000 [>.............................] - ETA: 1:55:16 - reward: 0.1339done, took 407.058 seconds


<tensorflow.python.keras.callbacks.History at 0x1d51b167880>

In [37]:
scores = dqn.test(env, nb_episodes=50, visualize=True)
print(np.mean(scores.history['episode_reward']))

Testing for 50 episodes ...
Episode 1: reward: 130.000, steps: 1011
Episode 2: reward: 115.000, steps: 764
Episode 3: reward: 305.000, steps: 935
Episode 4: reward: 35.000, steps: 495
Episode 5: reward: 60.000, steps: 682
Episode 6: reward: 225.000, steps: 1298
Episode 7: reward: 395.000, steps: 1443
Episode 8: reward: 25.000, steps: 412
Episode 9: reward: 420.000, steps: 1128
Episode 10: reward: 90.000, steps: 698
Episode 11: reward: 55.000, steps: 1054
Episode 12: reward: 45.000, steps: 608
Episode 13: reward: 15.000, steps: 570
Episode 14: reward: 180.000, steps: 844
Episode 15: reward: 240.000, steps: 1064
Episode 16: reward: 350.000, steps: 995
Episode 17: reward: 235.000, steps: 1157
Episode 18: reward: 65.000, steps: 571
Episode 19: reward: 200.000, steps: 960
Episode 20: reward: 450.000, steps: 1659
Episode 21: reward: 245.000, steps: 883
Episode 22: reward: 235.000, steps: 1006
Episode 23: reward: 40.000, steps: 683
Episode 24: reward: 215.000, steps: 805
Episode 25: reward: 3

In [38]:
dqn.save_weights('models/dqn2.h5f')

In [None]:
# dqn.load_weights('models/dqn.h5f')