In [1]:
import gym

In [2]:
# create environment
env = gym.make('SpaceInvaders-v0')

A.L.E: Arcade Learning Environment (version +978d2ce)
[Powered by Stella]


In [5]:
# see env

episodes = 10 # attempts

for episode in range(1, episodes):
    state = env.reset()
    done = False
    score = 0
    
    while not done:
        env.render()
        state, reward, done, info = env.step(env.action_space.sample()) # action agent takes in each frame

        score += reward
    print("Episode: {}\tScore: {}".format(episode, score))

env.close()

Episode: 1	Score: 240.0
Episode: 2	Score: 80.0
Episode: 3	Score: 180.0
Episode: 4	Score: 135.0
Episode: 5	Score: 345.0
Episode: 6	Score: 110.0
Episode: 7	Score: 105.0
Episode: 8	Score: 160.0
Episode: 9	Score: 215.0


In [3]:
env.action_space # possible actions we can take within an environment

Discrete(6)

In [6]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam

2022-02-14 11:06:19.068783: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory
2022-02-14 11:06:19.068819: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [13]:

def build_model(height, width, channels, actions):
    model = Sequential()

    model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=(3, height, width, channels)))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))

    model.add(Flatten())

    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(actions, activation='linear'))

    return model

In [15]:
height, width, channels = env.observation_space.shape  # get height width and channels
actions = env.action_space.n

In [22]:
del model

In [24]:
model = build_model(height, width, channels, actions)

In [18]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory  # dataset for model to train
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy  # policy with threshold, take action leading to highest reward

In [20]:
# rl agent

def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1, value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=2000, window_length=3)

    dqn = DQNAgent(model=model, memory=memory, policy=policy, enable_dueling_network=True, dueling_type='avg', nb_actions=actions, nb_steps_warmup=1000)

    return dqn

In [25]:
dqn = build_agent(model, actions)

### training agent

In [27]:
dqn.compile(Adam(lr=0.0001))  # compile model

In [31]:
dqn.fit(env, nb_steps=40000, visualize=False, verbose=1)

Training for 1000 steps ...
Interval 1 (0 steps performed)
 1000/10000 [==>...........................] - ETA: 8:33 - reward: 0.1700done, took 57.080 seconds


<tensorflow.python.keras.callbacks.History at 0x7f75642cbc40>

In [33]:
scores = dqn.test(env, nb_episodes=10, visualize=True)

print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...
Episode 1: reward: 225.000, steps: 1016
Episode 2: reward: 185.000, steps: 833
Episode 3: reward: 110.000, steps: 660
Episode 4: reward: 215.000, steps: 861
Episode 5: reward: 155.000, steps: 836
Episode 6: reward: 100.000, steps: 750
Episode 7: reward: 180.000, steps: 843
Episode 8: reward: 195.000, steps: 997
Episode 9: reward: 180.000, steps: 711
Episode 10: reward: 35.000, steps: 382
158.0


In [34]:
# add more layers
# train more
# increase memory size/ dataset
# increase lr

# saving and loading model

In [35]:
dqn.save_weights('models/dqn.h5f')

In [36]:
dqn.load_weights('models/dqn.h5f')