In [3]:
# Code Author: Chirag Mirani
# Date: January 24, 2022

# In this project, we are demonstrating how to load the OpenAI gym environment and take random actions.

# we are importing gym library to render space invadors
import gym


In [4]:
# loading space invaders pixel based environment
# For pixel based environment one should use neural network
env = gym.make('SpaceInvaders-v0')

In [5]:
episodes = 2  # we are going to play 10 Space Invaders game episodes

In [11]:
# start from Space Invaders game episode one and take random actions
for episode in range(1, episodes):  
    
    # first reset the environment and game state
    state = env.reset()
    done = False
    
    #initialize score to 0..
    score = 0
    
    #keep playing the game while the game is not done
    while not done:
        env.render()   #render the environment
        # take an action and get the next state, reward, whether we are done and information about the environment
        state, reward, done, info = env.step(env.action_space.sample())  # there are six actions we can take in space invaders. Start with random action
        score +=reward  # store rewards
    print ('Episode: {}\nScore: {}'.format(episode, score))  # prints out score for each game episodes

# don't forget to close the environment
env.close()

Episode: 1
Score: 20.0


In [6]:
# for reference print out number of actions
print(env.action_space)


Discrete(6)


In [7]:
# next we are going to use a neural network to figure out optimal actions.
# import numpy
import numpy as np

# sequence of layers are needed
from tensorflow.keras.models import Sequential

# these are different types of layers you can have a neural network
# Dense is a fully connected neural network
# Flatten layer just flattens the previous neutral network into a one dimensional array
# Convolution2d layer is needed to understand the images.  In this case, Space-Invador2.0
from tensorflow.keras.layers import Dense, Flatten, Conv2D

# Adam optimizer.  This optimizer will help us train the neural network such that it associates images with optimal actions
from tensorflow.keras.optimizers import Adam

In [8]:
# In this function, we are building our neural network. 
# these are pixels for our SpaceInvadersscreen (height, width, channels)
# Colored images typically have three channels, for the pixel value at the (row, column) coordinate for the red, green, and blue components.

# actions
# Main input: input shape = our space invaders image
# Main output: associate it with the six actions. 
def build_model(height, width, channels, actions):
    # sequence of layers
    model= Sequential()
    # take in the image and condense the image..
    # relu activation means the model is able to learn non-linear input/output relationships.  This allows for
    # more complicated relationships. Training the neural network will work like this.  Take in an input and associate it 
    # with the best action.
    model.add(Conv2D(32,(8,8), strides=(4,4), activation ='relu', input_shape=(3, height, width, channels)))
    #output unit 64, (4,4) and strides = (2,2) are filters that will help the model understand the image better
    model.add(Conv2D(64,(4,4), strides=(2,2),activation='relu'))
    
    #flatten image convolutions into one dimensional array
    model.add(Flatten())
    
    # send the flattened image to 512 neurons
    model.add(Dense(512,activation='relu'))
    
    # condense the associated image to 256 neurons
    model.add(Dense(256, activation='relu'))
    
    # output an action..
    model.add(Dense(actions, activation='linear'))
    return model

In [12]:
del model

In [13]:
#now we are ready to learn from any screen we pass into the model. 
# above we have our neural network setup. 
# Take in the observation space
height, width, channels = env.observation_space.shape
#specify number of actions.  This will be the output of our neural network
actions = env.action_space.n

model = build_model(height, width, channels, actions)
env.observation_space.shape

#importing keras-rl2
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

def build_agent(model, actions):
    policy=LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1, value_min=0.1,  value_test=0.2, nb_steps=10000)
    memory = SequentialMemory(limit=2000, window_length =3)
    dqn=DQNAgent(model=model, memory=memory, policy=policy, enable_dueling_network=True, dueling_type='avg', nb_actions=actions,
                nb_steps_warmup=1000)
    
    return dqn





In [16]:
dqn =build_agent(model, actions)

In [17]:
dqn.compile(Adam(lr=0.0001))



In [18]:
dqn.fit(env,nb_steps=1000,visualize=False, verbose=1)

Training for 1000 steps ...
Interval 1 (0 steps performed)




  999/10000 [=>............................] - ETA: 5:55 - reward: 0.2102done, took 39.825 seconds


<keras.callbacks.History at 0x1fedc2baca0>

In [25]:
#env.close()
env.reset()
scores=dqn.test(env, nb_episodes=3, visualize=True)
print(np.mean(scores.history['episode_reward']))



Testing for 3 episodes ...
Episode 1: reward: 320.000, steps: 1115
Episode 2: reward: 170.000, steps: 935
Episode 3: reward: 170.000, steps: 801
220.0


In [24]:
env.close()


In [46]:
dqn.save_weights('models/ChiragsDQNSpaceInvadersRLModel.h5f')

In [15]:
dqn.load_weights('models/ChiragsDQNSpaceInvadersRLModel.h5f')
#env.close()

AttributeError: 'DQNAgent' object has no attribute 'target_model'

In [51]:
env.reset()

array([[[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        ...,
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       [[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        ...,
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       [[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        ...,
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       ...,

       [[80, 89, 22],
        [80, 89, 22],
        [80, 89, 22],
        ...,
        [80, 89, 22],
        [80, 89, 22],
        [80, 89, 22]],

       [[80, 89, 22],
        [80, 89, 22],
        [80, 89, 22],
        ...,
        [80, 89, 22],
        [80, 89, 22],
        [80, 89, 22]],

       [[80, 89, 22],
        [80, 89, 22],
        [80, 89, 22],
        ...,
        [80, 89, 22],
        [80, 89, 22],
        [80, 89, 22]]], dtype=uint8)