# Imports

In [1]:
import numpy as np
import _pickle as pickle
import gym
import time
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras.layers as layers
import tensorflow.keras.initializers as initializers

# Model

In [3]:
env = gym.make("Pong-v0")
initializer = initializers.GlorotNormal

x_in = layers.Input(shape = (6400,1))
x = layers.Dense(200, kernel_initializer= initializer, activation="relu")(x_in)
x_out = layers.Dense(1, activation="sigmoid")(x)

model = tf.keras.Model(x_in, x_out)

model.compile(optimizer = "adam", loss = "mse", metrics = ["acc"])

In [4]:
env.observation_space

Box(0, 255, (210, 160, 3), uint8)

In [5]:
env.unwrapped.get_action_meanings()

# NOOP is the same as FIRE (standing still)
# LEFT is the same as LEFTFIRE (down)
# RIGHT is the same as RIGHTFIRE (up)

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [6]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 6400, 1)]         0         
_________________________________________________________________
dense (Dense)                (None, 6400, 200)         400       
_________________________________________________________________
dense_1 (Dense)              (None, 6400, 1)           201       
Total params: 601
Trainable params: 601
Non-trainable params: 0
_________________________________________________________________


# Preprocessing

In [7]:
def prepro(input_frame):
    """ prepro 210x160x3 uint8 frame into 6400 (80x80) 1D float vector """
    input_frame = input_frame[34:194] # crop
    input_frame = input_frame[::2,::2,0] # downsample by factor of 2 (halves the resolution of the image)
    #This takes every other pixel in the image
    input_frame[input_frame == 144] = 0 # erase background (background type 1)
    input_frame[input_frame == 109] = 0 # erase background (background type 2)
    input_frame[input_frame != 0] = 1 # everything else (paddles, ball) just set to 1
    return input_frame.astype(np.float).ravel()

# Hyperparamters

In [8]:
render = False
prev_frame = None
game_dimensions = 80*80
gamma = 0.99
resume = False
batch_size = 10

In [9]:
x1 = np.asarray(total_change_in_frames)
y1 = np.asarray(all_actions)

model.fit(x = x1, y = y1, epochs = 100)

# Must all be numpy arrays

NameError: name 'total_change_in_frames' is not defined

# Main Code

In [10]:
observation = env.reset()
reward_sum = 0
eps = 1
episode_number = 0
running_reward = None
total_change_in_frames, total_rewards, all_actions = [], [], []

if resume:
    model.load_weights("ModelWeights")

while True:
    if render:
        env.render()
    
    curr_frame = prepro(observation)
    change_in_frame = curr_frame - prev_frame if prev_frame is not None else np.zeros(game_dimensions)
    prev_frame = curr_frame
    total_change_in_frames.append(change_in_frame)
    
    if np.random.random() < eps:
        action = env.action_space.sample()
    else:
        action = model.predict(change_in_frame)
        print(action)
    
    all_actions.append(action)
    
    observation, reward, done, _ = env.step(action) 
    
    total_rewards.append(reward)
    reward_sum += reward
    eps -= 0.001
    
    if done:
        running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
        print ('resetting env. episode reward total was %f. running mean: %f' % (reward_sum, running_reward))
        reward_sum = 0
        observation = env.reset() # reset env
        prev_frame = None
        episode_number += 1
        
        if episode_number % batch_size == 0:
            
            model.save_weights("ModelWeights")
    
    if reward != 0: # Pong has either +1 or -1 reward exactly when game ends.
        print('ep %d: game finished, reward: %f' % (episode_number, reward) + ('' if reward == -1 else ' !!!!!!!!'))

[[[0.5]]

 [[0.5]]

 [[0.5]]

 ...

 [[0.5]]

 [[0.5]]

 [[0.5]]]


IndexError: arrays used as indices must be of integer (or boolean) type