In [None]:
import pickle as pickle
import numpy as np
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load 'model.pkl' if it comes from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
#print(model)

In [None]:
def sgm(x):
    """Logistic sigmoid: 1 / (1 + exp(-x)), applied element-wise by NumPy."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

def dsgm(x):
    """Return x * (1 - x).

    This equals the sigmoid's derivative when `x` is itself a sigmoid
    output (i.e. x = sgm(z)), not the pre-activation value.
    """
    complement = 1 - x
    return x * complement

In [None]:
def pre_process_images(input_observation, prev_processed_observation, input_dimensions):
    """Turn a raw game frame into a flattened frame-difference vector.

    The frame is cropped to rows 35..194, downsampled by 2 along both
    spatial axes, reduced to its first colour channel, cleared of the
    background values, binarised and flattened. For the 210x160x3 frames
    this notebook expects, that gives an 80x80 image, i.e. a 6400-element
    float vector.

    Args:
        input_observation: raw frame array (expected 210x160x3). It is
            not modified by this function.
        prev_processed_observation: flattened binary frame returned by the
            previous call, or None when there is no previous frame.
        input_dimensions: length of the zero vector returned when there is
            no previous frame (6400 for an 80x80 image).

    Returns:
        A `(difference, processed)` tuple. `difference` is the current
        processed frame minus the previous one (all zeros when there is no
        previous frame). `processed` is the current processed frame and
        should be passed back as `prev_processed_observation` next call.
    """
    processed_observation = input_observation[35:195]  # crop
    processed_observation = down_sample(processed_observation)
    # Slicing only produces views of the caller's frame; copy before the
    # in-place edits below so the raw observation is left untouched.
    processed_observation = remove_color(processed_observation).copy()
    processed_observation = remove_background(processed_observation)
    processed_observation[processed_observation != 0] = 1  # everything else (paddles, ball) just set to 1
    # Flatten the 2-D image into a 1-D float vector. `np.float` was removed
    # in NumPy 1.24; np.float64 is the dtype it used to alias.
    processed_observation = processed_observation.astype(np.float64).ravel()

    # Feed the network the change between successive frames.
    if prev_processed_observation is not None:
        input_observation = processed_observation - prev_processed_observation
    else:
        input_observation = np.zeros(input_dimensions)

    # Hand back the *current* frame so the caller can supply it as the
    # previous frame on the next call.
    return input_observation, processed_observation

def down_sample(image):
    """Keep every second row and every second column of a 3-D image array."""
    every_other = slice(None, None, 2)
    return image[every_other, every_other, :]

def remove_color(image):
    """Reduce a 3-D image array to 2-D by keeping only its first channel."""
    first_channel = 0
    return image[:, :, first_channel]

def remove_background(image):
    """Zero out the background pixel values 144 and 109.

    The array is modified in place and the same object is returned.
    """
    for background_value in (144, 109):
        image[image == background_value] = 0
    return image

In [None]:
def feed_forward_nets(model, image_matrix):
    """Run the two-layer network forward on one input vector.

    Each layer is read from `model['layer_N']` as a dict holding a
    'weight' and a 'bias'; both layers use the sigmoid `sgm` as activation.

    Returns:
        (hidden_layer, output_layer): the activations of the two layers.
    """
    first = model['layer_1']
    hidden_layer = sgm(np.dot(first['weight'], image_matrix) + first['bias'])

    second = model['layer_2']
    output_layer = sgm(np.dot(hidden_layer, second['weight']) + second['bias'])

    return hidden_layer, output_layer

In [None]:
def choose_action(probability):
    """Sample an action: 2 with chance `probability`, otherwise 3.

    One uniform draw from [0, 1) is compared against `probability`.
    Action 2 moves the agent up and action 3 moves it down in the
    OpenAI Gym environment.
    """
    move_up, move_down = 2, 3
    return move_up if np.random.uniform() < probability else move_down

In [None]:
import gym
# Create the Pong environment; `env` is the handle the game-loop cell uses
# to reset, render and step the game.
env = gym.make("Pong-v0")

In [None]:
# Processed frame from the previous step; None means "no previous frame yet".
prev_process_images = None
input_dimensions = 80 * 80  # length of the flattened 80x80 processed frame
image = env.reset()
try:
    # There is no exit condition: the loop plays episode after episode until
    # the kernel is interrupted.
    while True:
        env.render()
        process_images, prev_process_images = pre_process_images(image, prev_process_images, input_dimensions)
        hidden_layers, probability = feed_forward_nets(model, process_images)
        action = choose_action(probability)

        # Apply the sampled up/down action and advance the game by one step.
        image, reward, done, info = env.step(action)
        if done:
            # Episode finished: start a new one and drop the stale previous
            # frame so the first difference of the new episode is all zeros.
            image = env.reset()
            prev_process_images = None
finally:
    # Release the environment (and its render window) even when the loop is
    # stopped by an interrupt or an error.
    env.close()