# Import Necessary Libraries

In [1]:
import gym
import torch
import numpy as np
from ipynb.fs.full.BreakoutPPO import Agent
from utils import plot_learning_curve

# Preprocess Breakout Environment for Faster Training

In [2]:
# Preprocess image (code from class)
def prepro(image):
    """Turn a raw Breakout frame into a binary mask without touching the input.

    Steps: keep rows 35..194, take every second row and column of channel 0,
    zero the two background values (144 and 109), then set every remaining
    non-zero pixel to 1.

    The crop/stride slices are NumPy views, so masking them directly would
    overwrite the caller's frame. A copy is taken first so the raw frame
    stays intact.

    Args:
        image: NumPy array indexed as image[row, column, channel].
            Presumably the (210, 160, 3) uint8 Atari frame -- confirm against
            the environment; the final reshape only succeeds when the cropped,
            downsampled frame holds exactly 80 * 80 values.

    Returns:
        A new array of shape (1, 80, 80), same dtype as `image`, containing
        only 0s and 1s.
    """
    cropped = image[35:195]              # crop
    frame = cropped[::2, ::2, 0].copy()  # downsample by factor of 2; copy so the input is not mutated
    frame[frame == 144] = 0  # erase background (background type 1)
    frame[frame == 109] = 0  # erase background (background type 2)
    frame[frame != 0] = 1    # everything else (paddles, ball) just set to 1
    return np.reshape(frame, (1, 80, 80))

# Setup Breakout Environment

In [3]:
if __name__ == '__main__':
    # Training cell: runs the PPO agent on preprocessed Breakout frames,
    # calls agent.learn() every N environment steps, saves the models whenever
    # the running average score improves, and plots the score history.
    env = gym.make("BreakoutDeterministic-v4")
    try:
        N = 20           # environment steps between calls to agent.learn()
        batch_size = 5
        n_epochs = 4
        alpha = 0.00025  # forwarded to Agent(alpha=...); presumably the optimiser learning rate -- confirm in BreakoutPPO

        # One reset up front, used only to discover the preprocessed frame shape.
        preprocessed_image = prepro(env.reset())  # (1, 80, 80)

        agent = Agent(num_actions=env.action_space.n, batch_size=batch_size,
                      alpha=alpha, num_epochs=n_epochs,
                      input_dims=preprocessed_image.shape)
        n_games = 15  # 45 mins for 100 iterations of training

        figure_file = 'plots/Breakout_Conv.png'

        # NOTE(review): best_score starts at the lower bound of the reward range,
        # so the first finished episode always triggers save_models() and
        # overwrites the checkpoint loaded just below -- confirm this is intended.
        best_score = env.reward_range[0]
        score_history = []

        learn_iters = 0
        avg_score = 0
        n_steps = 0

        # Resume from the previously saved models.
        agent.load_models()

        for i in range(n_games):
            observation = env.reset()
            done = False
            score = 0
            while not done:
                # The agent only ever sees preprocessed frames.
                state = prepro(observation)
                action, prob, val = agent.choose_action(state)
                observation_, reward, done, info = env.step(action)
                n_steps += 1
                score += reward
                agent.remember(state, action, prob, val, reward, done)
                if n_steps % N == 0:  # time to run a learning update
                    agent.learn()
                    learn_iters += 1
                observation = observation_
            score_history.append(score)
            avg_score = np.mean(score_history[-100:])  # mean of the last (up to) 100 episodes

            if avg_score > best_score:  # new best running average
                best_score = avg_score
                agent.save_models()

            print('episode', i, 'score %.1f' % score, 'avg score %.1f' % avg_score,
                  'time_steps', n_steps, 'learning_steps', learn_iters)
    finally:
        # Release the emulator even if training fails or is interrupted.
        env.close()

    x = [i+1 for i in range(len(score_history))]
    plot_learning_curve(x, score_history, figure_file,
                        "Training Episodes", "Average Scores", "Breakout")


A.L.E: Arcade Learning Environment (version 0.7.4+069f8bd)
[Powered by Stella]
  deprecation(
  deprecation(
  "        value = F.relu(self.conv3(value))\n",


... loading models ...
... saving models ...
episode 0 score 0.0 avg score 0.0 time_steps 181 learning_steps 9
episode 1 score 0.0 avg score 0.0 time_steps 354 learning_steps 17
episode 2 score 0.0 avg score 0.0 time_steps 568 learning_steps 28


# Testing/Rendering Model

In [1]:
    # Rendering cell: plays n_games episodes in a visible window using the
    # saved models. It relies on names defined in earlier cells (gym, np, Agent,
    # prepro); run the import and preprocessing cells first, otherwise this
    # cell fails with a NameError.
    env = gym.make('BreakoutDeterministic-v4', render_mode = 'human')
    try:
        N = 20          # environment steps between calls to agent.learn()
        batch_size = 5
        n_epochs = 4
        alpha = 0.0003  # forwarded to Agent(alpha=...); presumably the optimiser learning rate -- confirm in BreakoutPPO

        # One reset up front, used only to discover the preprocessed frame shape.
        preprocessed_image = prepro(env.reset())  # (1, 80, 80)

        # Use the hyper-parameters declared above rather than repeating literals.
        agent = Agent(num_actions=env.action_space.n, batch_size=batch_size,
                      alpha=alpha, num_epochs=n_epochs,
                      input_dims=preprocessed_image.shape)
        n_games = 1  # 4 mins for 100 iterations of training. Score does not improve though...

        best_score = env.reward_range[0]
        score_history = []

        learn_iters = 0
        avg_score = 0
        n_steps = 0

        # Load the previously saved models.
        agent.load_models()

        # NOTE(review): although this cell is labelled testing/rendering, the loop
        # below still calls agent.learn() and agent.save_models(), so watching a
        # game also updates the agent and overwrites the saved models. Confirm
        # whether a pure evaluation loop is wanted here.
        for i in range(n_games):
            observation = env.reset()
            done = False
            score = 0
            while not done:
                # The agent only ever sees preprocessed frames.
                state = prepro(observation)
                action, prob, val = agent.choose_action(state)
                observation_, reward, done, info = env.step(action)
                n_steps += 1
                score += reward
                agent.remember(state, action, prob, val, reward, done)
                if n_steps % N == 0:  # time to run a learning update
                    agent.learn()
                    learn_iters += 1
                observation = observation_
            score_history.append(score)
            avg_score = np.mean(score_history[-100:])  # mean of the last (up to) 100 episodes

            if avg_score > best_score:  # new best running average
                best_score = avg_score
                agent.save_models()

            print('episode', i, 'score %.1f' % score, 'avg score %.1f' % avg_score,
                  'time_steps', n_steps, 'learning_steps', learn_iters)
    finally:
        # Always close the render window, even if the episode loop raises.
        env.close()

NameError: name 'gym' is not defined