# Project 1: Navigation

The following code initializes the Unity environment. If you want to train a new agent, please restart the kernel first.

In [1]:
import torch
from unityagents import UnityEnvironment

# Launch the Unity environment.
# Change ENV_FILE if the environment executable is located somewhere else.
ENV_FILE = 'Reacher_20_Windows_x86_64/Reacher.exe'
env = UnityEnvironment(file_name=ENV_FILE, seed=0)

# the first brain exposed by the environment is the one used by all cells below
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


## Training

The following code can be used to train the agent. You can change the configuration of the agent as well as which agent and network should be used in the [config.json](config.json) file. This is a list of the configurations:
- **name**: The name of the approach used for saving and loading the agent and its statistics
- **agent**: The name of the agent to use
- **num_episodes**: The number of episodes used during training
- **agents**: The configurations for the different agents available

In [2]:
import numpy as np
import json
import os

from importlib import reload
import agents.agents
reload(agents.agents)
from agents.agents import get_agent
import utils.visualizations
reload(utils.visualizations)
import utils.visualizations as vis


# load the current configuration
with open("config.json", "r") as config_file:
    config = json.load(config_file)

# create the results folder of this approach; makedirs also creates the
# parent "./data" folder and does nothing if the folders already exist
result_dir = f"./data/{config['name']}"
os.makedirs(result_dir, exist_ok=True)

# keep a copy of the configuration next to the results
with open(f"{result_dir}/config.json", "w") as config_copy:
    json.dump(config, config_copy)

# reset the environment once to read the action and observation sizes
env_info = env.reset(train_mode=True)[brain_name]
action_size = brain.vector_action_space_size
state_size = len(env_info.vector_observations[0])

# create the agent selected in the configuration
agent = get_agent(config["agent"], config["agents"], state_size, action_size, 0)

# train the agent and collect statistics
#   scores: one entry per episode (sum over the steps of the mean reward)
#   losses: one entry per call of agent.step that returned a loss
scores = []
losses = []
for i_episode in range(1, config["num_episodes"]+1):
    score = 0

    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations

    # one episode
    while True:
        # select action
        action = agent.act(state)

        # get feedback from the environment
        env_info = env.step(action)[brain_name]
        next_state = env_info.vector_observations
        reward = env_info.rewards
        done = env_info.local_done

        # let the agent process the transition; it may return no loss
        loss = agent.step(state, action, reward, next_state, done)
        if loss is not None:
            losses.append(np.mean(loss))

        state = next_state
        score += np.mean(reward)
        if np.any(done):
            # episode ended as soon as any of the done flags is set
            break

    # statistics: everything collected so far during the first 99 episodes,
    # afterwards only the last 100 entries of each list
    # NOTE(review): losses holds one entry per returned loss, not one per
    # episode, so its window covers the last 100 losses rather than the last
    # 100 episodes - confirm that this is intended
    scores.append(score)
    window = slice(None) if i_episode < 100 else slice(-100, None)
    # no loss may have been returned yet; report nan without the numpy
    # warning that np.mean raises for an empty list
    avg_loss = np.mean(losses[window]) if losses else float("nan")
    avg_score = np.mean(scores[window])

    # overwrite the progress line in place and keep every 10th episode
    line_end = "\n" if i_episode % 10 == 0 else ""
    print('\rEpisode {}\tAverage Loss: {:.5f}\tAverage Score: {:.5f}'.format(i_episode, avg_loss, avg_score), end=line_end)

# persist the trained agent together with the collected statistics
agent.save(config["name"])
statistics_path = f"./data/{config['name']}/statistics.npz"
np.savez(statistics_path, scores=scores, losses=losses)

# draw both learning curves twice: first with show=False, then with show=True
for show_flag in (False, True):
    vis.plot_learning_curve([scores], [config["name"]], show=show_flag)
    vis.plot_learning_curve2([losses], [config["name"]], show=show_flag)


Episode 10	Average Loss: -0.37922	Average Score: 0.91280
Episode 20	Average Loss: -0.42165	Average Score: 0.77952
Episode 30	Average Loss: -0.44924	Average Score: 0.57840
Episode 40	Average Loss: -0.46576	Average Score: 0.78965
Episode 50	Average Loss: -0.47739	Average Score: 1.23719


KeyboardInterrupt: 

## Test

The following code can be used to test the saved agent for one episode. You can change which agent to test in the [config.json](config.json) file.

In [12]:
import json

# numpy is used below (np.mean / np.any); import it here so that this cell
# also runs after a kernel restart without executing the training cell
import numpy as np

from importlib import reload
import agents.agents
reload(agents.agents)
from agents.agents import get_agent
import utils.visualizations
reload(utils.visualizations)
import utils.visualizations as vis

# read the name of the approach from the current config, then load the
# config stored in the result folder of that approach
with open("config.json", "r") as f:
    config = json.load(f)
with open(f"./data/{config['name']}/config.json", "r") as f:
    config = json.load(f)

# get env infos
env_info = env.reset(train_mode=True)[brain_name]
action_size = brain.vector_action_space_size
state_size = len(env_info.vector_observations[0])

# create agent and load trained parameters
agent = get_agent(config["agent"], config["agents"], state_size, action_size, 0)
agent.load(config["name"])

# one episode test
env_info = env.reset(train_mode=False)[brain_name]
state = env_info.vector_observations
score = 0
while True:
    action = agent.act(state, test=True)
    env_info = env.step(action)[brain_name]
    next_state = env_info.vector_observations
    reward = env_info.rewards
    done = env_info.local_done
    # accumulate the mean of the rewards returned for this step
    score += np.mean(reward)
    state = next_state
    if np.any(done):
        # stop as soon as any of the done flags is set
        break

# statistics
print("Score: {}".format(score))

Score: 28.268999368138566


If you don't need the environment anymore, you can close it.

In [None]:
# close the Unity environment (the `env` object created in the first cell)
env.close()

## Comparison

With the following code you can compare the statistics of different approaches. For this, specify the names of the approaches below (variable: "names").

In [2]:
import numpy as np

from importlib import reload
import utils.visualizations
reload(utils.visualizations)
import utils.visualizations as vis

# specify the names of the approaches to compare
names = ["gaussian_ppo", "gaussian_a2c", "ddpg"]

# load the score history of every approach; only the "scores" array is read
# and the context manager closes the .npz archive right afterwards
scores = []
for name in names:
    with np.load(f"./data/{name}/statistics.npz") as data:
        scores.append(data["scores"])

# plot statistics
vis.plot_learning_curve(scores, names)