# Project 1 - Navigation
---
<font color=black> This notebook implements a DQN agent for Unity's Banana Collector environment, as modified by Udacity.</font>

<font color=red> This code is an adaptation of code provided by Udacity's Deep Reinforcement Learning Nanodegree.</font>

### 1. Import the Necessary Packages

In [None]:
import random
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
import torch
from unityagents import UnityEnvironment

from navigation_dqn_agent import Agent

%matplotlib inline

### 2. Instantiate the Environment and Agent

Initialize the environment in the code cell below.

In [None]:
# create the Unity environment from the Linux Banana build (relative path)
env = UnityEnvironment(file_name="Banana_Linux/Banana.x86_64")

In [None]:
# get the default brain: the first name listed by the environment and the
# brain object stored under that name
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

Initialize the agent in the code cell below.

In [None]:
# reset the environment once to obtain a first observation
env_info = env.reset(train_mode=True)[brain_name]

# the length of the observation vector gives the size of the state the agent sees
state = env_info.vector_observations[0]
state_size = len(state)

# number of actions available to the agent, as reported by the brain
action_size = brain.vector_action_space_size

# Agent is imported in the notebook's import cell
agent = Agent(state_size=state_size, action_size=action_size, seed=0)

### 3. Watch a Smart Agent!

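In the next code cell, the trained weights are loaded from file so you can watch a smart agent! The agent is run for several repetitions, and the average score over the most recent 100 episodes of each repetition is reported.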

In [None]:
def run_episode(env, agent, brain_name, max_steps):
    """Play a single episode and return the total reward collected.

    The environment is reset with ``train_mode=False`` and actions are
    chosen by calling ``agent.act`` on the current state; no other method
    of the agent is called.

    Args:
        env: environment providing ``reset`` and ``step``.
        agent: object whose ``act(state)`` returns the action to take.
        brain_name: key used to read this agent's info from the environment.
        max_steps: maximum number of steps to play before giving up.

    Returns:
        Sum of the rewards received until the episode reported ``done``
        or ``max_steps`` steps were taken.
    """
    env_info = env.reset(train_mode=False)[brain_name]   # reset the environment
    state = env_info.vector_observations[0]              # get the current state
    score = 0
    for _ in range(max_steps):
        action = agent.act(state)                        # select an action
        env_info = env.step(action)[brain_name]          # send the action to the environment
        state = env_info.vector_observations[0]          # roll over to the next state
        score += env_info.rewards[0]                     # accumulate the reward
        if env_info.local_done[0]:                       # exit loop if episode finished
            break
    return score


# load the weights from file; map_location='cpu' lets a checkpoint that was
# saved on a GPU be read on a CPU-only machine (load_state_dict then copies
# the values into the network's existing parameters)
agent.qnetwork_local.load_state_dict(torch.load('weights.pth', map_location='cpu'))

n_episode = 1800      # maximum number of episodes per repetition
t_max = 1000          # maximum number of steps per episode
repetitions = 30      # number of evaluation runs
window_size = 100     # number of most recent episodes the average is taken over
solved_score = 13.0   # average score required to consider the environment solved
results = []          # one (episodes played, final average score) pair per repetition

for n in range(repetitions):
    print('\n -----------------  Repetition {}   -----------------------\n'.format(n))
    scores = []                                # list containing scores from each episode
    scores_window = deque(maxlen=window_size)  # most recent scores only
    for i_episode in range(1, n_episode+1):
        score = run_episode(env, agent, brain_name, t_max)
        scores.append(score)
        scores_window.append(score)

        average_score = np.mean(scores_window)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, average_score), end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, average_score))

        # Solved means an average of at least +13 over the last 100 episodes.
        # Requiring a full window prevents one good early episode from ending
        # the repetition before 100 episodes have actually been averaged.
        if len(scores_window) == window_size and average_score >= solved_score:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode, average_score))
            break

    # if the threshold was never reached, i_episode equals n_episode here
    results.append((i_episode, np.mean(scores_window)))

print("\nScores: {}".format(results))

# split the (episodes, average score) pairs into two parallel sequences
episodes_to_solve, final_averages = zip(*results)
print('\n\n Average of episodes to solve the environment: {:.2f}'.format(np.mean(episodes_to_solve)))
print('\n Average of rewards: {:.2f}'.format(np.mean(final_averages)))
           

In [None]:
# close the environment now that the evaluation is finished
env.close()