In [None]:
### Train an RL agent using a DQN architecture in a Unity environment (bananaModel)

In [1]:
from unityagents import UnityEnvironment
import numpy as np
import matplotlib.pyplot as plt
from collections import deque, defaultdict
import time
import sys
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Launch the Unity environment from a local build.
# NOTE(review): "Banana.app" looks like a macOS bundle in the working directory — adjust file_name on other platforms (TODO confirm).
env = UnityEnvironment(file_name="Banana.app")

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: BananaBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 37
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [3]:
# Select the first brain exposed by the environment. `brain_name` is used below to
# index the results of env.reset()/env.step(); `brain` is read for the action-space size.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

### Action space:
- `0` - walk forward 
- `1` - walk backward
- `2` - turn left
- `3` - turn right

### State space:
- `37` dimensions.
- Some dimensions encode the agent's velocity.
- Ray-based perception of objects in the agent's forward direction.

### Reward:

- `+1` - Yellow Banana collected.
- `-1` - Blue Banana collected.

In [13]:
# Reset in training mode and gather the quantities the rest of the notebook needs.
env_info = env.reset(train_mode=True)[brain_name]
action_size = brain.vector_action_space_size   # number of discrete actions
state = env_info.vector_observations[0]        # first agent's observation vector
state_size = len(state)                        # observation dimensionality

# Report what was found: agent count, action count, a sample state and its length.
print('Number of agents:', len(env_info.agents))
print('Number of actions:', action_size)
print('States look like:', state)
print('States have length:', state_size)

Number of agents: 1
Number of actions: 4
States look like: [1.         0.         0.         0.         0.71256876 0.
 0.         1.         0.         0.73509979 0.         0.
 0.         1.         0.         1.         0.         0.
 0.         0.31326672 0.         0.         1.         0.
 0.58369923 0.         0.         0.         1.         0.
 1.         0.         0.         0.         0.23518428 0.
 0.        ]
States have length: 37


In [5]:
# Play a single episode with uniformly random actions (train_mode=False) and
# accumulate the reward; `i` counts the environment steps taken.
env_info = env.reset(train_mode=False)[brain_name]
state = env_info.vector_observations[0]
score = 0
i = 0
done = False
while not done:
    i += 1
    # sample an action uniformly from the discrete action space and apply it
    action = np.random.randint(action_size)
    env_info = env.step(action)[brain_name]
    # unpack the transition for the (single) agent
    next_state, reward, done = (
        env_info.vector_observations[0],
        env_info.rewards[0],
        env_info.local_done[0],
    )
    score += reward
    state = next_state
    # only non-zero rewards are worth echoing
    if reward != 0:
        print(reward)

print("Score: {}".format(score))

1.0
Score: 1.0


In [6]:
# Number of environment steps taken during the random-policy episode above.
print("iterations:",i)

iterations: 300


In [7]:
# Layer widths: state_size -> 128 -> 64 -> 32 -> action_size.
# Each layer's output width is the next layer's input width, so the output list
# is the input list shifted by one with the action count appended.
input_features = [state_size, 128, 64, 32]
output_features = input_features[1:] + [action_size]
# Rebinds `state` from the environment observation to a torch tensor.
state = torch.tensor(state)

In [38]:
class DQNetwork(nn.Module):
    """
    Feed-forward Q-network used by the DQN agent (subclass of ``nn.Module``).

    Layer ``i`` is ``nn.Linear(input_features[i], output_features[i])``. A ReLU follows
    every layer except the last one, and dropout layer ``i`` (when configured) is applied
    after layer ``i``.
    """
    def __init__(self, input_features, output_features, dropout_layers=(0.3, 0.1)):
        """
        Initializes the model.
        ------
        @Param:
        1. input_features: sequence of input dimensions, one per linear layer.
        2. output_features: sequence of the corresponding output dimensions.
        3. dropout_layers: sequence of dropout probabilities (probability of *zeroing* an
           element, as defined by nn.Dropout); its length must be < num_layers - 1.
        @Raises:
        - ValueError: if the two feature sequences differ in length, or if too many
          dropout probabilities are given.
        """
        super().__init__()
        self.state_size = input_features[0]  # size of observational space
        self.action_size = output_features[-1]  # size of action space

        # every linear layer needs exactly one input and one output dimension
        if(len(input_features) != len(output_features)):
            raise ValueError("lengths do not match. input dimension MUST equal output dimensions")

        # dropout may only follow hidden layers, never the output layer
        if(len(dropout_layers) >= len(input_features) - 1):
            raise ValueError("dropout layers dimensions do not match appropriate size")

        # nn.ModuleList (rather than a plain Python list) registers the sub-modules, so
        # their weights appear in parameters()/state_dict(), follow .to(device), and the
        # dropout layers react to train()/eval().
        self.FC = nn.ModuleList([
            nn.Linear(input_unit, output_unit, bias=True)
            for input_unit, output_unit in zip(input_features, output_features)
        ])
        self.Dropout = nn.ModuleList([nn.Dropout(prob) for prob in dropout_layers])

    def forward(self, state):
        """
        Performs one forward pass, mapping a state to one value per action.
        -------
        @param:
        1. state: a NumPy array or a torch.Tensor.
           - NumPy arrays are converted to float32 and always gain a leading batch axis.
           - Tensors are cast to float32 and gain a leading batch axis only when 1-D, so
             an already batched tensor keeps its shape.
        @Return:
        - tensor whose last dimension has size ``action_size`` (raw outputs of the final
          linear layer; no activation is applied to them).
        """
        if isinstance(state, torch.Tensor):
            X = state.float()
            if X.dim() == 1:
                X = X.unsqueeze(0)
        else:
            X = torch.from_numpy(state).float().unsqueeze(0)

        last = len(self.FC) - 1
        for i, layer in enumerate(self.FC):
            X = layer(X)
            if i < last:
                X = F.relu(X)  # the output layer stays linear
            if i < len(self.Dropout):
                X = self.Dropout[i](X)
        return X

In [39]:
# Build the Q-network from the layer sizes defined earlier; dropout_layers is left at the class default.
model = DQNetwork(input_features, output_features)