# Collaboration and Competition

In this notebook, we train two agents to play Tennis: each agent controls a racket and tries to bounce a ball over a net.

### 1. Packages

Let's import the necessary packages

In [1]:
from unityagents import UnityEnvironment
import numpy as np
import torch
import torch.nn as nn
import time
from ddpg_agent import Agent
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

### 2. Start the environment

Before starting the environment, change the `file_name` parameter to match the location of the Unity environment that you downloaded.

In [2]:
# Launch the Unity Tennis build; adjust the path to wherever it was downloaded.
env = UnityEnvironment(file_name='Tennis_Linux/Tennis.x86_64')

# Use the first brain the environment exposes as the default one.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset in training mode and keep the info belonging to the default brain.
env_info = env.reset(train_mode=True)[brain_name]

# How many agents are in the environment?
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)

# Size of the action vector, as reported by the brain.
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)

# Inspect the first agent's observation to learn the state dimensionality.
state = env_info.vector_observations[0]
state_size = len(state)
print('States look like:', state)
print('States have length:', state_size)

# Build the DDPG agent and show its local actor and critic networks.
agent = Agent(
    state_size=state_size,
    action_size=action_size,
    num_agents=num_agents,
    random_seed=0,
)
print(agent.actor_local, agent.critic_local, sep='\n')

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


Number of agents: 2
Number of actions: 2
States look like: [ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.         -6.65278625 -1.5
 -0.          0.          6.83172083  6.         -0.          0.        ]
States have length: 24
Actor(
  (fc1): Linear(in_features=24, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=2, bias=True)
)
Critic(
  (fcs1): Linear(in_features=24, out_features=256, bias=True)
  (fc2): Linear(in_features=258, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=1, bias=True)
)


### 3. Train the agent using DDPG:

In [3]:
def ddpg(n_episodes=5000, train=True, solved_score=0.5, window=100, print_every=100):
    """Run the DDPG agent in the Unity environment and record episode scores.

    Uses the notebook-level globals `env`, `brain_name`, `num_agents` and
    `agent` that are created in the environment set-up cell.

    Args:
        n_episodes (int): maximum number of episodes to run.
        train (bool): forwarded to `env.reset` as `train_mode`.
        solved_score (float): the run stops once the windowed average score
            exceeds this value.
        window (int): number of most recent episodes that are averaged.
        print_every (int): a progress line is kept and a checkpoint is written
            every `print_every` episodes.

    Returns:
        tuple(list, list): the per-episode scores (mean of the agents' summed
        rewards) and the windowed average recorded after every episode.
    """
    scores_deque = deque(maxlen=window)
    scores = []
    avg_score_list = []

    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=train)[brain_name]
        states = env_info.vector_observations
        agent.reset()
        score = np.zeros(num_agents)  # running reward total, one entry per agent

        while True:
            actions = agent.act(states)
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            dones = env_info.local_done
            agent.step(states, actions, rewards, next_states, dones)
            score += rewards
            states = next_states
            # The episode ends as soon as any agent reports that it is done.
            if np.any(dones):
                break

        # NOTE(review): the episode score is the mean over agents. If the target
        # benchmark defines it differently (e.g. the max over agents), these
        # numbers are not directly comparable -- confirm.
        episode_score = np.mean(score)
        scores_deque.append(episode_score)
        scores.append(episode_score)
        avg_score = np.mean(scores_deque)
        avg_score_list.append(avg_score)

        # '\r' with end="" rewrites the same console line on every episode.
        print('\rEpisode {}\tAverage Score: {:.3f}\tScore: {:.3f}'.format(i_episode, avg_score, episode_score), end="")

        # Only declare success once the averaging window is full; otherwise a
        # single lucky early episode would terminate the run.
        solved = len(scores_deque) == window and avg_score > solved_score

        if i_episode % print_every == 0 or solved:
            print('\rEpisode {}\tAverage Score: {:.3f}'.format(i_episode, avg_score))

            torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
            torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')

            if solved:
                print('\nEnvironment solved in {:d} episodes!'.format(i_episode))
                break
    return scores, avg_score_list

In [4]:
# Run training with the default settings; keep the per-episode scores and the
# running averages, which the plotting cell below uses.
scores, avg_score_list = ddpg()

Episode 100	Average Score: -0.003	Score: -0.005
Episode 200	Average Score: -0.003	Score: -0.005
Episode 300	Average Score: 0.001	Score: -0.0055
Episode 400	Average Score: 0.004	Score: -0.005
Episode 500	Average Score: 0.011	Score: -0.005
Episode 600	Average Score: 0.012	Score: 0.0455
Episode 700	Average Score: -0.002	Score: -0.005
Episode 800	Average Score: 0.006	Score: -0.0055
Episode 900	Average Score: 0.044	Score: 0.0455
Episode 1000	Average Score: 0.063	Score: 0.045
Episode 1100	Average Score: 0.063	Score: -0.005
Episode 1200	Average Score: 0.106	Score: 0.0955
Episode 1300	Average Score: 0.115	Score: 0.1455
Episode 1400	Average Score: 0.153	Score: 0.0455
Episode 1500	Average Score: 0.123	Score: 0.2455
Episode 1600	Average Score: 0.174	Score: 0.0955
Episode 1700	Average Score: 0.212	Score: 0.0955
Episode 1800	Average Score: 0.361	Score: 0.1955
Episode 1880	Average Score: 0.502	Score: 0.3955
\Environment solved in 1880 episodes!


### 4. Close the environment

In [None]:
# Close the Unity environment; the remaining cell only uses the recorded scores.
env.close()

### 5. Plot the average scores over episodes

In [None]:
# Plot the per-episode score together with its running average.
# Draw on the explicit Axes object and label both curves so they can be told apart.
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(scores) + 1), scores, label='Episode score')
ax.plot(np.arange(1, len(avg_score_list) + 1), avg_score_list, label='Running average')
ax.set_title('DDPG training scores')
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
ax.legend()
# Save before show(): some backends clear the figure once it has been displayed.
fig.savefig('rewards_per_episode.png')
plt.show()