In [1]:
from collections import deque
import numpy as np
import pandas as pd
import plotly.express as px
import torch
from unityagents import UnityEnvironment
from ddpg_agent import MultiAgent
import json

### Initialize the environment

In [2]:
env = UnityEnvironment(file_name='../Tennis.app')
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset the environment
env_info = env.reset(train_mode=True)[brain_name]
# number of agents in the environment
print('Number of agents:', len(env_info.agents))
# number of actions
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)
# examine the state space 
state = env_info.vector_observations[0]
print('States look like:', state)
state_size = len(state)
print('States have length:', state_size)

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


Number of agents: 2
Number of actions: 2
States look like: [ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.         -6.65278625 -1.5
 -0.          0.          6.83172083  6.         -0.          0.        ]
States have length: 24


#### Functions for training

We have two helper functions to run the training: 
1. run an episode in our environment, for a given agent
    - this can be also used to watch a completely random agent by setting `epsilon=1`
    - the epsilon value will control how much randomness the agent incorporates into its action selection
2. run through some number of episodes and train the agent while doing so
    - to run the function multiple times and continue training the agent where it left off, pass the returned `next_eps` of one iteration as the `start_eps` of the following one

In [3]:
def run_episode(agent, in_training=False, add_noise=False):
    """Run through an episode with a given agent.
    Use `in_training=True` to have the agent learn during the run."""
    env_info = env.reset(train_mode=in_training)[brain_name]
    states = env_info.vector_observations
    scores = np.zeros(2)
    while True:
        actions = agent.act(states, add_noise)
        env_info = env.step(actions)[brain_name]
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        all_done = env_info.local_done
        if in_training:
            agent.step(states, actions, rewards, next_states, all_done)
        scores += rewards
        states = next_states
        if any(all_done):
            break
    return scores


def train_agent(agent, n_episodes=1, threshold=0.5):
    """Run a large number of episodes to train the agent and improve its average score.
    The current value of 
    """
    scores = []
    scores_window = deque(maxlen=100)
    for i_episode in range(1, n_episodes + 1):
        i_score = run_episode(agent, in_training=True)
        scores.append(max(i_score))
        scores_window.append(i_score)
        if i_episode % 1 == 0:
            print(f"Episode {i_episode}\tAverage Score: {np.mean(scores_window)}")
        if np.mean(scores_window) > threshold:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))
            for ix, ddpg_agent in enumerate(multi_ddpg_agent.agents, 1):
                torch.save(ddpg_agent.actor_local.state_dict(), f'agent_{ix}_first_score_actor_model.pth')
                torch.save(ddpg_agent.critic_local.state_dict(), f'agent_{ix}_first_score_critic_model.pth')
            break
    return scores

#### Create Agent

Create an multi-agent learner using the DDPG algorithm, passing the `state_size` and `action_size` of our environment

In [4]:
multi_ddpg_agent = MultiAgent(state_size=24, action_size=2, num_agents=2, random_seed=0)

In [5]:
LOAD_AGENTS = False
if LOAD_AGENTS:
    for ix, ddpg_agent in enumerate(multi_ddpg_agent.agents, 1):
        if torch.cuda.is_available():
            ddpg_agent.actor_local.load_state_dict(torch.load(f'agent_{ix}_first_score_actor_model.pth'))
            ddpg_agent.critic_local.load_state_dict(torch.load(f'agent_{ix}_first_score_critic_model.pth'))
            ddpg_agent.cuda() 
        else:
            ddpg_agent.actor_local.load_state_dict(torch.load(f'agent_{ix}_first_score_actor_model.pth',map_location={'cuda:0': 'cpu'}))
            ddpg_agent.critic_local.load_state_dict(torch.load(f'agent_{ix}_first_score_critic_model.pth',map_location={'cuda:0': 'cpu'}))

#### Watch an episode

In [6]:
scores = run_episode(multi_ddpg_agent)
print(f"Score: {max(scores)}")

Score: 0.0


It returns a bad score as expected.

#### Train the ddpg agents

Train the agent for 10000 episodes or until it has reached the minimum threshold considered to have solved this environment.

In [7]:
scores = train_agent(multi_ddpg_agent, n_episodes=10000)

Episode 1	Average Score: -0.004999999888241291
Episode 2	Average Score: -0.004999999888241291
Episode 3	Average Score: -0.004999999888241291
Episode 4	Average Score: -0.004999999888241291
Episode 5	Average Score: -0.004999999888241291
Episode 6	Average Score: -0.004999999888241291
Episode 7	Average Score: -0.004999999888241291
Episode 8	Average Score: -0.004999999888241291
Episode 9	Average Score: -0.004999999888241291
Episode 10	Average Score: -0.004999999888241291
Episode 11	Average Score: -0.004999999888241291
Episode 12	Average Score: -0.004999999888241291
Episode 13	Average Score: -0.004999999888241291
Episode 14	Average Score: 0.005714285985699722
Episode 15	Average Score: 0.005000000260770321
Episode 16	Average Score: 0.007500000298023224
Episode 17	Average Score: 0.006764706169419429
Episode 18	Average Score: 0.006111111388438278
Episode 19	Average Score: 0.005526316058086722
Episode 20	Average Score: 0.005000000260770321
Episode 21	Average Score: 0.004523809777484054
Episode 2

Episode 174	Average Score: 0.011500000357627868
Episode 175	Average Score: 0.011500000357627868
Episode 176	Average Score: 0.011500000357627868
Episode 177	Average Score: 0.011500000357627868
Episode 178	Average Score: 0.012000000365078449
Episode 179	Average Score: 0.012000000365078449
Episode 180	Average Score: 0.012000000365078449
Episode 181	Average Score: 0.012000000365078449
Episode 182	Average Score: 0.012000000365078449
Episode 183	Average Score: 0.012000000365078449
Episode 184	Average Score: 0.012000000365078449
Episode 185	Average Score: 0.01250000037252903
Episode 186	Average Score: 0.01250000037252903
Episode 187	Average Score: 0.01250000037252903
Episode 188	Average Score: 0.01250000037252903
Episode 189	Average Score: 0.01250000037252903
Episode 190	Average Score: 0.01250000037252903
Episode 191	Average Score: 0.01300000037997961
Episode 192	Average Score: 0.01350000038743019
Episode 193	Average Score: 0.01350000038743019
Episode 194	Average Score: 0.01350000038743019
Ep

Episode 346	Average Score: 0.02000000048428774
Episode 347	Average Score: 0.019500000476837157
Episode 348	Average Score: 0.019500000476837157
Episode 349	Average Score: 0.019000000469386578
Episode 350	Average Score: 0.019000000469386578
Episode 351	Average Score: 0.019000000469386578
Episode 352	Average Score: 0.018500000461935995
Episode 353	Average Score: 0.018500000461935995
Episode 354	Average Score: 0.018500000461935995
Episode 355	Average Score: 0.018500000461935995
Episode 356	Average Score: 0.018000000454485417
Episode 357	Average Score: 0.018000000454485417
Episode 358	Average Score: 0.018500000461935995
Episode 359	Average Score: 0.018500000461935995
Episode 360	Average Score: 0.018000000454485417
Episode 361	Average Score: 0.018000000454485417
Episode 362	Average Score: 0.017500000447034834
Episode 363	Average Score: 0.017500000447034834
Episode 364	Average Score: 0.018000000454485417
Episode 365	Average Score: 0.017500000447034834
Episode 366	Average Score: 0.017000000439

Episode 518	Average Score: 0.013950000451877713
Episode 519	Average Score: 0.013950000451877713
Episode 520	Average Score: 0.013450000444427132
Episode 521	Average Score: 0.013450000444427132
Episode 522	Average Score: 0.01340000044554472
Episode 523	Average Score: 0.0139000004529953
Episode 524	Average Score: 0.012400000430643559
Episode 525	Average Score: 0.011900000423192978
Episode 526	Average Score: 0.012900000438094139
Episode 527	Average Score: 0.012900000438094139
Episode 528	Average Score: 0.012400000421330333
Episode 529	Average Score: 0.011900000413879753
Episode 530	Average Score: 0.011950000412762165
Episode 531	Average Score: 0.011450000405311585
Episode 532	Average Score: 0.010950000397861004
Episode 533	Average Score: 0.010450000390410424
Episode 534	Average Score: 0.010450000390410424
Episode 535	Average Score: 0.009950000382959843
Episode 536	Average Score: 0.009450000366196037
Episode 537	Average Score: 0.009950000373646618
Episode 538	Average Score: 0.00995000037364

Episode 691	Average Score: 0.02945000066421926
Episode 692	Average Score: 0.03045000067912042
Episode 693	Average Score: 0.03045000067912042
Episode 694	Average Score: 0.030950000686571003
Episode 695	Average Score: 0.029950000671669842
Episode 696	Average Score: 0.029950000671669842
Episode 697	Average Score: 0.0284500006493181
Episode 698	Average Score: 0.027450000634416938
Episode 699	Average Score: 0.026450000619515777
Episode 700	Average Score: 0.026450000628829004
Episode 701	Average Score: 0.02545000061392784
Episode 702	Average Score: 0.02495000060647726
Episode 703	Average Score: 0.02495000060647726
Episode 704	Average Score: 0.02495000060647726
Episode 705	Average Score: 0.0239500005915761
Episode 706	Average Score: 0.023450000584125517
Episode 707	Average Score: 0.023450000584125517
Episode 708	Average Score: 0.023450000584125517
Episode 709	Average Score: 0.022450000569224356
Episode 710	Average Score: 0.022450000569224356
Episode 711	Average Score: 0.023450000584125517
Epi

Episode 864	Average Score: 0.04345000099390745
Episode 865	Average Score: 0.04445000100880861
Episode 866	Average Score: 0.043950001001358034
Episode 867	Average Score: 0.043950001001358034
Episode 868	Average Score: 0.04445000101812184
Episode 869	Average Score: 0.04395000101998448
Episode 870	Average Score: 0.04445000101812184
Episode 871	Average Score: 0.044450001027435064
Episode 872	Average Score: 0.044950001034885646
Episode 873	Average Score: 0.044950001034885646
Episode 874	Average Score: 0.044450001027435064
Episode 875	Average Score: 0.044950001034885646
Episode 876	Average Score: 0.045950001049786804
Episode 877	Average Score: 0.04545000105164945
Episode 878	Average Score: 0.04495000104419887
Episode 879	Average Score: 0.04545000105164945
Episode 880	Average Score: 0.04495000104419887
Episode 881	Average Score: 0.04495000104419887
Episode 882	Average Score: 0.04495000104419887
Episode 883	Average Score: 0.04495000104419887
Episode 884	Average Score: 0.044950001053512094
Epis

Episode 1036	Average Score: 0.03550000073388219
Episode 1037	Average Score: 0.03650000074878335
Episode 1038	Average Score: 0.03650000073947012
Episode 1039	Average Score: 0.03700000075623393
Episode 1040	Average Score: 0.03750000076368451
Episode 1041	Average Score: 0.038500000778585675
Episode 1042	Average Score: 0.038500000769272444
Episode 1043	Average Score: 0.038500000769272444
Episode 1044	Average Score: 0.03950000078417361
Episode 1045	Average Score: 0.040500000799074766
Episode 1046	Average Score: 0.04100000080652535
Episode 1047	Average Score: 0.04100000080652535
Episode 1048	Average Score: 0.04150000081397593
Episode 1049	Average Score: 0.04100000080652535
Episode 1050	Average Score: 0.04100000080652535
Episode 1051	Average Score: 0.04100000080652535
Episode 1052	Average Score: 0.04150000081397593
Episode 1053	Average Score: 0.04250000082887709
Episode 1054	Average Score: 0.04350000084377825
Episode 1055	Average Score: 0.044000000851228835
Episode 1056	Average Score: 0.04500

Episode 1207	Average Score: 0.05695000105537474
Episode 1208	Average Score: 0.05695000105537474
Episode 1209	Average Score: 0.05745000106282532
Episode 1210	Average Score: 0.058450001077726486
Episode 1211	Average Score: 0.05895000108517706
Episode 1212	Average Score: 0.05895000108517706
Episode 1213	Average Score: 0.05895000108517706
Episode 1214	Average Score: 0.057950001070275904
Episode 1215	Average Score: 0.057950001070275904
Episode 1216	Average Score: 0.057950001070275904
Episode 1217	Average Score: 0.058450001077726486
Episode 1218	Average Score: 0.058450001077726486
Episode 1219	Average Score: 0.058450001077726486
Episode 1220	Average Score: 0.057950001070275904
Episode 1221	Average Score: 0.057950001070275904
Episode 1222	Average Score: 0.05895000108517706
Episode 1223	Average Score: 0.05895000108517706
Episode 1224	Average Score: 0.059450001092627644
Episode 1225	Average Score: 0.05895000108517706
Episode 1226	Average Score: 0.059450001092627644
Episode 1227	Average Score: 0

Episode 1378	Average Score: 0.0655000011716038
Episode 1379	Average Score: 0.0655000011716038
Episode 1380	Average Score: 0.0655000011716038
Episode 1381	Average Score: 0.06600000117905439
Episode 1382	Average Score: 0.06600000117905439
Episode 1383	Average Score: 0.06600000117905439
Episode 1384	Average Score: 0.06650000118650495
Episode 1385	Average Score: 0.06650000118650495
Episode 1386	Average Score: 0.0655000011716038
Episode 1387	Average Score: 0.06600000117905439
Episode 1388	Average Score: 0.06650000118650495
Episode 1389	Average Score: 0.06600000117905439
Episode 1390	Average Score: 0.0655000011716038
Episode 1391	Average Score: 0.06600000117905439
Episode 1392	Average Score: 0.06500000116415322
Episode 1393	Average Score: 0.0655000011716038
Episode 1394	Average Score: 0.06450000115670264
Episode 1395	Average Score: 0.06500000116415322
Episode 1396	Average Score: 0.06450000115670264
Episode 1397	Average Score: 0.06500000116415322
Episode 1398	Average Score: 0.0655000011716038

Episode 1549	Average Score: 0.07900000136345625
Episode 1550	Average Score: 0.08000000137835742
Episode 1551	Average Score: 0.08050000138580798
Episode 1552	Average Score: 0.08100000139325857
Episode 1553	Average Score: 0.08050000138580798
Episode 1554	Average Score: 0.08050000138580798
Episode 1555	Average Score: 0.08100000139325857
Episode 1556	Average Score: 0.08050000138580798
Episode 1557	Average Score: 0.08100000139325857
Episode 1558	Average Score: 0.08100000139325857
Episode 1559	Average Score: 0.08100000139325857
Episode 1560	Average Score: 0.08100000139325857
Episode 1561	Average Score: 0.08050000138580798
Episode 1562	Average Score: 0.08050000138580798
Episode 1563	Average Score: 0.08050000138580798
Episode 1564	Average Score: 0.07900000136345625
Episode 1565	Average Score: 0.07950000137090683
Episode 1566	Average Score: 0.07950000137090683
Episode 1567	Average Score: 0.07950000137090683
Episode 1568	Average Score: 0.07950000137090683
Episode 1569	Average Score: 0.0800000013

Episode 1721	Average Score: 0.07890000136569142
Episode 1722	Average Score: 0.07890000136569142
Episode 1723	Average Score: 0.07890000137500465
Episode 1724	Average Score: 0.07890000137500465
Episode 1725	Average Score: 0.07890000137500465
Episode 1726	Average Score: 0.07990000138990581
Episode 1727	Average Score: 0.07990000138990581
Episode 1728	Average Score: 0.07990000138990581
Episode 1729	Average Score: 0.07890000137500465
Episode 1730	Average Score: 0.07990000138990581
Episode 1731	Average Score: 0.0804000013973564
Episode 1732	Average Score: 0.07990000138990581
Episode 1733	Average Score: 0.07990000138990581
Episode 1734	Average Score: 0.0804000013973564
Episode 1735	Average Score: 0.08090000140480696
Episode 1736	Average Score: 0.08140000141225755
Episode 1737	Average Score: 0.08390000144951046
Episode 1738	Average Score: 0.08440000145696104
Episode 1739	Average Score: 0.08390000144951046
Episode 1740	Average Score: 0.08390000144951046
Episode 1741	Average Score: 0.084400001456

Episode 1893	Average Score: 0.11550000192597508
Episode 1894	Average Score: 0.1150000019185245
Episode 1895	Average Score: 0.1150000019185245
Episode 1896	Average Score: 0.11450000192038715
Episode 1897	Average Score: 0.11500000192783773
Episode 1898	Average Score: 0.11850000197999179
Episode 1899	Average Score: 0.11900000198744237
Episode 1900	Average Score: 0.11900000198744237
Episode 1901	Average Score: 0.12000000200234354
Episode 1902	Average Score: 0.12050000200979412
Episode 1903	Average Score: 0.12150000202469528
Episode 1904	Average Score: 0.12200000204145908
Episode 1905	Average Score: 0.12050000201910734
Episode 1906	Average Score: 0.12050000201910734
Episode 1907	Average Score: 0.12050000201910734
Episode 1908	Average Score: 0.12000000201165677
Episode 1909	Average Score: 0.12050000201910734
Episode 1910	Average Score: 0.12100000202655792
Episode 1911	Average Score: 0.1215000020340085
Episode 1912	Average Score: 0.1215000020340085
Episode 1913	Average Score: 0.12250000204890

Episode 2065	Average Score: 0.12900000211782753
Episode 2066	Average Score: 0.12950000212527812
Episode 2067	Average Score: 0.13100000214762986
Episode 2068	Average Score: 0.13150000215508043
Episode 2069	Average Score: 0.13200000216253102
Episode 2070	Average Score: 0.13150000215508043
Episode 2071	Average Score: 0.13300000217743219
Episode 2072	Average Score: 0.13350000218488275
Episode 2073	Average Score: 0.13400000219233335
Episode 2074	Average Score: 0.13350000218488275
Episode 2075	Average Score: 0.13450000219978392
Episode 2076	Average Score: 0.1345000021904707
Episode 2077	Average Score: 0.13350000217556954
Episode 2078	Average Score: 0.13300000216811894
Episode 2079	Average Score: 0.1345000021904707
Episode 2080	Average Score: 0.13600000221282244
Episode 2081	Average Score: 0.13800000224262476
Episode 2082	Average Score: 0.13800000224262476
Episode 2083	Average Score: 0.13800000224262476
Episode 2084	Average Score: 0.13850000225007533
Episode 2085	Average Score: 0.140000002272

Episode 2237	Average Score: 0.22200000355020166
Episode 2238	Average Score: 0.2235000035725534
Episode 2239	Average Score: 0.2205000035278499
Episode 2240	Average Score: 0.21900000350549817
Episode 2241	Average Score: 0.21950000351294874
Episode 2242	Average Score: 0.21900000349618495
Episode 2243	Average Score: 0.21950000350363552
Episode 2244	Average Score: 0.22200000354088842
Episode 2245	Average Score: 0.22400000357069075
Episode 2246	Average Score: 0.22200000354088842
Episode 2247	Average Score: 0.22450000357814134
Episode 2248	Average Score: 0.22300000355578958
Episode 2249	Average Score: 0.22300000355578958
Episode 2250	Average Score: 0.22100000352598725
Episode 2251	Average Score: 0.22300000355578958
Episode 2252	Average Score: 0.22150000353343785
Episode 2253	Average Score: 0.22250000353902577
Episode 2254	Average Score: 0.2220000035315752
Episode 2255	Average Score: 0.22150000352412463
Episode 2256	Average Score: 0.2220000035315752
Episode 2257	Average Score: 0.22350000355392

Episode 2409	Average Score: 0.1635000026319176
Episode 2410	Average Score: 0.16250000261701644
Episode 2411	Average Score: 0.16250000261701644
Episode 2412	Average Score: 0.16250000261701644
Episode 2413	Average Score: 0.1655000026617199
Episode 2414	Average Score: 0.16200000260956585
Episode 2415	Average Score: 0.16150000260211528
Episode 2416	Average Score: 0.16200000260956585
Episode 2417	Average Score: 0.16100000259466468
Episode 2418	Average Score: 0.16500000265426934
Episode 2419	Average Score: 0.16500000265426934
Episode 2420	Average Score: 0.17450000279583036
Episode 2421	Average Score: 0.17700000283308326
Episode 2422	Average Score: 0.17850000285543502
Episode 2423	Average Score: 0.18050000288523735
Episode 2424	Average Score: 0.17800000284798442
Episode 2425	Average Score: 0.17850000285543502
Episode 2426	Average Score: 0.17850000285543502
Episode 2427	Average Score: 0.18050000288523735
Episode 2428	Average Score: 0.17750000284053385
Episode 2429	Average Score: 0.177000002833

Episode 2581	Average Score: 0.23500000369735063
Episode 2582	Average Score: 0.23650000371970237
Episode 2583	Average Score: 0.240500003779307
Episode 2584	Average Score: 0.24150000379420816
Episode 2585	Average Score: 0.23700000372715294
Episode 2586	Average Score: 0.23700000372715294
Episode 2587	Average Score: 0.23700000372715294
Episode 2588	Average Score: 0.2330000036675483
Episode 2589	Average Score: 0.23450000368990004
Episode 2590	Average Score: 0.23400000368244947
Episode 2591	Average Score: 0.23600000371225177
Episode 2592	Average Score: 0.23650000371970237
Episode 2593	Average Score: 0.23600000371225177
Episode 2594	Average Score: 0.2355000037048012
Episode 2595	Average Score: 0.2380000037420541
Episode 2596	Average Score: 0.23900000375695526
Episode 2597	Average Score: 0.2385000037495047
Episode 2598	Average Score: 0.24000000377185643
Episode 2599	Average Score: 0.24450000383891166
Episode 2600	Average Score: 0.24750000388361515
Episode 2601	Average Score: 0.2425000038091093

#### Plot the Scores

Environment solved in 2618 episodes!	Average Score: .52

In [16]:
env.close()