In [1]:
import gym
import random
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

from ddpg_agent import Agent

In [3]:
# Create the BipedalWalker environment and seed it with a fixed value.
env = gym.make('BipedalWalker-v3')
env.seed(10)

# Size the agent from the first dimension of the environment's observation
# and action spaces; the agent is given the same seed value as the environment.
agent = Agent(
    state_size=env.observation_space.shape[0],
    action_size=env.action_space.shape[0],
    random_seed=10,
)



In [None]:
def ddpg(n_episodes=2000, max_t=700):
    """Train the module-level ``agent`` on the module-level ``env``.

    Each episode resets the environment and the agent, then alternates
    ``agent.act`` / ``env.step`` / ``agent.step`` until the environment
    reports ``done`` or ``max_t`` steps have been taken. Every 100 episodes
    the local actor and critic weights are written to
    ``checkpoint_actor.pth`` and ``checkpoint_critic.pth`` in the current
    working directory.

    Params
    ======
        n_episodes (int): number of training episodes to run
        max_t (int): maximum number of environment steps per episode

    Returns
    =======
        list: the summed reward of each episode, in episode order
    """
    # Rolling window holding at most the last 100 episode scores; used only
    # for the "Average Score" progress readout.
    scores_deque = deque(maxlen=100)
    scores = []
    for i_episode in range(1, n_episodes+1):
        state = env.reset()
        agent.reset()
        score = 0
        for t in range(max_t):
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            # Hand the transition to the agent (see ddpg_agent.Agent.step).
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_deque.append(score)
        scores.append(score)
        # '\r' with end="" rewrites the same console line on every episode.
        print('\rEpisode {}\tAverage Score: {:.2f}\tScore: {:.2f}'.format(i_episode, np.mean(scores_deque), score), end="")
        if i_episode % 100 == 0:
            # Periodic checkpoint; each save overwrites the previous files.
            torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
            torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')
            # No end="" here, so this summary line stays in the output.
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))
    return scores

# Run training with the default settings and keep the per-episode scores.
scores = ddpg()

# Learning curve: one point per episode, with episodes numbered from 1.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(np.arange(1, len(scores)+1), scores)
ax.set_xlabel('Episode #')
ax.set_ylabel('Score')
plt.show()



Episode 100	Average Score: -64.49	Score: -40.53
Episode 200	Average Score: -60.86	Score: -49.995
Episode 300	Average Score: -49.53	Score: -97.745
Episode 400	Average Score: -96.36	Score: -96.795
Episode 500	Average Score: -97.04	Score: -96.98
Episode 600	Average Score: -95.93	Score: -95.24
Episode 700	Average Score: -94.87	Score: -94.78
Episode 800	Average Score: -95.59	Score: -95.80
Episode 900	Average Score: -96.55	Score: -95.327
Episode 1000	Average Score: -97.39	Score: -96.46
Episode 1100	Average Score: -96.20	Score: -96.058
Episode 1200	Average Score: -96.41	Score: -96.042
Episode 1300	Average Score: -99.11	Score: -121.30
Episode 1400	Average Score: -95.99	Score: -95.9859
Episode 1500	Average Score: -96.76	Score: -96.046
Episode 1600	Average Score: -97.98	Score: -96.439
Episode 1700	Average Score: -98.46	Score: -96.130
Episode 1800	Average Score: -96.25	Score: -95.401
Episode 1900	Average Score: -92.32	Score: -96.269
Episode 1987	Average Score: -87.83	Score: -98.829

In [4]:
# Restore the actor and critic weights from the checkpoint files on disk.
agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))

# Roll out one rendered episode with the restored weights.
state = env.reset()
agent.reset()
try:
    while True:
        # NOTE(review): agent.act may add exploration noise by default —
        # confirm against ddpg_agent whether evaluation should disable it.
        action = agent.act(state)
        env.render()
        next_state, reward, done, _ = env.step(action)
        state = next_state
        if done:
            break
finally:
    # Close the environment even if the rollout raises or is interrupted
    # (e.g. the kernel is stopped mid-episode), so it is never left open.
    env.close()