In [1]:
import gym
import numpy as np
from ddpg_agent import Agent
import time
import matplotlib.pyplot as plt

In [2]:
# Train a DDPG agent on Pendulum-v0, logging per-episode scores and
# checkpointing the weights whenever the running average reaches a new best.
#
# A notebook kernel runs cells as "__main__", so this guard is always true
# here; the names bound below (n_games, score_history, ...) remain global and
# are read by the plotting cell later in the notebook.
if __name__ == "__main__":
    env = gym.make("Pendulum-v0")
    agent = Agent(input_dims=(env.observation_space.shape), n_actions=env.action_space.shape[0], alpha=0.001, beta=0.002,
                  env=env, gamma=0.99, mem_size=50000, tau=0.005, fc1_dims=400, fc2_dims=300, batch_size=64)
    n_games = 2001
    # No checkpoint is written before this episode index.
    warmup_games = 300
    # Number of most recent episodes averaged for the progress metric.
    avg_window = 100

    # Start from -inf rather than env.reward_range[0]: that bound describes a
    # single step's reward, whereas it is compared against averages of whole
    # episode returns, so a finite bound could block every improvement.
    best_score = float("-inf")
    score_history = []

    for i in range(n_games):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.store_transition(observation, action, reward,
                                   observation_, done)
            agent.learn()
            observation = observation_

        score_history.append(score)
        avg_score = np.mean(score_history[-avg_window:])

        # Save only when the running average sets a new record after the
        # warm-up. Comparing just the last two episode scores (the previous
        # rule) let a noisy, mediocre episode overwrite a better checkpoint.
        # best_score is left untouched during warm-up so the first eligible
        # episode always produces a checkpoint.
        if i >= warmup_games and avg_score > best_score:
            best_score = avg_score
            agent.save_models()

        print('episode {} score {:.1f} avg score {:.1f}'.
              format(i, score, avg_score))

  state = T.tensor([observation], dtype=T.float)
  states = T.tensor(states, dtype=T.float)
  states_ = T.tensor(states_, dtype=T.float)
  actions = T.tensor(actions, dtype=T.float)
  rewards = T.tensor(rewards, dtype=T.float)
  done = T.tensor(done)


episode 0 score -1426.8 avg score -1426.8
episode 1 score -944.0 avg score -1185.4
episode 2 score -1842.3 avg score -1404.4
episode 3 score -984.3 avg score -1299.3


In [None]:
def plot_learning_curve(x, scores, window=100):
    """Plot the trailing running average of ``scores`` against ``x``.

    Point ``i`` of the curve is the mean of the last ``window`` scores up to
    and including ``scores[i]``; fewer are averaged near the start, where
    that many scores are not yet available.

    Args:
        x: Horizontal coordinates, one per entry of ``scores``.
        scores: Sequence of numeric scores.
        window: Number of scores averaged per point (default 100).

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    # The slice starts at i - window + 1 so that exactly `window` scores are
    # averaged; starting at i - window would include window + 1 of them and
    # disagree with the figure title.
    running_avg = np.zeros(len(scores))
    for i in range(len(running_avg)):
        running_avg[i] = np.mean(scores[max(0, i - window + 1):(i + 1)])

    plt.plot(x, running_avg)
    plt.title('Running average of previous {} scores'.format(window))

# Number the episodes from 1. The axis is sized from the scores actually
# recorded rather than from n_games, so the plot still works when training
# was stopped before all planned episodes finished (the lengths would
# otherwise differ and plotting would fail).
x = list(range(1, len(score_history) + 1))
plot_learning_curve(x, score_history)