In [None]:
import os
import gym
import numpy as np
from ddpg_torch import Agent
from utils import plot_learning_curve

# --- Environment and agent ---------------------------------------------------
env = gym.make('LunarLanderContinuous-v2')
agent = Agent(alpha=0.0001, beta=0.001,
              input_dims=env.observation_space.shape, tau=0.001,
              batch_size=64, fc1_dims=400, fc2_dims=300,
              n_actions=env.action_space.shape[0], chkpt_dir=os.getcwd())
n_games = 1000

# The figure name records the agent's alpha/beta values and the episode count
# so that plots from different runs do not overwrite each other.
filename = 'LunarLander_alpha_' + str(agent.alpha) + '_beta_' + \
            str(agent.beta) + '_' + str(n_games) + '_games'
figure_file = 'plots/' + filename + '.png'

# figure_file is handed to plot_learning_curve only after the whole run
# (presumably it saves the figure there -- confirm in utils). Create the
# target directory now so a missing 'plots/' folder cannot fail the run at the
# very last step, after every episode has already been played.
os.makedirs(os.path.dirname(figure_file), exist_ok=True)

# --- Training loop -----------------------------------------------------------
# Start from the lower bound of the environment's reward range; checkpoints are
# written whenever the running average exceeds the best value seen so far.
best_score = env.reward_range[0]
score_history = []
for i in range(n_games):
    observation = env.reset()
    done = False
    score = 0
    # Reset the agent's noise object at the start of every episode
    # (presumably the exploration-noise process -- confirm in ddpg_torch).
    agent.noise.reset()
    while not done:
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        # Store the transition, then let the agent run one learn() call
        # per environment step.
        agent.remember(observation, action, reward, observation_, done)
        agent.learn()
        score += reward
        observation = observation_
    score_history.append(score)
    # Running average over (at most) the 100 most recent episodes.
    avg_score = np.mean(score_history[-100:])

    if avg_score > best_score:
        best_score = avg_score
        agent.save_models()

    print('episode ', i, 'score %.1f' % score,
            'average score %.1f' % avg_score)

# --- Learning curve ----------------------------------------------------------
# 1-based episode numbers for the x axis.
x = list(range(1, n_games + 1))
plot_learning_curve(x, score_history, figure_file)






... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
episode  0 score -83.4 average score -83.4
episode  1 score -92.3 average score -87.9
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
episode  2 score -18.4 average score -64.7
episode  3 score -856.8 average score -262.8
episode  4 score -1343.2 average score -478.9
episode  5 score -811.4 average score -534.3
episode  6 score -966.0 average score -595.9
episode  7 score -2191.3 average score -795.4
episode  8 score -1015.0 average score -819.8
episode  9 score -1175.5 average score -855.3
episode  10 score -1070.3 average score -874.9
episode  11 score -1145.0 average score -897.4
episode  12 score -944.4 average score -901.0
episode  13 score -1305.1 average score -929.9
episode  14 score -1034.3 average score -936.8
episode  15 score -1528.7 average score -973.8
episode  16 score -1004.3 average score -975.6
episode  17 scor

episode  174 score -22.5 average score -102.4
episode  175 score -60.3 average score -95.7
episode  176 score -65.1 average score -89.5
episode  177 score -175.9 average score -86.4
episode  178 score -100.3 average score -80.6
episode  179 score -124.0 average score -75.3
episode  180 score -94.9 average score -69.2
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
episode  181 score -81.9 average score -64.0
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
episode  182 score -123.7 average score -60.7
episode  183 score -99.1 average score -61.5
episode  184 score -181.8 average score -63.0
episode  185 score -101.5 average score -63.3
episode  186 score -101.7 average score -63.6
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
episode  187 score -160.4 average score -60.5
episode  188 score -155.7 average score -61.7
epi

In [None]:
import os

In [None]:
# Display the kernel's current working directory; the training cell passes the
# same os.getcwd() value to the Agent as chkpt_dir.
os.getcwd()