In [1]:
import gym

env = gym.make("LunarLanderContinuous-v2")
env.seed(0)



[0]

In [2]:
from ddpg import DDPG
# from ddpg_cv import DDPGCV

actor = DDPG(env)

In [3]:
import itertools

def get_experiences(env, actor, episodes=50):
    for episode in range(0, episodes):
        obs = env.reset()
        done = False
        for t in itertools.count():
            action = actor.act(obs)
            obs2, reward, done, _ = env.step(action)
            actor.remember(obs, action, reward, obs2, done)
            obs = obs2
            if done:
                break

In [4]:
import itertools
import numpy as np

def train(env, actor, episodes=100):
    returns = []
    mean = 0
    for episode in range(1, episodes+1):
        obs = env.reset()
        score = 0
        for t in itertools.count():
            action = actor.act(obs)
            obs2, reward, done, _ = env.step(action)
            actor.remember(obs, action, reward, obs2, done)  
            actor.train(batch_size=128, epochs=1)
            obs = obs2
            score += reward
            if done:
                returns.append(score)
                mean = np.mean(returns[-10:])
                print("Episode: " + str(episode) + ", Score: " +str(score) + ", Mean Score: " + str(mean))
                if mean > 180:
                    return returns
                break    
    return returns

In [5]:
get_experiences(env, actor, episodes=25)

In [6]:
returns = train(env, actor, episodes=500)

Episode: 1, Score: -187.82603040465264, Mean Score: -187.82603040465264
Episode: 2, Score: -646.4415853716981, Mean Score: -417.1338078881754
Episode: 3, Score: -948.1126037184912, Mean Score: -594.126739831614
Episode: 4, Score: -1367.1596957368445, Mean Score: -787.3849788079217
Episode: 5, Score: -846.2108083751468, Mean Score: -799.1501447213667
Episode: 6, Score: -1193.9431603438509, Mean Score: -864.9489806584475
Episode: 7, Score: -814.3959988437927, Mean Score: -857.7271261134968
Episode: 8, Score: -711.7379067063484, Mean Score: -839.4784736876031
Episode: 9, Score: -1161.467084363463, Mean Score: -875.254985984921
Episode: 10, Score: -805.0570093605734, Mean Score: -868.2351883224861
Episode: 11, Score: -1035.9203977353159, Mean Score: -953.0446250555524
Episode: 12, Score: -941.0624876031533, Mean Score: -982.5067152786978
Episode: 13, Score: -844.3280981071631, Mean Score: -972.1282647175652
Episode: 14, Score: -756.692243345361, Mean Score: -911.0815194784169
Episode: 15, 

Episode: 117, Score: -215.3825011169618, Mean Score: -347.5733089880241
Episode: 118, Score: -235.9319647754359, Mean Score: -359.5216869730424
Episode: 119, Score: -769.5328513115953, Mean Score: -386.982686994823
Episode: 120, Score: -454.54144494701336, Mean Score: -412.36088702023807
Episode: 121, Score: -551.8155408101683, Mean Score: -455.2017906151692
Episode: 122, Score: -271.664169916511, Mean Score: -365.5550997149263
Episode: 123, Score: -2153.147861992697, Mean Score: -555.9524566559294
Episode: 124, Score: -280.58709695762076, Mean Score: -549.2152530250127
Episode: 125, Score: -363.02644394162104, Mean Score: -553.9328867100294
Episode: 126, Score: -265.77255633305504, Mean Score: -556.140243210268
Episode: 127, Score: -229.66139239267486, Mean Score: -557.5681323378393
Episode: 128, Score: -247.96875873506272, Mean Score: -558.771811733802
Episode: 129, Score: -392.62101950612623, Mean Score: -521.080628553255
Episode: 130, Score: -234.74887578487312, Mean Score: -499.10

Episode: 230, Score: -271.5512335575489, Mean Score: -281.3102245792748
Episode: 231, Score: -224.90652899258325, Mean Score: -289.12085968806883
Episode: 232, Score: -168.04061039560986, Mean Score: -283.8326471593369
Episode: 233, Score: -257.7074530565329, Mean Score: -196.75833374718863
Episode: 234, Score: -287.8159213488874, Mean Score: -219.24918758799578
Episode: 235, Score: -227.9119622400755, Mean Score: -233.7785696206135
Episode: 236, Score: -247.29370976346556, Mean Score: -230.23023586715354
Episode: 237, Score: -153.14249087635142, Mean Score: -221.67575883357048
Episode: 238, Score: -170.39315518936976, Mean Score: -220.06226592922607
Episode: 239, Score: -174.67216223019693, Mean Score: -218.34352276506215
Episode: 240, Score: -193.42808087654566, Mean Score: -210.53120749696183
Episode: 241, Score: -567.8380090556815, Mean Score: -244.82435550327165
Episode: 242, Score: -163.60119103091262, Mean Score: -244.3804135668019
Episode: 243, Score: -485.2474659776566, Mean S

KeyboardInterrupt: 

In [13]:
import imageio
import itertools
import numpy as np

def run_episode(env, model, render=False, record=False):
    images = []
    obs = env.reset()
    score = 0
    for t in itertools.count():
        if record:
            images.append(env.render(mode='rgb_array'))
        if render:
            env.render()
        action = model.act(obs)
        obs, reward, done, info = env.step(action)
        score += reward
        if done:
            print("Score: " + str(score))
            env.close()
            break
            
    if record:
        imageio.mimsave('img/LunarDDPG.gif', [np.array(img) for i, img in enumerate(images) if i%2 == 0], fps=29)

In [39]:
run_episode(env, actor, render=True, record=True)

Score: 203.66214747060678


In [40]:
import matplotlib.pyplot as plt

plt.plot(range(len(returns)), returns, label='Returns')
plt.legend()
plt.show()

NameError: name 'returns' is not defined