In [1]:
import gym

# Create the Lunar Lander environment (continuous-action variant, per its id).
env = gym.make("LunarLanderContinuous-v2")
# Seed the environment with 0. NOTE(review): only the environment is seeded in
# the cells shown here; any randomness inside the agent is not — confirm whether
# full run-to-run reproducibility is required.
env.seed(0)



[0]

In [2]:
from ddpg import DDPG
from td3 import TD3

# Build the TD3 agent for this environment. DDPG is imported above but is not
# used in the cells shown here.
actor = TD3(env)

In [3]:
import itertools

def get_experiences(env, actor, episodes=50):
    """Play full episodes and store every transition in the agent's memory.

    No learning step is performed here: the agent only picks actions via
    ``actor.act`` and records ``(obs, action, reward, next_obs, done)`` via
    ``actor.remember``.

    Args:
        env: Environment exposing ``reset()`` and a ``step(action)`` that
            returns a 4-tuple ``(obs, reward, done, info)``.
        actor: Agent exposing ``act(obs)`` and
            ``remember(obs, action, reward, next_obs, done)``.
        episodes: Number of complete episodes to play.
    """
    for _ in range(episodes):
        state = env.reset()
        finished = False
        # Step until the environment reports the end of the episode.
        while not finished:
            chosen = actor.act(state)
            next_state, reward, finished, _info = env.step(chosen)
            actor.remember(state, chosen, reward, next_state, finished)
            state = next_state

In [4]:
import itertools
import numpy as np

def train(env, actor, episodes=100, batch_size=256, target_score=180, window=10):
    """Train the agent online and return the per-episode scores.

    After every environment step the transition is stored with
    ``actor.remember`` and one update is requested with
    ``actor.train(batch_size=batch_size)``. At the end of each episode the
    score and the mean of the most recent ``window`` scores are printed.
    Training stops early as soon as that mean exceeds ``target_score``.

    Args:
        env: Environment exposing ``reset()`` and a ``step(action)`` that
            returns a 4-tuple ``(obs, reward, done, info)``.
        actor: Agent exposing ``act``, ``remember`` and ``train(batch_size=...)``.
        episodes: Maximum number of episodes to run.
        batch_size: Batch size forwarded to ``actor.train`` on every step.
        target_score: Early-stop threshold for the moving-average score.
        window: Number of most recent episodes averaged for the stop test.
            While fewer episodes exist, the mean covers all of them.

    Returns:
        list: The summed reward of each completed episode, in order.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        # returns[-0:] would silently average the whole history instead.
        raise ValueError("window must be at least 1")

    returns = []
    for episode in range(1, episodes + 1):
        obs = env.reset()
        score = 0
        done = False
        while not done:
            action = actor.act(obs)
            obs2, reward, done, _ = env.step(action)
            actor.remember(obs, action, reward, obs2, done)
            actor.train(batch_size=batch_size)
            obs = obs2
            score += reward

        returns.append(score)
        mean = np.mean(returns[-window:])
        print(f"Episode: {episode}, Score: {score}, Mean Score: {mean}")
        if mean > target_score:
            # Moving average has cleared the threshold: stop early.
            break
    return returns

In [5]:
# Warm-up: play 25 episodes that only store transitions in the agent's memory
# (get_experiences never calls actor.train), so training starts with data.
get_experiences(env, actor, episodes=25)

In [None]:
# Train for at most 500 episodes; train() returns earlier once the mean score
# of the last 10 episodes exceeds 180. `returns` holds one score per episode.
returns = train(env, actor, episodes=500)

Episode: 1, Score: -546.0058286997536, Mean Score: -546.0058286997536
Episode: 2, Score: -102.1111414060409, Mean Score: -324.05848505289725
Episode: 3, Score: -204.81715448622046, Mean Score: -284.311374864005
Episode: 4, Score: -158.28594656737988, Mean Score: -252.8050177898487
Episode: 5, Score: -113.47410062880428, Mean Score: -224.93883435763982
Episode: 6, Score: -19.256802905137732, Mean Score: -190.6584957822228
Episode: 7, Score: -283.8945032984567, Mean Score: -203.97792542739904
Episode: 8, Score: -99.29878208587333, Mean Score: -190.89303250970835
Episode: 9, Score: -127.05740958684099, Mean Score: -183.80018551827865
Episode: 10, Score: -453.94660479681176, Mean Score: -210.81482744613194
Episode: 11, Score: -363.0557747212094, Mean Score: -192.51982204827752
Episode: 12, Score: -311.2626037217583, Mean Score: -213.43496827984927
Episode: 13, Score: -427.4506675295366, Mean Score: -235.6983195841809
Episode: 14, Score: -421.80150755612, Mean Score: -262.0498756830549
Epis

In [None]:
import imageio
import itertools
import numpy as np

def run_episode(env, model, render=False, record=False,
                gif_path='img/LunarTD3.gif', fps=29):
    """Play one episode with ``model``, optionally rendering and recording it.

    The total score is printed when the episode ends. The environment is
    always closed afterwards, including when the episode is interrupted by
    an exception.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` returning a
            4-tuple ``(obs, reward, done, info)``, ``render(...)`` and
            ``close()``.
        model: Agent exposing ``act(obs)``.
        render: If true, call ``env.render()`` before every step.
        record: If true, capture ``env.render(mode='rgb_array')`` before
            every step and save every second captured frame as a GIF.
        gif_path: Destination of the GIF when ``record`` is true. Missing
            parent directories are created.
        fps: Frame rate passed to ``imageio.mimsave``.
    """
    import os  # local import keeps this cell self-contained

    images = []
    obs = env.reset()
    score = 0
    done = False
    try:
        while not done:
            if record:
                images.append(env.render(mode='rgb_array'))
            if render:
                env.render()
            action = model.act(obs)
            obs, reward, done, info = env.step(action)
            score += reward
        print("Score: " + str(score))
    finally:
        # Release the environment even if the episode fails midway.
        env.close()

    if record:
        # Create the output directory first, so a missing folder cannot
        # fail the save after the whole episode has already been played.
        out_dir = os.path.dirname(gif_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        # Keep every second frame (indices 0, 2, 4, ...).
        frames = [np.array(img) for img in images[::2]]
        imageio.mimsave(gif_path, frames, fps=fps)

In [None]:
# Play one episode with the trained agent: show it on screen and also save the
# captured frames as a GIF (run_episode writes to 'img/LunarTD3.gif').
run_episode(env, actor, render=True, record=True)

In [None]:
import matplotlib.pyplot as plt

# Learning curve: one point per training episode, taken from `returns`.
fig, ax = plt.subplots()
ax.plot(range(len(returns)), returns, label='Returns')
# Label the axes and title so the figure can be read on its own.
ax.set_xlabel('Episode')
ax.set_ylabel('Return')
ax.set_title('Training return per episode')
ax.legend()
plt.show()