In [None]:
import gym

# Environment id and RNG seed, named so they are easy to find and change.
ENV_ID = "LunarLanderContinuous-v2"
SEED = 0

# NOTE(review): env.seed() here and the 4-tuple returned by env.step() in the
# cells below look like the classic Gym API — confirm the installed gym
# version still provides them.
env = gym.make(ENV_ID)
env.seed(SEED)

In [None]:
from ddpg import DDPG
from td3 import TD3

# Build the agent used by every later cell. Only TD3 is instantiated; the
# DDPG class imported in this cell is not used by the visible code.
# NOTE(review): presumably TD3 reads the observation/action spaces from
# `env` — confirm against td3.py.
actor = TD3(env)

In [None]:
import itertools

def get_experiences(env, actor, episodes=50):
    """Play complete episodes and record every transition, without training.

    For each step the actor picks an action with ``actor.act(obs)``, the
    environment is advanced with ``env.step(action)``, and the resulting
    ``(obs, action, reward, next_obs, done)`` tuple is handed to
    ``actor.remember``. ``actor.train`` is never called here.
    (Presumably this pre-fills the actor's replay memory — confirm against
    the actor implementation.)

    Args:
        env: Environment exposing ``reset()`` and a ``step(action)`` that
            returns ``(obs, reward, done, info)``.
        actor: Agent exposing ``act(obs)`` and
            ``remember(obs, action, reward, next_obs, done)``.
        episodes: Number of full episodes to play.

    Returns:
        None. The only effect is the calls made on ``env`` and ``actor``.
    """
    for _ in range(episodes):
        state = env.reset()
        finished = False
        # Keep stepping until the environment reports the episode is over.
        while not finished:
            chosen = actor.act(state)
            next_state, reward, finished, _info = env.step(chosen)
            actor.remember(state, chosen, reward, next_state, finished)
            state = next_state

In [None]:
import itertools
import numpy as np

def train(env, actor, episodes=100, batch_size=100, target_score=180, window=10):
    """Train ``actor`` on ``env``, one ``actor.train`` call per environment step.

    Each step stores the transition with ``actor.remember`` and then calls
    ``actor.train(batch_size=batch_size)``. After every episode the episode
    return is appended to the result, a progress line is printed, and
    training stops early as soon as the mean of the most recent ``window``
    returns is strictly greater than ``target_score``.

    Note that the running mean is taken over however many episodes exist so
    far, so with fewer than ``window`` episodes a single high-scoring episode
    can already trigger the early stop.

    Args:
        env: Environment exposing ``reset()`` and a ``step(action)`` that
            returns ``(obs, reward, done, info)``.
        actor: Agent exposing ``act(obs)``,
            ``remember(obs, action, reward, next_obs, done)`` and
            ``train(batch_size=...)``.
        episodes: Maximum number of episodes to run.
        batch_size: Value forwarded to ``actor.train`` on every step.
        target_score: Early-stop threshold for the running mean. Pass
            ``float('inf')`` to always run all ``episodes``.
        window: Number of most recent episode returns averaged for the
            early-stop test and the printed mean. Must be at least 1.

    Returns:
        list: The return (summed reward) of every episode that was played,
        in order.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        # returns[-0:] would silently average the whole history instead.
        raise ValueError("window must be >= 1")

    returns = []
    for episode in range(1, episodes + 1):
        obs = env.reset()
        score = 0
        done = False
        while not done:
            action = actor.act(obs)
            obs2, reward, done, _ = env.step(action)
            actor.remember(obs, action, reward, obs2, done)
            actor.train(batch_size=batch_size)
            obs = obs2
            score += reward

        returns.append(score)
        mean = np.mean(returns[-window:])
        print(f"Episode: {episode}, Score: {score}, Mean Score: {mean}")
        if mean > target_score:
            break
    return returns

In [None]:
# Play 25 episodes with no training step: get_experiences only calls
# actor.act / actor.remember, never actor.train.
# NOTE(review): presumably this fills the replay memory before train()
# starts requesting batches — confirm against the TD3 implementation.
get_experiences(env, actor, episodes=25)

In [None]:
# Train for at most 1500 episodes; train() returns early once the mean of the
# last 10 episode returns exceeds 180. `returns` holds one score per episode
# played and is plotted in the last cell.
returns = train(env, actor, episodes=1500)

In [None]:
import imageio
import itertools
import numpy as np

def run_episode(env, model, render=False, record=False,
                gif_path='img/LunarTD3.gif', fps=29):
    """Play one episode with ``model`` and optionally render or record it.

    Actions come from ``model.act(obs, noise=False)``. The accumulated reward
    is printed as ``Score: <value>`` when the episode ends. ``env.close()`` is
    always called, including when the episode is aborted by an exception, so
    a render window is not left behind.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` returning
            ``(obs, reward, done, info)``, ``render(...)`` and ``close()``.
        model: Agent exposing ``act(obs, noise=False)``.
        render: If True, call ``env.render()`` before every step.
        record: If True, capture ``env.render(mode='rgb_array')`` before
            every step and write every second captured frame to ``gif_path``.
        gif_path: Output file for the recording. Its parent directory is
            created if it does not exist.
        fps: Value forwarded to ``imageio.mimsave`` as ``fps``.

    Returns:
        None.
    """
    images = []
    score = 0
    try:
        obs = env.reset()
        done = False
        while not done:
            if record:
                images.append(env.render(mode='rgb_array'))
            if render:
                env.render()
            action = model.act(obs, noise=False)
            obs, reward, done, _ = env.step(action)
            score += reward
        print("Score: " + str(score))
    finally:
        # Release the environment (and any render window) even on failure.
        env.close()

    if record:
        import os

        # mimsave does not create missing directories itself.
        out_dir = os.path.dirname(gif_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        # Keep every second frame to halve the size of the GIF.
        imageio.mimsave(gif_path, [np.array(img) for img in images[::2]], fps=fps)

In [None]:
# Watch one episode on screen. run_episode passes noise=False to actor.act,
# and with record=False no GIF is written.
run_episode(env, actor, render=True, record=False)

In [None]:
import matplotlib.pyplot as plt

# Learning curve: one point per training episode, taken from `returns`.
fig, ax = plt.subplots()
ax.plot(range(len(returns)), returns, label='Returns')
# Label the axes and title the figure so it can be read on its own.
ax.set_xlabel('Episode')
ax.set_ylabel('Return')
ax.set_title('Episode returns during training')
ax.legend()
plt.show()