In [None]:
# Install the notebook's dependencies. %pip (rather than !pip) installs into
# the environment of the running kernel.
%pip install swig
%pip install box2d-py
# gym is pinned below 0.26 because the cells in this notebook use the pre-0.26
# API: `observation = env.reset()`, a 4-value `env.step(...)` result and
# `env.render(mode='rgb_array')`. An unpinned install pulls a release where
# those calls no longer have that shape.
%pip install "gym[box2d]==0.25.2"

In [2]:
import gym
import random
from create_agent import Agent
from utils import plot_learning_curve
import numpy as np

In [None]:
from matplotlib import animation
import matplotlib.pyplot as plt

def display_frames_as_gif(frames, filename='./lunar_lander_result.gif', fps=30,
                          writer='imagemagick'):
    """Save a sequence of image frames as an animated GIF.

    Args:
        frames: Non-empty, indexable sequence of images accepted by
            ``imshow``; frame ``i`` of the animation shows ``frames[i]``.
        filename: Path of the file to write. Defaults to the path this
            notebook has always used.
        fps: Frame rate handed to ``Animation.save``.
        writer: Name of the matplotlib movie writer handed to
            ``Animation.save``.

    Raises:
        ValueError: If ``frames`` is empty (there is nothing to draw).
    """
    # Fail early with a clear message instead of an IndexError on frames[0].
    if len(frames) == 0:
        raise ValueError('display_frames_as_gif() requires at least one frame')

    # Use a dedicated figure so the animation never draws on top of whatever
    # figure happened to be current when the function was called.
    fig, ax = plt.subplots()
    patch = ax.imshow(frames[0])
    ax.axis('off')

    def animate(i):
        # Swap the image data in place rather than creating a new artist.
        patch.set_data(frames[i])
        return (patch,)

    # Keep the on-screen interval (milliseconds) consistent with the saved
    # frame rate; save() itself is driven by the explicit fps argument.
    interval_ms = max(1, round(1000 / fps))
    anim = animation.FuncAnimation(fig, animate, frames=len(frames),
                                   interval=interval_ms)
    anim.save(filename, writer=writer, fps=fps)

In [None]:
# Build the environment and the agent, plus the per-episode bookkeeping lists.
env = gym.make('LunarLander-v2')
agent = Agent(
    gamma=0.99,
    epsilon=1.0,
    batch_size=64,
    n_actions=4,
    eps_end=0.01,
    input_dims=[8],
    lr=0.001,
)
scores = []
eps_history = []
n_games = 500

for episode in range(n_games):
    # Start a fresh episode with a zeroed return.
    observation = env.reset()
    score = 0

    while True:
        action = agent.choose_action(observation)
        next_observation, reward, done, info = env.step(action)
        score += reward

        # Hand the transition to the agent, then trigger one learn() call.
        agent.store_transition(observation, action, reward,
                               next_observation, done)
        agent.learn()

        observation = next_observation
        if done:
            break

    # Record the episode return and the agent's epsilon at episode end.
    scores.append(score)
    eps_history.append(agent.epsilon)

    # Mean over (at most) the 100 most recent episode returns.
    avg_score = np.mean(scores[-100:])

    print(
        'episode ', episode,
        'score %.2f' % score,
        'average score %.2f' % avg_score,
        'epsilon %.2f' % agent.epsilon,
    )

# 1-based episode numbers passed as the first argument of plot_learning_curve.
x = list(range(1, n_games + 1))
filename = 'lunar_lander.png'
plot_learning_curve(x, scores, eps_history, filename)

In [None]:
# Play one episode with the `agent` and `env` from the earlier cell, grabbing
# an RGB frame before every step so the run can be written out as a GIF.
frames = []
score = 0
observation = env.reset()

while True:
    frames.append(env.render(mode = 'rgb_array'))
    action = agent.choose_action(observation)
    next_observation, reward, done, info = env.step(action)
    score += reward
    observation = next_observation
    if done:
        break

print('Total score : %.2f'%score)
display_frames_as_gif(frames)