# LunarLander: Google Colab Version

## Downloads and installs

In [0]:
# Colab setup: install the Python packages and system libraries this notebook uses.
# pyglet / pyopengl are not imported directly in this notebook;
# presumably they are needed by gym's renderer — TODO confirm.
!pip install pyglet
!pip install pyopengl
# pyvirtualdisplay provides the `Display` class imported below.
!pip install pyvirtualdisplay
# gym with its "box2d" extra; the notebook later creates "LunarLander-v2" with gym.
!pip install gym[box2d]
# System packages via apt (-y answers prompts automatically);
# stdout is redirected to /dev/null to keep the cell output short.
!apt-get -y install xvfb freeglut3-dev ffmpeg> /dev/null

## Imports and definitions

In [0]:
import os
from google.colab import drive

import matplotlib.pyplot as plt
import matplotlib.animation
%matplotlib inline
from IPython import display
from IPython.display import HTML
from pyvirtualdisplay import Display

import gym
from stable_baselines import DQN

In [0]:
# Mount Google Drive; the trained model is saved to / loaded from it further down.
drive.mount('/content/drive')

# Start an invisible pyvirtualdisplay display, presumably so that
# env.render() works on a machine without a physical screen.
# NOTE: this rebinds `display` and therefore shadows the
# `from IPython import display` import above; the name is kept so that any
# existing reference to this object keeps working.
display = Display(visible=0, size=(1024, 768))
display.start()

# Point DISPLAY at the virtual display as ":<display>.<screen>".
# `screen` is read defensively: newer pyvirtualdisplay releases may not expose
# that attribute, in which case screen 0 is used instead of raising
# AttributeError.
os.environ["DISPLAY"] = ":" + str(display.display) + "." + str(getattr(display, "screen", 0))

def addframes(frames, env):
    """Render `env` in 'rgb_array' mode and append the result to `frames` in place."""
    frames.append(env.render(mode='rgb_array'))

def show_env(frames):
    """Build an inline JavaScript/HTML animation from rendered frames.

    Parameters
    ----------
    frames : sequence of image arrays
        Frames in playback order. The first frame's ``shape[0]`` (rows) and
        ``shape[1]`` (columns) set the figure size: at 72 dpi the figure is
        as many pixels wide/high as that frame.

    Returns
    -------
    str
        The markup produced by ``Animation.to_jshtml()``; wrap it in
        ``IPython.display.HTML`` to display it.

    Raises
    ------
    ValueError
        If ``frames`` is empty (there is no first frame to size the figure).
    """
    if len(frames) == 0:
        raise ValueError("show_env needs at least one frame to animate")

    height, width = frames[0].shape[0], frames[0].shape[1]
    fig = plt.figure(figsize=(width / 72.0, height / 72.0), dpi=72)
    patch = plt.imshow(frames[0])
    plt.axis('off')

    def animate(i):
        # Swap the displayed image for frame i.
        patch.set_data(frames[i])

    ani = matplotlib.animation.FuncAnimation(fig, animate, frames=len(frames), interval=50)
    try:
        return ani.to_jshtml()
    finally:
        # Close the figure once the HTML is built: otherwise the inline
        # backend also shows a static copy of the figure under the animation
        # and open figures pile up every time the cell is re-run.
        plt.close(fig)

In [0]:
# Switch for the Deep Q-Learning section below:
#   True  -> train a new DQN model and save it (locally and to Google Drive)
#   False -> skip training and load the model saved on Google Drive instead
train = True

## Random agent

In [0]:
# Baseline: play three episodes of at most 100 steps with randomly sampled
# actions, recording a rendered frame before every step for the animation.
frames = []
env = gym.make("LunarLander-v2")
for i_episode in range(3):
    observation = env.reset()
    for steps_taken in range(1, 101):
        addframes(frames, env)
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if not done:
            continue
        print("Episode finished after {} timesteps".format(steps_taken))
        break
env.close()
HTML(show_env(frames))

## Deep Q-Learning

In [0]:
# Adapted from the example code at https://github.com/hill-a/stable-baselines
# Only runs when `train` is set; otherwise the model is loaded in a later cell.
if train:
    env = gym.make('LunarLander-v2')
    model = DQN(
        'MlpPolicy',
        env,
        learning_rate=2e-3,
        prioritized_replay=True,
        verbose=1,
    )
    model.learn(total_timesteps=100_000)

In [0]:
if train:
    # Keep one copy in the Colab working directory and one on Google Drive.
    drive_model_path = "/content/drive/My Drive/data/dqn_lunar_new"
    model.save("dqn_lunar_new")
    # Create the Drive folder first: if it is missing, the save would
    # presumably fail and the freshly trained model would exist only in the
    # temporary Colab session.
    os.makedirs(os.path.dirname(drive_model_path), exist_ok=True)
    model.save(drive_model_path)

In [0]:
# Training was skipped: restore the model from the copy stored on Google Drive.
if not train:
    model = DQN.load(
        "/content/drive/My Drive/data/dqn_lunar_new"
    )

In [0]:
# Roll out three episodes (at most 1000 steps each) with the trained or loaded
# DQN model and animate the recorded frames.
# A fresh environment is created here so the cell does not depend on which
# earlier cell last assigned `env`: with `train = False` the training cell is
# skipped and the only existing env is the one already closed by the
# random-agent cell.
env = gym.make('LunarLander-v2')
frames = []
for i_episode in range(3):
    # Store the reset observation in `obs`, the variable fed to the model;
    # otherwise a new episode would start from the previous episode's final
    # observation.
    obs = env.reset()
    for t in range(1000):
        addframes(frames, env)
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        if dones:
            print("Episode finished after {} timesteps".format(t+1))
            break
env.close()
HTML(show_env(frames))