# Deep Q-Network (DQN)
---
This notebook is an implementation of a DQN agent that solves OpenAI Gym's LunarLander-v2 environment, running on a Windows 10 machine.

### 1. Import the Necessary Packages and set global variables

In [1]:
!pip3 install gym
!pip3 install Box2D
!pip3 install pyglet==1.5.0
!pip install gym[box2d]
import gym
import random
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline


Collecting box2d-py~=2.3.5 (from gym[box2d])
  Using cached https://files.pythonhosted.org/packages/98/c2/ab05b5329dc4416b5ee5530f0625a79c394a3e3c10abe0812b9345256451/box2d-py-2.3.8.tar.gz
Installing collected packages: box2d-py
  Running setup.py install for box2d-py: started
    Running setup.py install for box2d-py: finished with status 'error'
    Complete output from command C:\Dev\Python\RL\Lunar_Lander\venv\Scripts\python.exe -u -c "import setuptools, tokenize;__file__='C:\\Users\\JUSTIN~1\\AppData\\Local\\Temp\\pip-install-y4pgwuz3\\box2d-py\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" install --record C:\Users\JUSTIN~1\AppData\Local\Temp\pip-record-c_lxlplx\install-record.txt --single-version-externally-managed --compile --install-headers C:\Dev\Python\RL\Lunar_Lander\venv\include\site\python3.6\box2d-py:
    Using setuptools (version 40.8.0).
    running install
    running build
  

Command "C:\Dev\Python\RL\Lunar_Lander\venv\Scripts\python.exe -u -c "import setuptools, tokenize;__file__='C:\\Users\\JUSTIN~1\\AppData\\Local\\Temp\\pip-install-y4pgwuz3\\box2d-py\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" install --record C:\Users\JUSTIN~1\AppData\Local\Temp\pip-record-c_lxlplx\install-record.txt --single-version-externally-managed --compile --install-headers C:\Dev\Python\RL\Lunar_Lander\venv\include\site\python3.6\box2d-py" failed with error code 1 in C:\Users\JUSTIN~1\AppData\Local\Temp\pip-install-y4pgwuz3\box2d-py\


### 2. Instantiate the Environment and untrained Agents

Initialize the environment in the code cell below.

In [2]:
# Shared settings: named once so the environment and every agent stay in sync
SEED = 88         # single seed used for the environment and all agents
STATE_SIZE = 8    # state_size handed to each agent (LunarLander-v2 observation length)
ACTION_SIZE = 4   # action_size handed to each agent (LunarLander-v2 discrete actions)

# Set up the environment
env = gym.make('LunarLander-v2')
env.seed(SEED)

# Set up the agents: one untrained instance per DQN variant
from dqn_agent import Agent, Agent_Double, Prioritized_Agent, Prioritized_Double_Agent
agent_kwargs = dict(state_size=STATE_SIZE, action_size=ACTION_SIZE, seed=SEED)
baseline_agent = Agent(**agent_kwargs)
double_agent = Agent_Double(**agent_kwargs)
prioritized_agent = Prioritized_Agent(**agent_kwargs)
prioritized_double_agent = Prioritized_Double_Agent(**agent_kwargs)




### 3. Watch an untrained Agent

In [7]:
def watch_agent(Agent, env, trained=True,
                checkpoint_dir="C:\\Dev\\Python\\RL\\Lunar_Lander\\checkpoints",
                n_episodes=5):
    """
    Render an agent acting in the given environment for a few episodes.

    :param Agent: agent *instance* (not the class); must provide ``act(state)``
        and, when ``trained`` is True, a ``qnetwork_local`` module
    :param env: Gym-style environment providing ``reset``, ``step``,
        ``render`` and ``close``
    :param trained: if True, load saved weights into ``Agent.qnetwork_local``
        from ``<checkpoint_dir>/<str(Agent)>.pth`` before watching
    :param checkpoint_dir: directory holding the ``.pth`` checkpoint files
    :param n_episodes: number of episodes to render
    :return: None
    """
    import os  # local import keeps this cell independent of the imports cell

    # Load the weights from file. The file name is built from str(Agent), the
    # same naming scheme used when the checkpoint is written after training.
    if trained:
        checkpoint_path = os.path.join(checkpoint_dir, f"{Agent}.pth")
        Agent.qnetwork_local.load_state_dict(torch.load(checkpoint_path))

    # Watch the Agent perform; always close the environment, even if
    # rendering or stepping fails part-way through an episode.
    try:
        for _episode in range(n_episodes):
            state = env.reset()
            done = False
            while not done:
                env.render()
                action = Agent.act(state)
                state, reward, done, _ = env.step(action)
    finally:
        env.close()

# Untrained run: skip checkpoint loading and watch the freshly initialised agent
watch_agent(baseline_agent, env, trained=False)

### 4. Function used to train Agents

In [8]:
def simulation(Agent, n_episodes=2000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995,
               env=None, checkpoint_dir="C:\\Dev\\Python\\RL\\Lunar_Lander\\checkpoints"):
    """Deep Q-Learning.

    Trains the given agent with epsilon-greedy action selection, then saves
    the weights of its ``qnetwork_local`` to ``<checkpoint_dir>/<str(Agent)>.pth``.

    Params
    ======
        Agent (object): agent *instance* from dqn_agent.py to train; must provide
            ``act(state, eps)``, ``step(state, action, reward, next_state, done)``
            and ``qnetwork_local``
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
        env (object): Gym-style environment to train in; when None, the
            notebook-level ``env`` variable is used
        checkpoint_dir (str): directory the checkpoint file is written to
            (created if it does not exist)

    Returns
    =======
        list: total reward obtained in each episode, in order
    """
    import os  # local import keeps this cell independent of the imports cell

    if env is None:
        # Backward compatible default: use the environment created earlier in
        # the notebook. The parameter shadows that global, hence globals().
        try:
            env = globals()["env"]
        except KeyError:
            raise NameError("name 'env' is not defined") from None

    scores = []                        # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start                    # initialize epsilon
    for i_episode in range(1, n_episodes+1):
        state = env.reset()
        score = 0
        for _ in range(max_t):
            action = Agent.act(state, eps)
            next_state, reward, done, _info = env.step(action)
            Agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)       # save most recent score
        scores.append(score)              # save most recent score
        eps = max(eps_end, eps_decay*eps) # decrease epsilon
        # Live progress on a single line; every 100th episode is kept as its own line
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)), end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))

    # Save the weights of the agent that was actually trained (previously this
    # always saved `baseline_agent`, whichever agent was passed in).
    os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint_path = os.path.join(checkpoint_dir, f"{Agent}.pth")
    torch.save(Agent.qnetwork_local.state_dict(), checkpoint_path)
    return scores


### 5. Function used to graph simulation results

In [9]:
def graph_results(agent_scores, save_graph=True,
                  image_dir="C:\\Dev\\Python\\RL\\Lunar_Lander\\images",
                  solved_score=200):
    """ Used to graph the results of agents from simulations

    :param agent_scores: list of tuples of agent name and an array of scores (agent_name, scores)
    :param save_graph: bool used to save the graph in images
    :param image_dir: directory the figure is written to when save_graph is True
        (created if it does not exist)
    :param solved_score: height of the dashed "Solved" reference line
    :return: None
    """
    import os  # local import keeps this cell independent of the imports cell

    agent_scores = list(agent_scores)  # accept any iterable; it is walked twice below

    # Set up graph
    fig, ax = plt.subplots()
    ax.set(xlabel="Episode #", ylabel='Score', title="Agent Comparison for Lunar_Lander_V2")

    # The "Solved" line spans the longest run that was passed in, rather than
    # depending on a notebook-level `baseline_scores` variable.
    longest_run = max((len(scores) for _, scores in agent_scores), default=0)
    ax.plot(np.arange(longest_run), np.ones(longest_run)*solved_score,
            color='black', linestyle='dashed', label="Solved")
    ax.grid()

    # Graph all the results. Each item is unpacked directly: enumerate() would
    # hand back (index, item) and plot the (name, scores) tuple itself.
    save_name = ""
    for name, scores in agent_scores:
        ax.plot(np.arange(len(scores)), scores, label=f"{name} Agent")
        save_name += f"{name}_"
    ax.legend()

    if save_graph:
        os.makedirs(image_dir, exist_ok=True)
        filepath = os.path.join(image_dir, f"{save_name}.png")
        fig.savefig(filepath)
    plt.show()

### 6. Train the Agents and view the results!

In [None]:
# Train the networks and keep each agent's per-episode scores.
# Update the calls below as necessary when running different simulations.
baseline_scores = simulation(baseline_agent)
double_scores = simulation(double_agent)

# Prioritized Replay was too slow, so these runs and the graphs that
# depend on their scores are left disabled.
#prioritized_scores = simulation(prioritized_agent)
#prioritized_double_scores = simulation(prioritized_double_agent)
#graph_results([("DQN", baseline_scores),("Prioritized Experience Replay", prioritized_scores)], save_graph=True)
#graph_results([("Double DQN", double_scores),("Prioritized Experience Replay", prioritized_double_scores)], save_graph=True)


Episode 100	Average Score: -195.52
Episode 200	Average Score: -174.48
Episode 300	Average Score: -16.948
Episode 400	Average Score: -66.03
Episode 500	Average Score: 71.351
Episode 600	Average Score: 74.05
Episode 700	Average Score: 144.28
Episode 800	Average Score: 152.52
Episode 900	Average Score: 158.58
Episode 1000	Average Score: 172.44
Episode 1100	Average Score: 201.37
Episode 1200	Average Score: 195.24
Episode 1300	Average Score: 203.16
Episode 1400	Average Score: 203.55
Episode 1500	Average Score: 212.33
Episode 1600	Average Score: 210.98
Episode 1700	Average Score: 222.07
Episode 1800	Average Score: 238.41
Episode 1900	Average Score: 226.33
Episode 2000	Average Score: 235.52
Episode 100	Average Score: -198.74
Episode 200	Average Score: -187.16
Episode 300	Average Score: -197.17
Episode 400	Average Score: -117.25
Episode 500	Average Score: -61.053
Episode 600	Average Score: -55.00
Episode 700	Average Score: -47.51
Episode 800	Average Score: 1.7978
Episode 900	Average Score: 94.

### 7. Watch a Smart Agent!

This code cell will load the trained weights from file to watch the smart agent!

In [8]:
# Trained run: load the saved checkpoint for this agent before watching it
watch_agent(double_agent, env, trained=True)

