# Deep Q-Network (DQN)
---
This notebook contains an implementation of a DQN Agent that solves OpenAI Gym's LunarLander-v2 environment.

### 1. Import Necessary Packages:

In [1]:
import gym
import random
import numpy as np
from collections import namedtuple, deque
import matplotlib.pyplot as plt
%matplotlib inline

# pytorch imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from src.Model.main import QNetwork
from src.Memory.RelpyBuffer import Experience
from src.Agent.dqn import DQN
from src.Agent.config import DQN_soft_update
torch.cuda.is_available()

True

### 2. Instantiate the Environment and Agent

In [2]:
# Create the LunarLander environment and report the sizes of its
# observation and action spaces.
env = gym.make('LunarLander-v2')
env.reset()
obs_space, act_space = env.observation_space, env.action_space
print('State shape: ', obs_space.shape)
print('Number of Actions: ', act_space.n)

State shape:  (8,)
Number of Actions:  4


#### Set up the Agent:

In [3]:
# Configuration object; it supplies SEED, BUFFER_SIZE and BATCH_SIZE below.
config = DQN_soft_update()

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Sizes taken from the environment: length of the observation vector and
# number of discrete actions.
state_dim = env.observation_space.shape[0]
action_count = env.action_space.n

# Q-network, replay memory and the agent that ties them together.
net = QNetwork(state_dim, action_count, config.SEED)
Memory = Experience(action_count, config.BUFFER_SIZE, config.BATCH_SIZE, device, config.SEED)
dqn_Agent = DQN(net, state_dim, action_count, Memory, device, config)

### 3. Train the Agent with DQN:

In [4]:
def dqn(agent, n_episodes=4000, max_t=200, eps_start=1.0, eps_end=0.01, eps_decay=0.99,
        environment=None, checkpoint_dir="agent_checkpoint", solved_score=140.0):
    """Deep Q-Learning.

    Runs epsilon-greedy episodes, feeding every transition to ``agent.step``,
    and saves ``agent.qnetwork_local`` weights every 100 episodes and once more
    when the 100-episode average score reaches ``solved_score``.

    Params
    ======
        agent: object exposing ``act(state, eps)``, ``step(state, action, reward,
            next_state, done)`` and a ``qnetwork_local`` module
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
        environment: environment to train on; defaults to the notebook-level ``env``
        checkpoint_dir (str): directory the checkpoint files are written to
            (created if it does not exist)
        solved_score (float): 100-episode average score at which training stops

    Returns
    =======
        list: total reward of every episode that was played, in order
    """
    import os  # local import so this cell does not depend on a later cell

    if environment is None:
        environment = env  # notebook-level environment created earlier
    # torch.save does not create missing directories.
    os.makedirs(checkpoint_dir, exist_ok=True)

    scores = []                        # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start                    # initialize epsilon
    for i_episode in range(1, n_episodes+1):
        state = environment.reset()[0]
        score = 0
        for _step in range(max_t):
            action = agent.act(state, eps)
            next_state, reward, terminated, truncated, _ = environment.step(action)
            # Only a true terminal state is reported to the agent as done; a
            # truncation (e.g. a time limit) ends the episode but is not terminal.
            agent.step(state, action, reward, next_state, terminated)
            state = next_state
            score += reward
            if terminated or truncated:
                break
        scores_window.append(score)       # save most recent score
        scores.append(score)              # save most recent score
        eps = max(eps_end, eps_decay*eps) # decrease epsilon
        mean_score = np.mean(scores_window)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, mean_score), end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, mean_score))
            # os.path.join keeps the path portable; a hard-coded backslash is
            # only a path separator on Windows.
            torch.save(agent.qnetwork_local.state_dict(),
                       os.path.join(checkpoint_dir, "checkpoint" + str(i_episode) + ".pth"))
        if mean_score >= solved_score:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode-100, mean_score))
            torch.save(agent.qnetwork_local.state_dict(),
                       os.path.join(checkpoint_dir, 'checkpoint.pth'))
            break
    return scores

# Train the agent with the default settings and keep the per-episode scores.
scores = dqn(dqn_Agent)

  if not isinstance(terminated, (bool, np.bool8)):


Episode 100	Average Score: -79.55
Episode 200	Average Score: 20.811
Episode 300	Average Score: 20.44
Episode 400	Average Score: 29.31
Episode 500	Average Score: 35.30
Episode 600	Average Score: 33.63
Episode 700	Average Score: 31.22
Episode 800	Average Score: 59.98
Episode 900	Average Score: 61.63
Episode 1000	Average Score: 63.66
Episode 1100	Average Score: 51.19
Episode 1200	Average Score: 63.08
Episode 1300	Average Score: 78.97
Episode 1400	Average Score: 97.21
Episode 1500	Average Score: 113.06
Episode 1600	Average Score: 118.49
Episode 1700	Average Score: 110.14
Episode 1800	Average Score: 104.68
Episode 1900	Average Score: 78.458
Episode 2000	Average Score: 102.63
Episode 2100	Average Score: 72.900
Episode 2200	Average Score: 78.72
Episode 2300	Average Score: 91.48
Episode 2400	Average Score: 102.57
Episode 2500	Average Score: 95.833
Episode 2600	Average Score: 22.49
Episode 2700	Average Score: 88.12
Episode 2800	Average Score: 98.42
Episode 2900	Average Score: 91.297
Episode 300

### 4. Watch a Smart Agent!

In [7]:
# import required module
import glob
import os
 
# Directory the training loop writes its checkpoints to.
directory = 'agent_checkpoint/'


def _checkpoint_episode(filename):
    """Return the episode number embedded in a checkpoint file name.

    Names without any digits (e.g. ``checkpoint.pth``) sort after all
    numbered checkpoints.
    """
    digits = ''.join(ch for ch in filename if ch.isdigit())
    return int(digits) if digits else float('inf')


# Replay the checkpoints in training order. Sorting on the episode number does
# not depend on the order os.listdir happens to return, and the filter skips
# anything in the directory that is not a checkpoint file.
files = sorted(
    (name for name in os.listdir(directory) if name.endswith('.pth')),
    key=_checkpoint_episode,
)

agent = dqn_Agent
for file in files:
    # Load the weights from file; map_location lets checkpoints saved on a GPU
    # be loaded on whichever device this session is using.
    agent.qnetwork_local.load_state_dict(
        torch.load(os.path.join(directory, file), map_location=device)
    )
    print(file)

    # A separate name keeps the training environment `env` intact.
    render_env = gym.make('LunarLander-v2', render_mode="human")
    try:
        state = render_env.reset()[0]
        for j in range(200):
            action = agent.act(state)
            # With render_mode="human" the window is refreshed by step(), so no
            # explicit render() call is needed.
            state, reward, terminated, truncated, _ = render_env.step(action)
            if terminated or truncated:
                break
    finally:
        # Always release the render window, even if the rollout fails.
        render_env.close()

checkpoint100.pth
checkpoint200.pth
checkpoint300.pth
checkpoint400.pth
checkpoint500.pth
checkpoint600.pth
checkpoint700.pth
checkpoint800.pth
checkpoint900.pth
checkpoint1000.pth
checkpoint1100.pth
checkpoint1200.pth
checkpoint1300.pth
checkpoint1400.pth
checkpoint1500.pth
checkpoint1600.pth
checkpoint1700.pth
checkpoint1800.pth
checkpoint1900.pth
checkpoint2000.pth
checkpoint2100.pth
checkpoint2200.pth
checkpoint2300.pth
checkpoint2400.pth
checkpoint2500.pth
checkpoint2600.pth
checkpoint2700.pth
checkpoint2800.pth
checkpoint2900.pth
checkpoint3000.pth
checkpoint3100.pth
checkpoint3200.pth
checkpoint3300.pth
checkpoint3400.pth
checkpoint3500.pth
checkpoint3600.pth
checkpoint3700.pth
checkpoint3800.pth
checkpoint3900.pth
checkpoint4000.pth
