# Reinforcement Learning - Deep Q Network

In [1]:
import random
import torch
import numpy as np

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import sys

# Fail fast when the interpreter is older than Python 3.6.
assert sys.version_info >= (3, 6), "Make sure you have Python 3.6 installed!"

## 1. Deep Q-Network (DQN)

In [3]:
import gym
from project.environments.customEnvironments import WindyGridWorld
# env = gym.envs.make("CartPole-v1")
# env = gym.envs.make("MountainCar-v0")


In [4]:
# env is a TimeLimit wrapper around an env, so use env.env to look into the env (but otherwise you can forget about this)
# ??env.env

### 2.5 Put it all together

In [5]:
def run_DQN():
    """Train a Deep Q-Network on the notebook-level ``env`` and plot the result.

    Relies on a global ``env`` that must already be defined when this function
    is called; that environment is seeded in place. The network is built with
    hard-coded sizes (``in_features=4``, ``out_features=2``), so ``env`` must
    match them -- presumably CartPole; confirm before using another env.

    Plots a 10-episode moving average of the episode durations with
    ``matplotlib.pyplot`` and returns nothing.
    """
    # LinearQN is kept importable for the commented-out alternative below.
    from DQN import DeepQN, LinearQN
    from DQN import train_QNet
    from DQN import run_episodes

    # Hyperparameters for this run.
    num_episodes = 100
    batch_size = 64
    discount_factor = 0.8
    learn_rate = 1e-3
    seed = 42  # This is not randomly chosen

    # Seed every RNG in play (before initializing the Q-network!) for
    # reproducibility; NumPy is included so any np.random use is covered too.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    env.seed(seed)

    my_DQN = DeepQN(in_features=4, out_features=2, discount_factor=discount_factor)
    # my_DQN = LinearQN()
    episode_durations = run_episodes(env, my_DQN, num_episodes, batch_size, learn_rate, semi_grad=True)

    def smooth(x, N):
        """Return the moving average of ``x`` over full windows of length ``N``."""
        # Prefix sums with a leading 0 so each window sum is a single subtraction.
        cumsum = np.cumsum(np.insert(x, 0, 0))
        return (cumsum[N:] - cumsum[:-N]) / float(N)

    # And see the results
    plt.plot(smooth(episode_durations, 10))
    plt.title('Episode durations per episode')

# run_DQN()

## 1. Deep SARSA-Network (DSN)

In [6]:
def run_deepSARSA():
    """Train a deep SARSA network on the notebook-level ``env`` and plot the result.

    Relies on a global ``env`` that must already be defined when this function
    is called; that environment is seeded in place. The network is built with
    hard-coded sizes (``in_features=4``, ``out_features=2``), so ``env`` must
    match them -- presumably CartPole; confirm before using another env.

    Plots a 10-episode moving average of the episode durations with
    ``matplotlib.pyplot`` and returns nothing.
    """
    from DSN import SARSANetwork, run_episodes

    # Hyperparameters for this run.
    num_episodes = 100
    batch_size = 64
    discount_factor = 0.8
    learn_rate = 1e-3
    num_hidden = 128
    seed = 42  # This is not randomly chosen

    # Seed every RNG in play (before initializing the network!) for
    # reproducibility; NumPy is included so any np.random use is covered too.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    env.seed(seed)

    my_DSN = SARSANetwork(in_features=4, num_hidden=num_hidden, out_features=2, discount_factor=discount_factor)
    episode_durations = run_episodes(env, my_DSN, num_episodes, batch_size, learn_rate, semi_grad=False)

    def smooth(x, N):
        """Return the moving average of ``x`` over full windows of length ``N``."""
        # Prefix sums with a leading 0 so each window sum is a single subtraction.
        cumsum = np.cumsum(np.insert(x, 0, 0))
        return (cumsum[N:] - cumsum[:-N]) / float(N)

    # And see the results
    plt.plot(smooth(episode_durations, 10))
    plt.title('Episode durations per episode')

# run_deepSARSA()

If you want to test/submit your solution, **restart the kernel, run all cells, and submit the `dqn_autograde.py` file to CodeGrade.**

In [7]:
from project.networks import SARSANetwork, DeepQNetwork
from project.policies import EpsilonGreedyPolicy

from project.train_network import train_episodes
from project.test_network import test_episodes

In [9]:
# env = WindyGridWorld()
env = gym.envs.make("CartPole-v1")

# Training configuration.
num_episodes = 500
batch_size = 1
discount_factor = 1
learn_rate = 1e-3
semi_grad = True
use_replay = False
seed = 42

# Apply the seed to every RNG and to the environment before the first reset
# and before the network is built, so that `seed` actually takes effect and
# the run can be reproduced.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
env.seed(seed)

# Show the initial observation.
print(env.reset())

# NOTE: despite the `dq_` prefix these hold a SARSA network and its
# epsilon-greedy policy; the names are kept in case later cells use them.
dq_network = SARSANetwork(in_features=4, out_features=2, discount_factor=discount_factor)
dq_policy = EpsilonGreedyPolicy(dq_network, steps_e=10000)
episode_durations, losses = train_episodes(
    env,
    dq_policy,
    num_episodes,
    batch_size,
    learn_rate,
    semi_grad=semi_grad,
    use_replay=use_replay,
)



[ 0.00839396  0.02602053 -0.0090631   0.003036  ]
 Episode 0 finished after 15 steps
 Episode 10 finished after 18 steps
 Episode 20 finished after 9 steps
 Episode 30 finished after 25 steps
 Episode 40 finished after 10 steps
 Episode 50 finished after 12 steps
 Episode 60 finished after 20 steps
 Episode 70 finished after 13 steps
 Episode 80 finished after 28 steps
 Episode 90 finished after 19 steps
 Episode 100 finished after 13 steps
 Episode 110 finished after 18 steps
 Episode 120 finished after 11 steps
 Episode 130 finished after 28 steps
 Episode 140 finished after 17 steps
 Episode 150 finished after 26 steps
 Episode 160 finished after 9 steps
 Episode 170 finished after 42 steps
 Episode 180 finished after 14 steps
 Episode 190 finished after 15 steps
 Episode 200 finished after 13 steps
 Episode 210 finished after 15 steps
 Episode 220 finished after 13 steps
 Episode 230 finished after 13 steps
 Episode 240 finished after 13 steps
 Episode 250 finished after 8 steps
 E