In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

%cd
%cd /home/adityasidharta/git/Pytorch_DQN

%env PROJECT_PATH = /home/adityasidharta/git/Pytorch_DQN

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import gym

In [None]:
from src.agent import *
from src.arch import *
from src.config import *
from src.envs import *
from src.learner import *
from src.memory import *
from src.policy import *
from src.cacher import *

# CartPole

In [None]:
# ---- CartPole hyperparameters ------------------------------------------------
# These constants are forwarded to Config (and MEMORY_SPACE / OBSERVATION_SPACE
# also to Memory) in the next cell. Their exact meaning is defined by the src.*
# modules; the grouping below is a reading aid only.

# Environment dimensions (hard-coded here rather than read from the env).
OBSERVATION_SPACE = (4,)
ACTION_SPACE = 2

# Replay memory / optimisation.
MEMORY_SPACE = 50_000
BATCH_SIZE = 128
GAMMA = 0.999  # presumably the reward discount factor -- confirm in src.config

# Exploration schedule (the policy built in the next cell is EGreedy).
EPS_START = 0.95
EPS_END = 0.05
EPS_DECAY = 400

# Training schedule.
N_TARGET_UPDATE = 10
N_EPISODE = 500

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Build every component of the CartPole agent and wire them together.

# Network moved onto the selected device.
# (The argument 1000 is presumably a layer width -- confirm in src.arch.)
arch = CartNet(1000).to(DEVICE)

# Bundle the hyperparameters from the previous cell; order is positional.
config = Config(
    BATCH_SIZE,
    GAMMA,
    EPS_START,
    EPS_END,
    EPS_DECAY,
    N_TARGET_UPDATE,
    N_EPISODE,
    DEVICE,
    MEMORY_SPACE,
    OBSERVATION_SPACE,
    ACTION_SPACE,
)

# `.unwrapped` yields the underlying environment without Gym's wrappers.
envs = gym.make('CartPole-v0').unwrapped

# The optimizer is handed over as a class (not an instance).
learner = Learner(arch, optim.RMSprop)
memory = Memory(MEMORY_SPACE, OBSERVATION_SPACE)
policy = EGreedy()
cacher = Cacher()

agent = Agent(learner, memory, policy, envs, config, cacher)

# Show the environment's spaces for comparison with ACTION_SPACE / OBSERVATION_SPACE.
print(envs.action_space, envs.observation_space, sep="\n")

In [None]:
# Train the CartPole agent built in the previous cell.
# NOTE(review): no argument is passed here, whereas the Lunar Lander section
# calls train_agent(N_EPISODE) -- confirm the default matches the intended
# number of episodes.
agent.train_agent()

In [None]:
# Run the trained CartPole agent.
# (10 is presumably the number of evaluation episodes -- confirm in src.agent.)
agent.play_agent(10)

In [None]:
# Plot what the agent's cacher stored under 'train_reward'
# (presumably the rewards logged during train_agent -- confirm in src.cacher).
agent.cacher.plot_cacher('train_reward')

In [None]:
# Plot what the agent's cacher stored under 'play_reward'
# (presumably the rewards logged during play_agent -- confirm in src.cacher).
agent.cacher.plot_cacher('play_reward')

# Lunar Lander

In [None]:
# ---- Lunar Lander hyperparameters --------------------------------------------
# These rebind the same module-level names used by the CartPole section, so
# this cell must run before the Lunar Lander cells that follow it. The constants
# are forwarded to Config (and MEMORY_SPACE / OBSERVATION_SPACE also to Memory)
# in the next cell; their exact meaning is defined by the src.* modules.

# Environment dimensions (hard-coded here rather than read from the env).
OBSERVATION_SPACE = (8,)
ACTION_SPACE = 4

# Replay memory / optimisation.
MEMORY_SPACE = 50_000
BATCH_SIZE = 128
GAMMA = 0.999  # presumably the reward discount factor -- confirm in src.config

# Exploration schedule (the policy built in the next cell is EGreedy).
EPS_START = 0.95
EPS_END = 0.05
EPS_DECAY = 8_000

# Training schedule.
N_TARGET_UPDATE = 10
N_EPISODE = 10_000

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Build every component of the Lunar Lander agent and wire them together.
# The names (arch, config, envs, ..., agent) replace the CartPole objects.

# Network moved onto the selected device.
# (The argument 1000 is presumably a layer width -- confirm in src.arch.)
arch = LunarNet(1000).to(DEVICE)

# Bundle the hyperparameters from the previous cell; order is positional.
config = Config(
    BATCH_SIZE,
    GAMMA,
    EPS_START,
    EPS_END,
    EPS_DECAY,
    N_TARGET_UPDATE,
    N_EPISODE,
    DEVICE,
    MEMORY_SPACE,
    OBSERVATION_SPACE,
    ACTION_SPACE,
)

# `.unwrapped` yields the underlying environment without Gym's wrappers.
envs = gym.make('LunarLander-v2').unwrapped

# The optimizer is handed over as a class (not an instance).
learner = Learner(arch, optim.RMSprop)
memory = Memory(MEMORY_SPACE, OBSERVATION_SPACE)
policy = EGreedy()
cacher = Cacher()

agent = Agent(learner, memory, policy, envs, config, cacher)

# Show the environment's spaces for comparison with ACTION_SPACE / OBSERVATION_SPACE.
print(envs.action_space, envs.observation_space, sep="\n")

In [None]:
# Train the Lunar Lander agent, passing N_EPISODE explicitly
# (presumably the number of training episodes -- confirm in src.agent).
agent.train_agent(N_EPISODE)

In [None]:
# Run the trained Lunar Lander agent.
# (10 is presumably the number of evaluation episodes -- confirm in src.agent.)
agent.play_agent(10)

In [None]:
# Plot what the agent's cacher stored under 'train_reward'
# (presumably the rewards logged during train_agent -- confirm in src.cacher).
agent.cacher.plot_cacher('train_reward')

In [None]:
# Plot what the agent's cacher stored under 'play_reward'
# (presumably the rewards logged during play_agent -- confirm in src.cacher).
agent.cacher.plot_cacher('play_reward')