Commit: OOP refactoring
adityasidharta committed Dec 9, 2018
1 parent b161120 commit e4371db
Showing 15 changed files with 174 additions and 54 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -100,5 +100,8 @@ venv.bak/
 # mkdocs documentation
 /site
 
+# idea
+.idea/
+
 # mypy
 .mypy_cache/
File renamed without changes.
7 changes: 0 additions & 7 deletions model/agent.py

This file was deleted.

13 changes: 0 additions & 13 deletions model/config.py

This file was deleted.

2 changes: 1 addition & 1 deletion notebooks/reinforcement_q_learning.ipynb
@@ -682,7 +682,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.6"
+   "version": "3.6.5"
   }
  },
 "nbformat": 4,
…
Empty file added src/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions src/agent.py
@@ -0,0 +1,12 @@
class Agent(object):
    def __init__(self, learner, memory, policy, value_function, envs, config):
        self.learner = learner
        self.memory = memory
        self.policy = policy
        self.value_function = value_function
        self.envs = envs
        self.config = config

    def train_agent(self, n_iteration):
        pass
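Editor's note: train_agent is committed as a stub. A minimal sketch of the loop it could grow into, assuming a Gym-style envs object (reset, and step returning a 4-tuple) and the Memory, Policy, and learner APIs introduced elsewhere in this commit; none of this is the author's code:

    def train_agent(self, n_iteration):
        for _ in range(n_iteration):
            state = self.envs.reset()
            finish = False
            while not finish:
                action = self.policy.select_action(self.value_function)
                next_state, reward, finish, _ = self.envs.step(action)
                # store the transition, then take one optimization step
                self.memory.save(list(state), int(action), float(reward), list(next_state), int(finish))
                self.learner.optimize_new_qnet(self.memory, self.config)
                state = next_state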

21 changes: 21 additions & 0 deletions src/config.py
@@ -0,0 +1,21 @@
import torch

BATCH_SIZE = 128
GAMMA = 0.999
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 200
TARGET_UPDATE = 10
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class Config:
    def __init__(self, batch_size, gamma, eps_start, eps_end, eps_decay, target_update, device):
        self.batch_size = batch_size
        self.gamma = gamma
        self.eps_start = eps_start
        self.eps_end = eps_end
        self.eps_decay = eps_decay
        self.target_update = target_update
        self.device = device
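Editor's note: a minimal usage sketch wiring the module-level defaults above into the new Config class (purely illustrative, not part of the commit):

    config = Config(
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        eps_start=EPS_START,
        eps_end=EPS_END,
        eps_decay=EPS_DECAY,
        target_update=TARGET_UPDATE,
        device=DEVICE,
    )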

File renamed without changes.
29 changes: 17 additions & 12 deletions model/q_function.py → src/learner.py
@@ -1,39 +1,44 @@
 import torch
 
 
 # TODO finish Q-Function
-class Q_Function(object):
+class Optimize_Model(object):
     def __init__(self, old_qnet, new_qnet, optimizer):
         self.new_qnet = new_qnet
         self.old_qnet = old_qnet
         self.old_qnet.load_state_dict(self.new_qnet.state_dict())
         self.old_qnet = self.old_qnet.eval()
         self.optimizer = optimizer
-        self.torch_device = self.config['DEVICE']
-        self.gamma = self.config['GAMMA']
-        self.batch_size = self.config['BATCH_SIZE']
 
     def calc_q(self, state_tensor):
         with torch.no_grad():
             return self.old_qnet(state_tensor).cpu().numpy()
 
-    def optimize_new_qnet(self, batch_size, memory, config):
+    def optimize_new_qnet(self, memory, config):
+        torch_device = config.device
+        gamma = config.gamma
+        batch_size = config.batch_size
+
         if len(memory) < batch_size:
             pass
         else:
             state_tensor, action_tensor, reward_tensor, next_state_tensor, finish_tensor = memory.sample(
-                batch_size, torch=True, device=config["DEVICE"]
+                batch_size, return_tensor=True, torch_device=torch_device
             )
 
+            finish_index = torch.nonzero(finish_tensor.view(-1)).view(-1)
             cur_q = self.new_qnet(state_tensor)
-            cur_qa = cur_q.gather(action_tensor)
+            n_actions = cur_q.shape[1]
+            cur_qa = cur_q.gather(1, action_tensor)
 
             with torch.no_grad():
-                unfinished_next_state_tensor = next_state_tensor[~finish_tensor]
-                next_q = torch.zeros_like(cur_q)
-                next_q[~finish_tensor] = old_net(unfinished_next_state_tensor)
+                unfinished_next_state_tensor = next_state_tensor[finish_index, :]
+                next_q = self.old_qnet(unfinished_next_state_tensor)
                 next_qa = next_q.max(1)[0]
-            exp_qa = reward_tensor + (config["GAMMA"] * next_qa)
+                exp_qa = reward_tensor + (gamma * next_qa)
 
-            # LOSS FUNCTION
+            # LOSS FUNCTION
 
             raise NotImplementedError()
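Editor's note: as committed, optimize_new_qnet still ends in NotImplementedError, and the new indexing looks inverted: finish_index selects the finished transitions, yet the following line treats them as the unfinished ones (the deleted ~finish_tensor logic was the reverse). A minimal sketch of how the TODO could be completed, following the standard DQN recipe; this is editorial, not the author's code, and it assumes the tensors produced above:

    # indices of transitions that did NOT end the episode
    unfinished_index = torch.nonzero((finish_tensor == 0).view(-1)).view(-1)
    next_qa = torch.zeros(batch_size, device=torch_device)
    with torch.no_grad():
        # bootstrap only from next states that exist (non-terminal)
        next_q = self.old_qnet(next_state_tensor[unfinished_index, :])
        next_qa[unfinished_index] = next_q.max(1)[0]
    exp_qa = reward_tensor + gamma * next_qa

    # Huber loss between predicted Q(s, a) and the bootstrapped target
    loss = torch.nn.functional.smooth_l1_loss(cur_qa.squeeze(1), exp_qa)
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()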
39 changes: 20 additions & 19 deletions model/memory.py → src/memory.py
@@ -1,14 +1,15 @@
 import random
 import numpy as np
 import torch
 
 
 # State, Action, Reward, Next State
 class Memory(object):
     def __init__(self, capacity, n_state):
         self.capacity = capacity
-        self.memory = list()
-        self.position = 0
+        self.n_state = n_state
+        self.memory = []
+        self.position = 0
 
     def __len__(self):
         return len(self.memory)
@@ -21,33 +22,33 @@ def update_position(self):
         if self.position == self.capacity:
             self.position = 0
 
-    def save(self, state, action, reward, next_state, finish):
-        if finish:
-            next_state = [np.nan] * self.n_state
+    def save(self, state: list, action: int, reward: float, next_state: list, finish: int):
+        assert len(state) == self.n_state
+        next_state = [np.nan] * self.n_state if finish else next_state
         if self.is_memory_full():
             self.memory[self.position] = (state, action, reward, next_state, finish)
         else:
-            self.memory.extend([state, action, reward, next_state, finish])
+            self.memory.append([state, action, reward, next_state, finish])
         self.update_position()
 
-    def sample(self, sample_size, torch=False, device=None):
+    def sample(self, sample_size, return_tensor=False, torch_device=None):
+        if torch_device is None:
+            torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         if sample_size > len(self.memory):
             raise ValueError("Sample size is bigger than the memory size")
         else:
             sample_list = random.sample(self.memory, sample_size)
-            state = np.stack([x[0] for x in sample_list])
-            action = np.stack([x[1] for x in sample_list])
-            reward = np.stack([x[2] for x in sample_list])
-            next_state = np.stack([x[3] for x in sample_list])
-            finish = np.stack([x[4] for x in sample_list])
-            if torch:
-                assert device is not None
-                state_tensor = torch.from_numpy(state).to(device)
-                action_tensor = torch.from_numpy(action).to(device)
-                reward_tensor = torch.from_numpy(reward).to(device)
-                next_state_tensor = torch.from_numpy(next_state).to(device)
-                finish_tensor = torch.from_numpy(finish).to(device)
+            state = np.stack([x[0] for x in sample_list]).astype(float)
+            action = np.stack([x[1] for x in sample_list]).astype(int)
+            reward = np.stack([x[2] for x in sample_list]).astype(float)
+            next_state = np.stack([x[3] for x in sample_list]).astype(float)
+            finish = np.stack([x[4] for x in sample_list]).astype(int)
+            if return_tensor:
+                state_tensor = torch.from_numpy(state).to(torch_device, dtype=torch.float)
+                action_tensor = torch.from_numpy(action).to(torch_device, dtype=torch.int)
+                reward_tensor = torch.from_numpy(reward).to(torch_device, dtype=torch.float)
+                next_state_tensor = torch.from_numpy(next_state).to(torch_device, dtype=torch.float)
+                finish_tensor = torch.from_numpy(finish).to(torch_device, dtype=torch.int)
                 return (
                     state_tensor,
                     action_tensor,
…
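Editor's note: a quick usage sketch of the refactored Memory API (illustrative only; it assumes sample returns the five values in the order unpacked above):

    memory = Memory(capacity=100, n_state=4)
    memory.save([0.1, 0.2, 0.3, 0.4], action=1, reward=1.0, next_state=[0.2, 0.3, 0.4, 0.5], finish=0)
    state_t, action_t, reward_t, next_state_t, finish_t = memory.sample(
        1, return_tensor=True, torch_device=torch.device("cpu")
    )
    # state_t: shape (1, 4), dtype torch.float32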
3 changes: 3 additions & 0 deletions model/policy.py → src/policy.py
@@ -19,6 +19,9 @@ def calc_eps_threshold(self):
     def update_n_actions(self):
         self.n_actions = self.n_actions + 1
 
+    def reset_n_actions(self):
+        self.n_actions = 0
+
     def select_action(self, value_function):
         eps_threshold = self.calc_eps_threshold()
         self.update_n_actions()
…
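Editor's note: calc_eps_threshold is collapsed in this view. Given the EPS_START, EPS_END, and EPS_DECAY constants in src/config.py, it plausibly follows the exponential schedule from the PyTorch DQN tutorial that notebooks/reinforcement_q_learning.ipynb is based on; this is an assumption about the hidden body, sketched as:

    import math

    def calc_eps_threshold(eps_start, eps_end, eps_decay, n_actions):
        # decay from eps_start toward eps_end as the action counter grows
        return eps_end + (eps_start - eps_end) * math.exp(-n_actions / eps_decay)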
4 changes: 2 additions & 2 deletions model/arch.py → src/value_function.py
@@ -1,9 +1,9 @@
 from torch import nn as nn
 
 
-class CartNet(nn.Module):
+class ValueFunction(nn.Module):
     def __init__(self, hidden_dim):
-        super(CartNet, self).__init__()
+        super(ValueFunction, self).__init__()
         self.hidden_dim = hidden_dim
         self.model_list = [
             nn.Linear(4, hidden_dim),
…
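Editor's note: the rest of this file is collapsed. One caveat worth flagging: a plain Python list of layers is not registered by nn.Module, so its parameters stay invisible to optimizers and to .to(device). Unless the hidden lines already wrap it (e.g. nn.Sequential(*self.model_list) or nn.ModuleList), something like this sketch is the usual fix (layer sizes illustrative; 4 inputs and 2 actions match CartPole):

    self.model = nn.Sequential(
        nn.Linear(4, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, 2),
    )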
Empty file added test/__init__.py
Empty file.
95 changes: 95 additions & 0 deletions test/test_memory.py
@@ -0,0 +1,95 @@
from src.memory import Memory
import random
import torch


def create_transitions(state=None,
                       action=None,
                       reward=None,
                       next_state=None,
                       finish=None,
                       n_state=None):
    n_state = 4 if n_state is None else n_state
    state = [random.random() for x in range(n_state)] if state is None else state
    action = random.sample([0, 1], 1)[0] if action is None else action
    reward = random.random() if reward is None else reward
    next_state = [random.random() for x in range(n_state)] if next_state is None else next_state
    finish = random.sample([False, True], 1)[0] if finish is None else finish
    return state, action, reward, next_state, finish


def test_init():
    memory = Memory(10, 4)
    assert len(memory) == 0
    assert not memory.is_memory_full()
    assert memory.position == 0
    assert memory.n_state == 4
    assert memory.capacity == 10


def test_len():
    memory = Memory(10, 4)
    for idx in range(5):
        state, action, reward, next_state, finish = create_transitions()
        memory.save(state, action, reward, next_state, finish)
    assert len(memory) == 5


def test_is_memory_full():
    memory = Memory(10, 4)
    assert not memory.is_memory_full()
    for idx in range(5):
        state, action, reward, next_state, finish = create_transitions()
        memory.save(state, action, reward, next_state, finish)
    assert not memory.is_memory_full()
    for idx in range(5):
        state, action, reward, next_state, finish = create_transitions()
        memory.save(state, action, reward, next_state, finish)
    assert memory.is_memory_full()
    for idx in range(5):
        state, action, reward, next_state, finish = create_transitions()
        memory.save(state, action, reward, next_state, finish)
    assert memory.is_memory_full()


def test_update_position():
    memory = Memory(10, 4)
    assert memory.position == 0
    for idx in range(5):
        state, action, reward, next_state, finish = create_transitions()
        memory.save(state, action, reward, next_state, finish)
    assert memory.position == 5
    for idx in range(5):
        state, action, reward, next_state, finish = create_transitions()
        memory.save(state, action, reward, next_state, finish)
    assert memory.position == 0
    for idx in range(5):
        state, action, reward, next_state, finish = create_transitions()
        memory.save(state, action, reward, next_state, finish)
    assert memory.position == 5


def test_sample():
    memory = Memory(10, 4)
    for idx in range(5):
        state, action, reward, next_state, finish = create_transitions(
            state=[idx, idx, idx, idx],
            reward=idx,
            next_state=[idx, idx, idx, idx],
        )
        memory.save(state, action, reward, next_state, finish)
    sampled_state, sampled_action, sampled_reward, sampled_next_state, sampled_finish = memory.sample(3)
    assert sampled_state.shape == (3, 4)
    assert sampled_next_state.shape == (3, 4)
    assert (sampled_state[:, 0] == sampled_reward).all()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tensor_state, tensor_action, tensor_reward, tensor_next_state, tensor_finish = memory.sample(
        3, return_tensor=True, torch_device=device
    )
    assert tensor_state.shape == torch.Size([3, 4])
    assert tensor_next_state.shape == torch.Size([3, 4])

    # check dtypes rather than type strings so the test passes on both CPU and GPU machines
    assert tensor_state.dtype == torch.float32
    assert tensor_action.dtype == torch.int32
    assert tensor_reward.dtype == torch.float32
    assert tensor_next_state.dtype == torch.float32
    assert tensor_finish.dtype == torch.int32
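Editor's note: with the new src/ and test/ packages (both carry an __init__.py), the suite should run from the repository root with pytest, assuming pytest is installed: pytest test/test_memory.py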
