update prednet
ShigekiKarita committed Aug 5, 2015
1 parent 658909e commit 758ef91
Showing 4 changed files with 65 additions and 37 deletions.
main.py (5 changes: 3 additions & 2 deletions)
@@ -1,11 +1,12 @@
 from src.gravesnet import GravesPredictionNet
 from src.train import optimize, OptimizationSizes
 
+
 if __name__ == '__main__':
     sizes = OptimizationSizes(
         epoch_size=1000,
-        train_size=4,
-        eval_size=16,
+        train_size=1,
+        eval_size=4,
         mini_batch_size=1
     )
     model = GravesPredictionNet(nhidden=400)
src/gravesnet.py (47 changes: 27 additions & 20 deletions)
@@ -10,7 +10,8 @@
 
 
 def gauss_bernoulli_params(m, y):
-    y_mixws, y_means, y_stdds, y_corrs, y_e = split_axis_by_widths(y, [m, 2 * m, 2 * m, m, 1])
+    width = [m, 2 * m, 2 * m, m, 1]
+    y_mixws, y_means, y_stdds, y_corrs, y_e = split_axis_by_widths(y, width)
     y_mixws = F.softmax(y_mixws)
     y_means0, y_means1 = split_axis_by_widths(y_means, 2)
     y_stdds0, y_stdds1 = split_axis_by_widths(F.exp(y_stdds), 2)
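
For context, the widths [m, 2 * m, 2 * m, m, 1] are the mixture-density output of Graves (2013): m mixture weights, 2m means, 2m standard deviations, m correlations, and one end-of-stroke probability, matching the l4 layer's 1 + ngauss * 6 units. The NumPy sketch below illustrates that parameterisation only; it is not the repository's split_axis_by_widths/Chainer code, and the tanh and sigmoid squashings of the correlation and end-of-stroke terms are assumed from the paper rather than visible in this diff.

import numpy as np

def gauss_bernoulli_params_ref(m, y):
    # Illustrative sketch, not part of the commit.
    # y: (batch, 6 * m + 1) raw network output.
    mixws, means, stdds, corrs, e = np.split(y, np.cumsum([m, 2 * m, 2 * m, m]), axis=1)
    mixws = np.exp(mixws - mixws.max(axis=1, keepdims=True))
    mixws /= mixws.sum(axis=1, keepdims=True)   # softmax over mixture weights
    stdds = np.exp(stdds)                       # standard deviations > 0
    corrs = np.tanh(corrs)                      # correlations in (-1, 1)      [assumed]
    e = 1.0 / (1.0 + np.exp(-e))                # end-of-stroke probability    [assumed]
    return mixws, means, stdds, corrs, e
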
@@ -47,43 +48,49 @@ class GravesPredictionNet(chainer.FunctionSet):
     """
 
     def __init__(self, nhidden=100, ngauss=30):
-        ninput=3
+        ninput = 3
         super(GravesPredictionNet, self).__init__(
-            l1_first=F.Linear(ninput, 4 * nhidden, nobias=True),
-            l1_recur=F.Linear(nhidden, 4 * nhidden),
+            l1_a=F.Linear(ninput + nhidden, nhidden),
+            l1_x=F.Linear(ninput + 2 * nhidden, 3 * nhidden),
 
-            l2_first=F.Linear(ninput, 4 * nhidden, nobias=True),
-            l2_recur=F.Linear(nhidden, 4 * nhidden),
-            l2_input=F.Linear(nhidden, 4 * nhidden, nobias=True),
+            l2_a=F.Linear(ninput + 2 * nhidden, nhidden),
+            l2_x=F.Linear(ninput + 3 * nhidden, 3 * nhidden),
 
-            l3_first=F.Linear(ninput, 4 * nhidden, nobias=True),
-            l3_recur=F.Linear(nhidden, 4 * nhidden),
-            l3_input=F.Linear(nhidden, 4 * nhidden, nobias=True),
+            l3_a=F.Linear(ninput + 2 * nhidden, nhidden),
+            l3_x=F.Linear(ninput + 3 * nhidden, 3 * nhidden),
 
             l4=F.Linear(nhidden * 3, 1 + ngauss * 6)
         )
 
     def initial_state(self, minibatch_size, context, label, train=True):
         state = dict()
-        nhidden = self.l1_recur.W.shape[1]
+        nhidden = self.l1_a.W.shape[0]
         shape = (minibatch_size, nhidden)
         for n in range(1, 4):
            state.update({
-                '%s%s' % (label, n): chainer.Variable(context(numpy.zeros(shape, dtype=numpy.float32)), volatile=not train)
+                '%s%s' % (label, n):
+                    chainer.Variable(
+                        context(numpy.zeros(shape, dtype=numpy.float32)),
+                        volatile=not train)
             })
         return state
 
     def bottle_neck(self, hidden_state, lstm_cells, x_data, train):
         x = chainer.Variable(x_data, volatile=not train)
-
-        h1_in = self.l1_first(x) + self.l1_recur(hidden_state['h1'])
-        c1, h1 = F.lstm(lstm_cells['c1'], h1_in)
+
+        a1 = self.l1_a(F.concat((x, hidden_state["h1"])))
+        x1 = self.l1_x(F.concat((x, hidden_state["h1"], lstm_cells["c1"])))
+        c1, h1 = peephole_lstm(lstm_cells['c1'], a1, x1)
         h1 = gradient_clip(h1, 10.0)
-        h2_in = self.l2_first(x) + self.l2_recur(hidden_state['h2']) + self.l2_input(h1)
-        c2, h2 = F.lstm(lstm_cells['c2'], h2_in)
+
+        a2 = self.l2_a(F.concat((x, hidden_state["h2"], h1)))
+        x2 = self.l2_x(F.concat((x, hidden_state["h2"], h1, lstm_cells["c2"])))
+        c2, h2 = peephole_lstm(lstm_cells['c2'], a2, x2)
         h2 = gradient_clip(h2, 10.0)
-        h3_in = self.l3_first(x) + self.l3_recur(hidden_state['h3']) + self.l3_input(h2)
-        c3, h3 = F.lstm(lstm_cells['c3'], h3_in)
+
+        a3 = self.l2_a(F.concat((x, hidden_state["h3"], h2)))
+        x3 = self.l2_x(F.concat((x, hidden_state["h3"], h2, lstm_cells["c3"])))
+        c3, h3 = peephole_lstm(lstm_cells['c3'], a3, x3)
         h3 = gradient_clip(h3, 10.0)
 
         y = self.l4(F.concat((h1, h2, h3)))
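
The substantive change above swaps Chainer's F.lstm for a peephole_lstm helper, so each layer's gates can see the previous cell state, as in Graves (2013); the new l*_x layers take lstm_cells as part of their input, which appears to be where the peephole connection enters. As a rough reference only, a generic peephole LSTM step looks like the NumPy sketch below; the weight names are placeholders and this is not the repository's peephole_lstm implementation.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def peephole_lstm_step(c_prev, h_prev, x, W, p, b):
    # Generic peephole LSTM recurrence (Graves, 2013); illustrative only.
    # W: input/recurrent matrices, p: peephole vectors, b: biases (all placeholders).
    i = sigmoid(x @ W['xi'] + h_prev @ W['hi'] + p['ci'] * c_prev + b['i'])  # input gate
    f = sigmoid(x @ W['xf'] + h_prev @ W['hf'] + p['cf'] * c_prev + b['f'])  # forget gate
    c = f * c_prev + i * np.tanh(x @ W['xc'] + h_prev @ W['hc'] + b['c'])    # new cell state
    o = sigmoid(x @ W['xo'] + h_prev @ W['ho'] + p['co'] * c + b['o'])       # output gate peeks at new cell
    h = o * np.tanh(c)
    return c, h
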
@@ -197,4 +204,4 @@ def forward(model, x_list):
 from chainer import optimizers
 
 def train(model, x_list):
-    opt = optimizers.RMSpropGraves()
+    opt = optimizers.RMSpropGraves()
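
The train stub above only constructs the optimizer. A typical Chainer 1.x training step around optimizers.RMSpropGraves would look roughly like the sketch below; the setup argument and the forward_fn/batches names are assumptions for illustration, not code from this commit or from src/train.py.

from chainer import optimizers

def train_sketch(model, forward_fn, batches):
    # Illustrative sketch, not part of the commit.
    opt = optimizers.RMSpropGraves()
    opt.setup(model)        # very early Chainer releases used model.collect_parameters() here
    for batch in batches:
        opt.zero_grads()    # clear accumulated gradients
        loss = forward_fn(model, batch)
        loss.backward()     # backprop through the unrolled network
        opt.update()        # RMSProp variant used by Graves (2013)
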
src/train.py (48 changes: 33 additions & 15 deletions)
@@ -10,7 +10,9 @@
 
 
 class OptimizationSizes(object):
-    def __init__(self, epoch_size=1000, train_size=1, eval_size=8, mini_batch_size=1):
+    def __init__(self,
+                 epoch_size=1000, train_size=1,
+                 eval_size=8, mini_batch_size=1):
         self.epoch = epoch_size
         self.train = train_size
         self.eval = eval_size
@@ -31,7 +33,9 @@ def load_dataset(path):
 def mini_batch(mb_size, xs, index):
     xs_size = xs.shape[0]
     jump = xs_size // mb_size
-    return numpy.array([xs[(jump * j + index) % xs_size] for j in range(mb_size)])
+    return numpy.array(
+        [xs[(jump * j + index) % xs_size] for j in range(mb_size)]
+    )
 
 
 def reshape2d(x):
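
The mini_batch helper shown above strides through the dataset so each of the mb_size batch slots reads from a different region of xs. A small hypothetical example of the indexing, assuming mini_batch is imported from src/train.py:

import numpy as np
from src.train import mini_batch

xs = np.arange(8)            # stand-in for 8 training items
# jump = 8 // 2 = 4, so index=3 selects xs[3] and xs[(4 + 3) % 8] = xs[7]
print(mini_batch(2, xs, 3))  # -> [3 7]
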
@@ -66,10 +70,11 @@ def evaluate(context, model, lstm_cells: chainer.Variable,
         e = es[i]
         total_seq_len += len(e) - 1
         hidden_state = model.initial_state(1, context, "h", train=False)
-
         for t in range(len(es[i]) - 1):
             ci, cx, ce = create_inout(context, x, e, t, mean, stddev)
-            hidden_state, lstm_cells, loss = model.forward_one_step(hidden_state, lstm_cells, ci, cx, ce, train=False)
+            hidden_state, lstm_cells, loss = model.forward_one_step(
+                hidden_state, lstm_cells, ci, cx, ce, train=False
+            )
             total += loss.data.reshape(())
 
     set_volatile(lstm_cells, False)
@@ -113,10 +118,14 @@ def optimize(model, sizes: OptimizationSizes, data_dir: str):
         e = es[i]
         seq_len = len(e)
         hidden_state = model.initial_state(sizes.mini_batch, context, "h")
-        accum_loss = chainer.Variable(context(numpy.zeros((), dtype=numpy.float32)))
+        accum_loss = chainer.Variable(
+            context(numpy.zeros((), dtype=numpy.float32))
+        )
         for t in range(seq_len - 1):
             inout = create_inout(context, x, e, t, mean, stddev)
-            hidden_state, lstm_cells, loss_t = model.forward_one_step(hidden_state, lstm_cells, *inout)
+            hidden_state, lstm_cells, loss_t = model.forward_one_step(
+                hidden_state, lstm_cells, *inout
+            )
             accum_loss += loss_t
             total_loss += loss_t.data.reshape(())
             n_point += 1
@@ -129,11 +138,12 @@ def optimize(model, sizes: OptimizationSizes, data_dir: str):
 
         now = time.time()
         t_loss = chainer.cuda.to_cpu(total_loss)
-        print('epoch {}, iter {}, loss/point: {:.6f}, loss/seq: {:.6f}, point/sec: {:.2f} '.format(
-            epoch, n,
-            t_loss / n_point,
-            t_loss / sizes.train,
-            float(n_point) / (now - prev)))
+        print(
+            'epoch {}, iter {}, loss/point: {:.6f}, loss/seq: {:.6f}, point/sec: {:.2f} '.format(
+                epoch, n,
+                t_loss / n_point,
+                t_loss / sizes.train,
+                float(n_point) / (now - prev)))
         sys.stdout.flush()
         loss_point_train += t_loss / n_point
         loss_seq_train += t_loss
@@ -145,7 +155,9 @@ def optimize(model, sizes: OptimizationSizes, data_dir: str):
             pickle.dump(model, open('model_%08d' % n_eval, 'wb'), -1)
             for k, v in lstm_cells.items():
                 d = chainer.cuda.to_cpu(v.data)
-                pickle.dump(d, open('lstm_{}_{:08d}'.format(k, n_eval), 'wb'), -1)
+                pickle.dump(
+                    d, open('lstm_{}_{:08d}'.format(k, n_eval), 'wb'), -1
+                )
 
             n_eval += 1
             print("eval-%08d" % n_eval)
@@ -154,8 +166,14 @@ def optimize(model, sizes: OptimizationSizes, data_dir: str):
                 loss_seq_train / sizes.eval))
             sys.stdout.flush()
             lstm_copy = lstm_cells.copy()
-            loss_point, loss_seq = evaluate(context, model, lstm_copy, sizes, txs, tes, mean, stddev)
-            print('\ttest: [loss/point: {:.6f}, loss/seq: {:.6f}]'.format(loss_point, loss_seq))
+            loss_point, loss_seq = evaluate(
+                context, model, lstm_copy, sizes, txs, tes, mean, stddev
+            )
+            print(
+                '\ttest: [loss/point: {:.6f}, loss/seq: {:.6f}]'.format(
+                    loss_point, loss_seq
+                )
+            )
             sys.stdout.flush()
             loss_point_train = 0.0
             loss_seq_train = 0.0
@@ -169,4 +187,4 @@ def parse_args():
     parser.add_argument('--gpu', '-g', default=0, type=int,
                         help='GPU ID (negative value indicates CPU)')
     args = parser.parse_args()
-    return args
+    return args
test/test_gaussian_mixture_2d.py (2 changes: 2 additions & 0 deletions)
@@ -2,6 +2,7 @@
 
 import numpy
 from numpy.random import uniform, binomial
+
 import chainer
 from chainer import cuda
 from chainer import gradient_check
@@ -12,6 +13,7 @@
 from src.functions.gaussian_mixture_2d_ref import gaussian_mixture_2d_ref
 from src import gravesnet
 
+
 if cuda.available:
     cuda.init()
 
