Commit 0792d8d

export model in training part

ShigekiKarita committed Jul 31, 2015
1 parent b68c7c6
Showing 2 changed files with 64 additions and 25 deletions.
83 changes: 61 additions & 22 deletions main.py
@@ -1,6 +1,7 @@
 import time
 import argparse
 import pickle
+import sys
 
 import chainer
 import chainer.cuda
@@ -10,6 +11,7 @@
 
 from src.gravesnet import GravesPredictionNet
 
+
 parser = argparse.ArgumentParser()
 parser.add_argument('--model', default='res/model', type=str,
                     help='Trained model')
@@ -23,13 +25,12 @@
 eval_len = 8
 n_epoch = 1000
 use_gpu = args.gpu != -1
-n_hidden = 400
+n_hidden = 100
 model = GravesPredictionNet(n_hidden)
 
 range = six.moves.range
 mod = numpy
 context = lambda x: x
-
 if chainer.cuda.available and use_gpu:
     print("use gpu")
     chainer.cuda.init(args.gpu)
@@ -68,22 +69,33 @@ def create_inout(context, x, e, i, mean, stddev):
     return tuple(reshape2d(i) for i in (xe, t_x, t_e))
 
 
+def set_volatile(state, volatile):
+    for v in state.values():
+        v.volatile = volatile
+    return state
+
+
 def evaluate(context, state, xs, es, mean, stddev):
     # TODO: init hidden-state (but use LSTM-state)
+    state = set_volatile(state, True)
     total = mod.zeros(())
-    for v in state.values():
-        v.volatile = True
     indices = numpy.arange(len(es))
     numpy.random.shuffle(indices)
+    total_seqlen = 0
+
     for i in indices[:eval_len]:
         x = xs[i]
         e = es[i]
+        total_seqlen += len(e) - 1
+
         for t in range(len(es[i]) - 1):
             ci, cx, ce = create_inout(context, x, e, t, mean, stddev)
             state_ev, loss = model.forward_one_step(state, ci, cx, ce, train=False)
             total += loss.data.reshape(())
-    for v in state.values():
-        v.volatile = False
-    return chainer.cuda.to_cpu(total) / eval_len
+    state = set_volatile(state, False)
+    t_loss = chainer.cuda.to_cpu(total)
+    return t_loss / total_seqlen, t_loss / eval_len
 
 
 if __name__ == '__main__':
@@ -93,6 +105,7 @@ def evaluate(context, state, xs, es, mean, stddev):
     mean, stddev = pickle.load(open(d + "trainset_mean_std.npy", "rb"))
     print("load dataset")
     print("train", len(es), "test", len(tes))
+    sys.stdout.flush()
 
     state = model.initial_state(mb_size, context)
     accum_loss = chainer.Variable(mod.zeros((), dtype=numpy.float32))
@@ -103,19 +116,26 @@
     indices = numpy.arange(len(es))
     prev = time.time()
     n_point = 0
+    loss_point_train = 0.0
+    loss_seq_train = 0.0
+    n_eval = 0
     for epoch in range(n_epoch):
         numpy.random.shuffle(indices)
         for i, n in zip(indices, range(len(es))):
             x = xs[i]
             e = es[i]
             seq_len = len(e)
 
             # TODO: init hidden-state
             for t in range(seq_len - 1):
-                inout = create_inout(context, x, e, t, mean, stddev)
-                state, loss_t = model.forward_one_step(state, *inout)
-                accum_loss += loss_t
-                total_loss += loss_t.data.reshape(())
-                n_point += 1
+                try:
+                    inout = create_inout(context, x, e, t, mean, stddev)
+                    state, loss_t = model.forward_one_step(state, *inout)
+                    accum_loss += loss_t
+                    total_loss += loss_t.data.reshape(())
+                    n_point += 1
+                except chainer.cuda.drv.MemoryError:
+                    print("stop BPTT by out-of-memory")
+                    break
 
             rmsprop.zero_grads()
             accum_loss.backward()
@@ -125,16 +145,35 @@
 
         if (n + 1) % update_len == 0:
             now = time.time()
-            throuput = float(n_point) / (now - prev)
-            average_loss = chainer.cuda.to_cpu(total_loss) / update_len
-            print('iter {} training loss: {:.6f} ({:.2f} point/sec)'.format(
-                n + 1, average_loss, throuput))
-            prev = now
+            t_loss = chainer.cuda.to_cpu(total_loss)
+            print('epoch {}, iter {}, loss/point: {:.6f}, loss/seq: {:.6f}, point/sec: {:.2f}'.format(
+                epoch, n,
+                t_loss / n_point,
+                t_loss / update_len,
+                float(n_point) / (now - prev)))
+            sys.stdout.flush()
+            loss_point_train += t_loss / n_point
+            loss_seq_train += t_loss
             total_loss.fill(0)
             n_point = 0
+            prev = now
 
         if (n + 1) % eval_len == 0:
-            print("evaluating ...")
-            ev_loss = evaluate(context, state, txs, tes, mean, stddev)
-            print('test loss: {}'.format(ev_loss))
-            # pickle.dump(model, open('model%04d' % (i+1), 'wb'), -1)
+            pickle.dump(model, open('model_%08d' % n_eval, 'wb'), -1)
+            for k, v in state.items():
+                d = chainer.cuda.to_cpu(v.data)
+                pickle.dump(d, open('state_{}_{:08d}'.format(k, n_eval), 'wb'), -1)
+
+            n_eval += 1
+            print("eval-%08d" % n_eval)
+            print('\ttrain: [loss/point: {:.6f}, loss/seq: {:.6f}]'.format(
+                loss_point_train / eval_len,
+                loss_seq_train / eval_len))
+            sys.stdout.flush()
+            loss_point, loss_seq = evaluate(context, state, txs, tes, mean, stddev)
+            print('\ttest: [loss/point: {:.6f}, loss/seq: {:.6f}]'.format(loss_point, loss_seq))
+            sys.stdout.flush()
+            loss_point_train = 0.0
+            loss_seq_train = 0.0
+            prev = time.time()
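
The new eval branch is what gives the commit its title: every eval_len sequences, the whole model object is pickled to 'model_%08d' and each recurrent-state array is moved to the CPU and pickled to 'state_{k}_{n_eval}'. A minimal load-back sketch (not part of this commit; the checkpoint index and the h1..c3 state keys are assumptions based on initial_state() in src/gravesnet.py):

import pickle

import chainer

n_eval = 0  # which checkpoint to restore (assumed)

# main.py pickles the GravesPredictionNet object whole, so one load suffices
model = pickle.load(open('model_%08d' % n_eval, 'rb'))

# each state entry was dumped as a bare CPU array; rewrap it in a Variable
# so it can be fed back into model.forward_one_step
state = {}
for k in ('h1', 'c1', 'h2', 'c2', 'h3', 'c3'):
    data = pickle.load(open('state_{}_{:08d}'.format(k, n_eval), 'rb'))
    state[k] = chainer.Variable(data)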
6 changes: 3 additions & 3 deletions src/gravesnet.py
@@ -54,15 +54,15 @@ def __init__(self, nhidden=100, ngauss=30):
         )
 
     def initial_state(self, minibatch_size, context, train=True):
+        # FIXME: separate initialization of hidden (tmp params) and LSTM (consistent params)
         state = dict()
         nhidden = self.l1_recur.W.shape[1]
         shape = (minibatch_size, nhidden)
-        make_v = lambda : chainer.Variable(context(numpy.zeros(shape, dtype=numpy.float32)), volatile=not train)
         for n in range(1, 4):
             state.update(
                 {
-                    'h%s' % n: make_v(),
-                    'c%s' % n: make_v()
+                    'h%s' % n: chainer.Variable(context(numpy.zeros(shape, dtype=numpy.float32)), volatile=not train),
+                    'c%s' % n: chainer.Variable(context(numpy.zeros(shape, dtype=numpy.float32)), volatile=not train)
                 }
             )
         return state

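In initial_state(), volatile=not train means state Variables built for training participate in the backprop graph while evaluation states do not; main.py's new set_volatile() helper flips the same flag on an existing state rather than rebuilding it. A hedged sketch of both routes to an evaluation-ready state (mb_size and context as defined near the top of main.py):

# sketch only; mirrors calls already present in main.py
train_state = model.initial_state(mb_size, context)               # volatile=False
eval_state = model.initial_state(mb_size, context, train=False)   # volatile=True

# or reuse the running training state, as evaluate() now does
state = set_volatile(train_state, True)   # switch off graph construction
state = set_volatile(state, False)        # switch back for training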