Commit 64dc4e7

fix train

ShigekiKarita committed Jul 28, 2015
1 parent 3874fa9 commit 64dc4e7
Showing 2 changed files with 65 additions and 30 deletions.
10 changes: 5 additions & 5 deletions src/gravesnet.py
@@ -54,21 +54,21 @@ def __init__(self, nhidden=100, ngauss=30):
             l4=F.Linear(nhidden * 3, 1 + ngauss * 6)
         )
 
-    def initial_state(self, minibatch_size, context, mod):
+    def initial_state(self, minibatch_size, context, train=True):
         state = dict()
         nhidden = self.l1_recur.W.shape[1]
         shape = (minibatch_size, nhidden)
-        make_v = lambda m, s: chainer.Variable(context(m.zeros(s, dtype=numpy.float32)))
+        make_v = lambda : chainer.Variable(context(numpy.zeros(shape, dtype=numpy.float32)), volatile=not train)
         for n in range(1, 4):
             state.update(
                 {
-                    'h%s' % n: make_v(mod, shape),
-                    'c%s' % n: make_v(mod, shape)
+                    'h%s' % n: make_v(),
+                    'c%s' % n: make_v()
                 }
             )
         return state
 
-    def forward_one_step(self, x_data, t_x_data, t_e_data, state, train=True):
+    def forward_one_step(self, state, x_data, t_x_data, t_e_data, train=True):
         x = chainer.Variable(x_data, volatile=not train)
         t_x = chainer.Variable(t_x_data, volatile=not train)
         t_e = chainer.Variable(t_e_data, volatile=not train)
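For orientation, this refactor changes both call sites: initial_state now takes a train flag instead of an array module, and forward_one_step takes the recurrent state as its first argument. A minimal usage sketch, assuming this repository's layout and the 2015-era Chainer 1.x API (batch size and array shapes are illustrative, inferred from the evaluate function added below):

import numpy
from src.gravesnet import GravesPredictionNet

model = GravesPredictionNet(nhidden=100)
context = lambda x: x  # identity on CPU; chainer.cuda.to_gpu when running on GPU

# Training state: Variables are created with volatile=False, so the graph is kept.
state = model.initial_state(4, context)
# Evaluation state: volatile=True, so no graph is built and memory stays flat.
ev_state = model.initial_state(1, context, train=False)

# One step; the state now comes first and is threaded through successive calls.
x = numpy.zeros((4, 3), dtype=numpy.float32)    # (dx, dy, pen-up) inputs
t_x = numpy.zeros((4, 2), dtype=numpy.float32)  # next-offset targets
t_e = numpy.zeros((4, 1), dtype=numpy.int32)    # next pen-up targets
state, loss = model.forward_one_step(state, x, t_x, t_e)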
85 changes: 60 additions & 25 deletions train_prednet.py
@@ -9,50 +9,82 @@
 import six
 import numpy
 
-from src.dataset import parse_IAMxml
 from src.gravesnet import GravesPredictionNet
 
+range = six.moves.range
 
-model = GravesPredictionNet()
-mod = chainer.cuda
+mb_size = 64
+bp_len = 100
+update_len = 1000
+n_epoch = 1000
+use_gpu = True
+n_hidden = 400
+model = GravesPredictionNet(n_hidden)
 
 
-range = six.moves.range
 mod = numpy
 context = lambda x: x
-if chainer.cuda.available:
+if chainer.cuda.available and use_gpu:
     print("use gpu")
     chainer.cuda.init()
     model.to_gpu()
     mod = chainer.cuda
     context = chainer.cuda.to_gpu
 
-def mini_batch(mb_size, storage, index):
+
+def load_dataset(path):
+    xs, es = pickle.load(open(path, 'rb'))
+    return numpy.float32(xs), numpy.int32(es)
+
+
+def mini_batch(mb_size, xs, index):
     xs_size = xs.shape[0]
     jump = xs_size // mb_size
-    return numpy.array([storage[(jump * j + index) % xs_size] for j in range(mb_size)])
+    return numpy.array([xs[(jump * j + index) % xs_size] for j in range(mb_size)])
+
+
+def create_inout(mb_size, context, xs, es, i):
+    x_batch = mini_batch(mb_size, xs, i)
+    e_batch = mini_batch(mb_size, es, i)
+    xe_batch = context(numpy.concatenate((x_batch, e_batch), axis=1).astype(numpy.float32))
+    t_x_batch = context(mini_batch(mb_size, xs, i + 1))
+    t_e_batch = context(mini_batch(mb_size, es, i + 1))
+    return xe_batch, t_x_batch, t_e_batch
+
+
+def evaluate(xs, es, context):
+    total = mod.zeros(())
+    state = model.initial_state(1, context, False)
+    m = 1000
+    n = numpy.random.randint(0, 100000)
+    for i in range(n, n + m):
+        x = context(numpy.concatenate((xs[i], es[i]), axis=1).reshape(1, 3).astype(numpy.float32))
+        tx = context(xs[i+1]).reshape(1, 2)
+        te = context(es[i+1]).reshape(1, 1)
+        # inout = create_inout(1, context, xs, es, i)
+        state, loss = model.forward_one_step(state, x, tx, te, train=False)
+        total += loss.data.reshape(())
+    return chainer.cuda.to_cpu(total) / m
 
 
 if __name__ == '__main__':
-    xs, es = pickle.load("res/trainset.npy") #parse_IAMxml("res/strokesz.xml")
-    t = 0
-    mb_size = 8
-    n_hidden = 100
-    bp_len = 100
-    update_len = 1000
-    n_epoch = 1000
-
-    state = model.initial_state((mb_size, n_hidden), context, numpy)
+    xs, es = load_dataset("res/trainset.npy")
+    txs, tes = load_dataset("res/testset_v.npy")
+    print("train", es.shape, "test", tes.shape)
+
+    print("load dataset")
+
+    state = model.initial_state(mb_size, context)
     accum_loss = chainer.Variable(mod.zeros((), dtype=numpy.float32))
     rmsprop = chainer.optimizers.RMSpropGraves()
     rmsprop.setup(model.collect_parameters())
     total_loss = mod.zeros(())
     prev = time.time()
 
-    train_data = numpy.concatenate((xs, es),axis=1)
-    es = es.astype(numpy.int32)
     jump = xs.shape[0] // mb_size
 
     for i in six.moves.range(jump * n_epoch):
-        xe_batch = context(numpy.array(mini_batch(mb_size, train_data, i)))
-        t_x_batch = context(numpy.array(mini_batch(mb_size, xs, i + 1)))
-        t_e_batch = context(numpy.array(mini_batch(mb_size, es, i + 1)))
-        state, loss_i = model.forward_one_step(xe_batch, t_x_batch, t_e_batch, state)
+        inout = create_inout(mb_size, context, xs, es, i)
+        state, loss_i = model.forward_one_step(state, *inout)
         accum_loss += loss_i
        total_loss += loss_i.data.reshape(())
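The new mini_batch helper carves one long stroke sequence into mb_size parallel sub-streams spaced jump = xs_size // mb_size steps apart, so successive index values advance every sub-stream by one time step. A self-contained toy run of the same helper (the function body is copied from the diff; the toy array is illustrative only):

import numpy

def mini_batch(mb_size, xs, index):
    xs_size = xs.shape[0]
    jump = xs_size // mb_size
    return numpy.array([xs[(jump * j + index) % xs_size] for j in range(mb_size)])

xs = numpy.arange(10)        # pretend each element is one time step
print(mini_batch(2, xs, 0))  # [0 5] -> two streams, offsets 0 and 5
print(mini_batch(2, xs, 1))  # [1 6] -> index advances both streams in lockstep

create_inout then stacks the (dx, dy) batch with the pen-up batch along axis 1 and pairs it with the targets one step ahead, which is exactly what forward_one_step consumes.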

@@ -64,11 +96,14 @@ def mini_batch(mb_size, storage, index):
             rmsprop.update()
 
         if (i + 1) % update_len == 0:
+            ev_loss = evaluate(txs, tes, context)
             now = time.time()
             throuput = float(update_len) / (now - prev)
-            avg_loss = math.exp(chainer.cuda.to_cpu(total_loss) / update_len)
-            print('iter {} training loss: {:.2f} ({:.2f} iters/sec)'.format(
+            avg_loss = chainer.cuda.to_cpu(total_loss) / update_len
+            print('iter {} training loss: {:.6f} ({:.2f} iters/sec)'.format(
                 i + 1, avg_loss, throuput))
+            print('test loss: {}'.format(ev_loss))
             prev = now
             total_loss.fill(0)
 
     # pickle.dump(model, open('model%04d' % (i+1), 'wb'), -1)
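The hunk above shows only the logging side of the update loop; the lines elided between the two hunks are where the truncated-backprop step presumably lives, ending in the rmsprop.update() context line. A hypothetical sketch of that step in the standard Chainer 1.x idiom, not the author's verified code:

# Hypothetical reconstruction of the elided truncated-BPTT block; variable
# names follow the surrounding diff, the idiom follows Chainer 1.x examples.
if (i + 1) % bp_len == 0:
    rmsprop.zero_grads()
    accum_loss.backward()
    accum_loss.unchain_backward()  # cut the history so memory stays bounded
    rmsprop.update()
    accum_loss = chainer.Variable(mod.zeros((), dtype=numpy.float32))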

0 comments on commit 64dc4e7
