update prednet
ShigekiKarita committed Aug 5, 2015
1 parent 658909e commit 758ef91
Showing 4 changed files with 65 additions and 37 deletions.
main.py (5 changes: 3 additions & 2 deletions)
@@ -1,11 +1,12 @@
 from src.gravesnet import GravesPredictionNet
 from src.train import optimize, OptimizationSizes
 
+
 if __name__ == '__main__':
     sizes = OptimizationSizes(
         epoch_size=1000,
-        train_size=4,
-        eval_size=16,
+        train_size=1,
+        eval_size=4,
         mini_batch_size=1
     )
     model = GravesPredictionNet(nhidden=400)
src/gravesnet.py (47 changes: 27 additions & 20 deletions)
@@ -10,7 +10,8 @@
 
 
 def gauss_bernoulli_params(m, y):
-    y_mixws, y_means, y_stdds, y_corrs, y_e = split_axis_by_widths(y, [m, 2 * m, 2 * m, m, 1])
+    width = [m, 2 * m, 2 * m, m, 1]
+    y_mixws, y_means, y_stdds, y_corrs, y_e = split_axis_by_widths(y, width)
     y_mixws = F.softmax(y_mixws)
     y_means0, y_means1 = split_axis_by_widths(y_means, 2)
     y_stdds0, y_stdds1 = split_axis_by_widths(F.exp(y_stdds), 2)
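
For context, the widths [m, 2 * m, 2 * m, m, 1] are the mixture-density output of Graves (2013): m mixture weights, 2m means, 2m standard deviations, m correlations, and one end-of-stroke probability, matching the l4 layer's 1 + ngauss * 6 units. The NumPy sketch below illustrates that parameterisation only; it is not the repository's split_axis_by_widths/Chainer code, and the tanh and sigmoid squashings of the correlation and end-of-stroke terms are assumed from the paper rather than visible in this diff.

import numpy as np

def gauss_bernoulli_params_ref(m, y):
    # Illustrative sketch, not part of the commit.
    # y: (batch, 6 * m + 1) raw network output.
    mixws, means, stdds, corrs, e = np.split(y, np.cumsum([m, 2 * m, 2 * m, m]), axis=1)
    mixws = np.exp(mixws - mixws.max(axis=1, keepdims=True))
    mixws /= mixws.sum(axis=1, keepdims=True)   # softmax over mixture weights
    stdds = np.exp(stdds)                       # standard deviations > 0
    corrs = np.tanh(corrs)                      # correlations in (-1, 1)      [assumed]
    e = 1.0 / (1.0 + np.exp(-e))                # end-of-stroke probability    [assumed]
    return mixws, means, stdds, corrs, e
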
@@ -47,43 +48,49 @@ class GravesPredictionNet(chainer.FunctionSet):
     """
 
     def __init__(self, nhidden=100, ngauss=30):
-        ninput=3
+        ninput = 3
         super(GravesPredictionNet, self).__init__(
-            l1_first=F.Linear(ninput, 4 * nhidden, nobias=True),
-            l1_recur=F.Linear(nhidden, 4 * nhidden),
+            l1_a=F.Linear(ninput + nhidden, nhidden),
+            l1_x=F.Linear(ninput + 2 * nhidden, 3 * nhidden),
 
-            l2_first=F.Linear(ninput, 4 * nhidden, nobias=True),
-            l2_recur=F.Linear(nhidden, 4 * nhidden),
-            l2_input=F.Linear(nhidden, 4 * nhidden, nobias=True),
+            l2_a=F.Linear(ninput + 2 * nhidden, nhidden),
+            l2_x=F.Linear(ninput + 3 * nhidden, 3 * nhidden),
 
-            l3_first=F.Linear(ninput, 4 * nhidden, nobias=True),
-            l3_recur=F.Linear(nhidden, 4 * nhidden),
-            l3_input=F.Linear(nhidden, 4 * nhidden, nobias=True),
+            l3_a=F.Linear(ninput + 2 * nhidden, nhidden),
+            l3_x=F.Linear(ninput + 3 * nhidden, 3 * nhidden),
 
             l4=F.Linear(nhidden * 3, 1 + ngauss * 6)
         )
 
     def initial_state(self, minibatch_size, context, label, train=True):
         state = dict()
-        nhidden = self.l1_recur.W.shape[1]
+        nhidden = self.l1_a.W.shape[0]
         shape = (minibatch_size, nhidden)
         for n in range(1, 4):
            state.update({
-                '%s%s' % (label, n): chainer.Variable(context(numpy.zeros(shape, dtype=numpy.float32)), volatile=not train)
+                '%s%s' % (label, n):
+                    chainer.Variable(
+                        context(numpy.zeros(shape, dtype=numpy.float32)),
+                        volatile=not train)
             })
         return state
 
     def bottle_neck(self, hidden_state, lstm_cells, x_data, train):
         x = chainer.Variable(x_data, volatile=not train)
-
-        h1_in = self.l1_first(x) + self.l1_recur(hidden_state['h1'])
-        c1, h1 = F.lstm(lstm_cells['c1'], h1_in)
+
+        a1 = self.l1_a(F.concat((x, hidden_state["h1"])))
+        x1 = self.l1_x(F.concat((x, hidden_state["h1"], lstm_cells["c1"])))
+        c1, h1 = peephole_lstm(lstm_cells['c1'], a1, x1)
         h1 = gradient_clip(h1, 10.0)
-        h2_in = self.l2_first(x) + self.l2_recur(hidden_state['h2']) + self.l2_input(h1)
-        c2, h2 = F.lstm(lstm_cells['c2'], h2_in)
+
+        a2 = self.l2_a(F.concat((x, hidden_state["h2"], h1)))
+        x2 = self.l2_x(F.concat((x, hidden_state["h2"], h1, lstm_cells["c2"])))
+        c2, h2 = peephole_lstm(lstm_cells['c2'], a2, x2)
         h2 = gradient_clip(h2, 10.0)
-        h3_in = self.l3_first(x) + self.l3_recur(hidden_state['h3']) + self.l3_input(h2)
-        c3, h3 = F.lstm(lstm_cells['c3'], h3_in)
+
+        a3 = self.l2_a(F.concat((x, hidden_state["h3"], h2)))
+        x3 = self.l2_x(F.concat((x, hidden_state["h3"], h2, lstm_cells["c3"])))
+        c3, h3 = peephole_lstm(lstm_cells['c3'], a3, x3)
         h3 = gradient_clip(h3, 10.0)
 
         y = self.l4(F.concat((h1, h2, h3)))
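
The substantive change above swaps Chainer's F.lstm for a peephole_lstm helper, so each layer's gates can see the previous cell state, as in Graves (2013); the new l*_x layers take lstm_cells as part of their input, which appears to be where the peephole connection enters. As a rough reference only, a generic peephole LSTM step looks like the NumPy sketch below; the weight names are placeholders and this is not the repository's peephole_lstm implementation.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def peephole_lstm_step(c_prev, h_prev, x, W, p, b):
    # Generic peephole LSTM recurrence (Graves, 2013); illustrative only.
    # W: input/recurrent matrices, p: peephole vectors, b: biases (all placeholders).
    i = sigmoid(x @ W['xi'] + h_prev @ W['hi'] + p['ci'] * c_prev + b['i'])  # input gate
    f = sigmoid(x @ W['xf'] + h_prev @ W['hf'] + p['cf'] * c_prev + b['f'])  # forget gate
    c = f * c_prev + i * np.tanh(x @ W['xc'] + h_prev @ W['hc'] + b['c'])    # new cell state
    o = sigmoid(x @ W['xo'] + h_prev @ W['ho'] + p['co'] * c + b['o'])       # output gate peeks at new cell
    h = o * np.tanh(c)
    return c, h
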
@@ -197,4 +204,4 @@ def forward(model, x_list):
 from chainer import optimizers
 
 def train(model, x_list):
-    opt = optimizers.RMSpropGraves()
+    opt = optimizers.RMSpropGraves()
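
The train stub above only constructs the optimizer. A typical Chainer 1.x training step around optimizers.RMSpropGraves would look roughly like the sketch below; the setup argument and the forward_fn/batches names are assumptions for illustration, not code from this commit or from src/train.py.

from chainer import optimizers

def train_sketch(model, forward_fn, batches):
    # Illustrative sketch, not part of the commit.
    opt = optimizers.RMSpropGraves()
    opt.setup(model)        # very early Chainer releases used model.collect_parameters() here
    for batch in batches:
        opt.zero_grads()    # clear accumulated gradients
        loss = forward_fn(model, batch)
        loss.backward()     # backprop through the unrolled network
        opt.update()        # RMSProp variant used by Graves (2013)
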
src/train.py (48 changes: 33 additions & 15 deletions)
@@ -10,7 +10,9 @@
 
 
 class OptimizationSizes(object):
-    def __init__(self, epoch_size=1000, train_size=1, eval_size=8, mini_batch_size=1):
+    def __init__(self,
+                 epoch_size=1000, train_size=1,
+                 eval_size=8, mini_batch_size=1):
         self.epoch = epoch_size
         self.train = train_size
         self.eval = eval_size
@@ -31,7 +33,9 @@ def load_dataset(path):
 def mini_batch(mb_size, xs, index):
     xs_size = xs.shape[0]
     jump = xs_size // mb_size
-    return numpy.array([xs[(jump * j + index) % xs_size] for j in range(mb_size)])
+    return numpy.array(
+        [xs[(jump * j + index) % xs_size] for j in range(mb_size)]
+    )
 
 
 def reshape2d(x):
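
The mini_batch helper shown above strides through the dataset so each of the mb_size batch slots reads from a different region of xs. A small hypothetical example of the indexing, assuming mini_batch is imported from src/train.py:

import numpy as np
from src.train import mini_batch

xs = np.arange(8)            # stand-in for 8 training items
# jump = 8 // 2 = 4, so index=3 selects xs[3] and xs[(4 + 3) % 8] = xs[7]
print(mini_batch(2, xs, 3))  # -> [3 7]
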
@@ -66,10 +70,11 @@ def evaluate(context, model, lstm_cells: chainer.Variable,
         e = es[i]
         total_seq_len += len(e) - 1
         hidden_state = model.initial_state(1, context, "h", train=False)
-
         for t in range(len(es[i]) - 1):
             ci, cx, ce = create_inout(context, x, e, t, mean, stddev)
-            hidden_state, lstm_cells, loss = model.forward_one_step(hidden_state, lstm_cells, ci, cx, ce, train=False)
+            hidden_state, lstm_cells, loss = model.forward_one_step(
+                hidden_state, lstm_cells, ci, cx, ce, train=False
+            )
             total += loss.data.reshape(())
 
     set_volatile(lstm_cells, False)
@@ -113,10 +118,14 @@ def optimize(model, sizes: OptimizationSizes, data_dir: str):
         e = es[i]
         seq_len = len(e)
         hidden_state = model.initial_state(sizes.mini_batch, context, "h")
-        accum_loss = chainer.Variable(context(numpy.zeros((), dtype=numpy.float32)))
+        accum_loss = chainer.Variable(
+            context(numpy.zeros((), dtype=numpy.float32))
+        )
         for t in range(seq_len - 1):
             inout = create_inout(context, x, e, t, mean, stddev)
-            hidden_state, lstm_cells, loss_t = model.forward_one_step(hidden_state, lstm_cells, *inout)
+            hidden_state, lstm_cells, loss_t = model.forward_one_step(
+                hidden_state, lstm_cells, *inout
+            )
             accum_loss += loss_t
             total_loss += loss_t.data.reshape(())
             n_point += 1
@@ -129,11 +138,12 @@ def optimize(model, sizes: OptimizationSizes, data_dir: str):
 
         now = time.time()
         t_loss = chainer.cuda.to_cpu(total_loss)
-        print('epoch {}, iter {}, loss/point: {:.6f}, loss/seq: {:.6f}, point/sec: {:.2f} '.format(
-            epoch, n,
-            t_loss / n_point,
-            t_loss / sizes.train,
-            float(n_point) / (now - prev)))
+        print(
+            'epoch {}, iter {}, loss/point: {:.6f}, loss/seq: {:.6f}, point/sec: {:.2f} '.format(
+                epoch, n,
+                t_loss / n_point,
+                t_loss / sizes.train,
+                float(n_point) / (now - prev)))
         sys.stdout.flush()
         loss_point_train += t_loss / n_point
         loss_seq_train += t_loss
@@ -145,7 +155,9 @@ def optimize(model, sizes: OptimizationSizes, data_dir: str):
             pickle.dump(model, open('model_%08d' % n_eval, 'wb'), -1)
             for k, v in lstm_cells.items():
                 d = chainer.cuda.to_cpu(v.data)
-                pickle.dump(d, open('lstm_{}_{:08d}'.format(k, n_eval), 'wb'), -1)
+                pickle.dump(
+                    d, open('lstm_{}_{:08d}'.format(k, n_eval), 'wb'), -1
+                )
 
             n_eval += 1
             print("eval-%08d" % n_eval)
@@ -154,8 +166,14 @@ def optimize(model, sizes: OptimizationSizes, data_dir: str):
                 loss_seq_train / sizes.eval))
             sys.stdout.flush()
             lstm_copy = lstm_cells.copy()
-            loss_point, loss_seq = evaluate(context, model, lstm_copy, sizes, txs, tes, mean, stddev)
-            print('\ttest: [loss/point: {:.6f}, loss/seq: {:.6f}]'.format(loss_point, loss_seq))
+            loss_point, loss_seq = evaluate(
+                context, model, lstm_copy, sizes, txs, tes, mean, stddev
+            )
+            print(
+                '\ttest: [loss/point: {:.6f}, loss/seq: {:.6f}]'.format(
+                    loss_point, loss_seq
+                )
+            )
             sys.stdout.flush()
             loss_point_train = 0.0
             loss_seq_train = 0.0
@@ -169,4 +187,4 @@ def parse_args():
     parser.add_argument('--gpu', '-g', default=0, type=int,
                         help='GPU ID (negative value indicates CPU)')
     args = parser.parse_args()
-    return args
+    return args
test/test_gaussian_mixture_2d.py (2 changes: 2 additions & 0 deletions)
@@ -2,6 +2,7 @@
 
 import numpy
 from numpy.random import uniform, binomial
+
 import chainer
 from chainer import cuda
 from chainer import gradient_check
@@ -12,6 +13,7 @@
 from src.functions.gaussian_mixture_2d_ref import gaussian_mixture_2d_ref
 from src import gravesnet
 
+
 if cuda.available:
     cuda.init()
 
