Yet another batch normalization PR #3229

Merged
merged 2 commits into from Oct 23, 2015
Jump to file or symbol
Failed to load files and symbols.
+1,139 −2
Split
@@ -0,0 +1,28 @@
+# reduce learning rate after 120 epochs (60000 iters) by factor of 10
+# then another factor of 10 after 10 more epochs (5000 iters)
+
+# The train/test net protocol buffer definition
+net: "examples/cifar10/cifar10_full_sigmoid_train_test.prototxt"
+# test_iter specifies how many forward passes the test should carry out.
+# In the case of CIFAR10, we have test batch size 1000 and 10 test iterations,
+# covering the full 10,000 testing images.
+test_iter: 10
+# Carry out testing every 1000 training iterations.
+test_interval: 1000
+# The base learning rate, momentum and the weight decay of the network.
+base_lr: 0.001
+momentum: 0.9
+#weight_decay: 0.004
+# The learning rate policy
+lr_policy: "step"
+gamma: 1
+stepsize: 5000
+# Display every 100 iterations
+display: 100
+# The maximum number of iterations
+max_iter: 60000
+# snapshot intermediate results
+snapshot: 10000
+snapshot_prefix: "examples/cifar10_full_sigmoid"
+# solver mode: CPU or GPU
+solver_mode: GPU
@@ -0,0 +1,28 @@
+# reduce learning rate after 120 epochs (60000 iters) by factor of 10
+# then another factor of 10 after 10 more epochs (5000 iters)
+
+# The train/test net protocol buffer definition
+net: "examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt"
+# test_iter specifies how many forward passes the test should carry out.
+# In the case of CIFAR10, we run 10 test iterations; assuming the net uses a test
+# batch size of 1000, this covers the full 10,000 testing images.
+test_iter: 10
+# Carry out testing every 1000 training iterations.
+test_interval: 1000
+# The base learning rate, momentum and the weight decay of the network.
+base_lr: 0.001
+momentum: 0.9
+#weight_decay: 0.004
+# The learning rate policy
+lr_policy: "step"
+gamma: 1
+stepsize: 5000
+# Display every 100 iterations
+display: 100
+# The maximum number of iterations
+max_iter: 60000
+# snapshot intermediate results
+snapshot: 10000
+snapshot_prefix: "examples/cifar10_full_sigmoid_bn"
+# solver mode: CPU or GPU
+solver_mode: GPU
@@ -0,0 +1,212 @@
+name: "CIFAR10_full"
+layer {
+ name: "cifar"
+ type: "Data"
+ top: "data"
+ top: "label"
+ include {
+ phase: TRAIN
+ }
+ transform_param {
+ mean_file: "examples/cifar10/mean.binaryproto"
+ }
+ data_param {
+ source: "examples/cifar10/cifar10_train_lmdb"
+ batch_size: 111
+ backend: LMDB
+ }
+}
+layer {
+ name: "cifar"
+ type: "Data"
+ top: "data"
+ top: "label"
+ include {
+ phase: TEST
+ }
+ transform_param {
+ mean_file: "examples/cifar10/mean.binaryproto"
+ }
+ data_param {
+ source: "examples/cifar10/cifar10_test_lmdb"
+ batch_size: 1000
+ backend: LMDB
+ }
+}
+layer {
+ name: "conv1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ convolution_param {
+ num_output: 32
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.0001
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+
+
+
+layer {
+ name: "Sigmoid1"
+ type: "Sigmoid"
+ bottom: "pool1"
+ top: "Sigmoid1"
+}
+
+layer {
+ name: "conv2"
+ type: "Convolution"
+ bottom: "Sigmoid1"
+ top: "conv2"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ convolution_param {
+ num_output: 32
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+
+
@Hrant-Khachatrian

Hrant-Khachatrian Nov 19, 2015

Somewhat unrelated to batch normalization, but is it intentional to use conv -> pooling -> sigmoid in the first layer and conv -> sigmoid -> pooling in the second layer?

@ducha-aiki

ducha-aiki Nov 19, 2015

Contributor

No. The intention was to reduce memory usage by using conv -> pooling -> sigmoid, but I missed it in the 2nd layer.

+layer {
+ name: "Sigmoid2"
+ type: "Sigmoid"
+ bottom: "conv2"
+ top: "Sigmoid2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "Sigmoid2"
+ top: "pool2"
+ pooling_param {
+ pool: AVE
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "conv3"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3"
+ convolution_param {
+ num_output: 64
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 1
+ }
+
+}
+
+layer {
+ name: "Sigmoid3"
+ type: "Sigmoid"
+ bottom: "conv3"
+ top: "Sigmoid3"
+}
+
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "Sigmoid3"
+ top: "pool3"
+ pooling_param {
+ pool: AVE
+ kernel_size: 3
+ stride: 2
+ }
+}
+
+layer {
+ name: "ip1"
+ type: "InnerProduct"
+ bottom: "pool3"
+ top: "ip1"
+ param {
+ lr_mult: 1
+ decay_mult: 0
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ inner_product_param {
+ num_output: 10
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "accuracy"
+ type: "Accuracy"
+ bottom: "ip1"
+ bottom: "label"
+ top: "accuracy"
+ include {
+ phase: TEST
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "ip1"
+ bottom: "label"
+ top: "loss"
+}
Oops, something went wrong.