# pylearn2 tutorial example: cifar_grbm_smd.yaml by Ian Goodfellow
# Modified in several places by Yoshihiro Tanaka for this project.
#
# Read the README file before reading this file
#
# This is an example of yaml file, which is the main way that an experimenter
# interacts with pylearn2.
#
# A yaml file is very similar to a python dictionary, with a bit of extra
# syntax.
# The !obj tag allows us to create a specific class of object. The text after
# the : indicates what class should be loaded. This is followed by a pair of
# braces containing the arguments to that class's __init__ method.
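#
# For example (an illustrative equivalence, not the exact loading mechanism),
# a tag such as
#   !obj:pylearn2.models.rbm.GaussianBinaryRBM { nvis: 16384, nhid: 80 }
# behaves roughly like the Python call
#   pylearn2.models.rbm.GaussianBinaryRBM(nvis=16384, nhid=80)
# when this file is parsed with pylearn2.config.yaml_parse.load().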
#
# Here, we allocate a Train object, which represents the main loop of the
# training script. The train script will run this loop repeatedly. Each time
# through the loop, the model is trained on data from a training dataset, then
# saved to file.
#
# Author: Yoshihiro Tanaka <feria.primavera@amil.com>
# forked from https://github.com/laughing/grbm_sample
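#
# A typical way to run this file (assuming a standard pylearn2 checkout and
# that this file is saved as grbm.yaml) is:
#   python pylearn2/scripts/train.py grbm.yaml
# The train.py script parses this YAML, builds the Train object defined
# below, and calls its main_loop() method.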
!obj:pylearn2.train.Train {
# Here we construct our training dataset directly with the !obj tag, using
# the custom HogeDataset class to load the data prepared by make_dataset.py
# (see the README for details).
dataset: &data !obj:pylearn2.datasets.hoge_dataset.HogeDataset {
which_set: "train",
base_path: "${PYLEARN2_DATA_PATH}/train_test", # default: "${PYLEARN2_DATA_PATH}/hoge"
image_to_csv: True, # default: False
image_size: 128, # default: 128
color: False # default: False
},
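# Note: "${PYLEARN2_DATA_PATH}" in base_path above is expanded from the
# PYLEARN2_DATA_PATH environment variable when the YAML is preprocessed, so
# the data is expected under $PYLEARN2_DATA_PATH/train_test. For example
# (an assumed shell setup; adjust the path to your own environment):
#   export PYLEARN2_DATA_PATH=/home/user/pylearn2_data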
# Next we make the model to be trained. It is a Gaussian-binary RBM
# (Gaussian visible units, binary hidden units).
model: !obj:pylearn2.models.rbm.GaussianBinaryRBM {
# The RBM needs 16384 visible units: with the dataset settings above
# (image_size = 128, color = False), each input is a 128x128 grayscale
# image, so nvis = 128 * 128 * 1 = 16384.
# In general this parameter must match image_size and color:
# If color is True : nvis = image_size * image_size * 3
# False: nvis = image_size * image_size * 1
nvis : 16384,
# We'll use 80 hidden units for this RBM. That's a small number but we
# want this example script to train quickly.
nhid : 80,
# The elements of the weight matrices of the RBM will be drawn
# independently from U(-0.05, 0.05)
irange : 0.05,
# There are many ways to parameterize a GRBM. Here we use a
# parameterization that makes the correspondence to denoising
# autoencoders more clear.
energy_function_class : !obj:pylearn2.energy_functions.rbm_energy.grbm_type_1 {},
# Some learning algorithms are capable of estimating the standard
# deviation of the visible units of a GRBM successfully, others are not
# and just fix the standard deviation to 1. We're going to show off
# and learn the standard deviation.
learn_sigma : True,
# Learning works better if we provide a smart initialization for the
# parameters. Here we start sigma at .4 , which is about the same
# standard deviation as the training data. We start the biases on the
# hidden units at -2, which will make them have fairly sparse
# activations.
init_sigma : .4,
init_bias_hid : -2.,
# Some GRBM training algorithms can't handle the visible units being
# noisy and just use their mean for all computations. We will show off
# and not use that hack here.
mean_vis : False,
# One hack we will make is to scale back the gradient steps on the
# sigma parameter. This way we don't need to worry about sigma getting
# too small prematurely (if it gets too small too fast the learning
# signal gets weak).
sigma_lr_scale : 1e-3
},
# Next we need to specify the training algorithm that will be used to train
# the model. Here we use stochastic gradient descent.
algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
# The learning rate determines how big of steps the learning algorithm
# takes. Here we use fairly big steps initially because we have a
# learning rate adjustment scheme that will scale them down if
# necessary.
learning_rate : 1e-1,
# Each gradient step will be based on this many examples
batch_size : 16,
# We'll monitor our progress by looking at the first batch of the
# training dataset (monitoring_batches = 1), which gives a rough estimate
# of the training error. To be
# really exhaustive, we could use the entire training set instead,
# or to avoid overfitting, we could use held out data instead.
monitoring_batches : 1,
monitoring_dataset : *data,
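# (*data above is a YAML alias: it refers back to the &data anchor on the
# dataset at the top of this file, so the same dataset object is reused for
# monitoring instead of being constructed a second time.)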
# Here we specify the objective function that stochastic gradient
# descent should minimize. In this case we use denoising score
# matching, which makes this RBM behave as a denoising autoencoder.
# See
# Pascal Vincent. "A Connection Between Score Matching and Denoising
# Autoencoders." Neural Computation, 2011
# for details.
cost : !obj:pylearn2.costs.ebm_estimation.SMD {
# Denoising score matching uses a corruption process to transform
# the raw data. Here we use additive gaussian noise.
corruptor : !obj:pylearn2.corruption.GaussianCorruptor {
stdev : 0.4
},
},
# We'll use the monitoring dataset to figure out when to stop training.
#
# In this case, we stop if there is less than a 1% decrease
# (prop_decrease = 0.01) in the monitored objective over the last
# N = 1000 epochs. You'll notice that the learned features are a bit
# noisy. If you'd like nice smooth features you can increase N so that
# the model trains for longer (at the cost of a much longer run).
termination_criterion : !obj:pylearn2.termination_criteria.MonitorBased {
prop_decrease : 0.01,
N : 1000,
},
# Let's throw a learning rate adjuster into the training algorithm.
# To do this we'll use an "extension," which is basically an event
# handler that can be registered with the Train object.
# This particular one is triggered on each epoch.
# It will shrink the learning rate if the objective goes up and increase
# the learning rate if the objective decreases too slowly. This makes
# our learning rate hyperparameter less important to get right.
# This is not a very mathematically principled approach, but it works
# well in practice.
},
extensions : [!obj:pylearn2.training_algorithms.sgd.MonitorBasedLRAdjuster {}],
# Finally, request that the model be saved after each epoch
save_freq : 1
}
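# After training finishes, the model is pickled to disk. When run with
# pylearn2's train.py and no explicit save_path, the output file is
# typically named after this YAML file (e.g. grbm.pkl). A rough sketch of
# how to inspect it from Python (the filename here is an assumption):
#   from pylearn2.utils import serial
#   model = serial.load("grbm.pkl")
#   print([p.name for p in model.get_params()])
# The learned filters can also be visualized with the bundled script:
#   python pylearn2/scripts/show_weights.py grbm.pkl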