# pylearn2 tutorial example: cifar_grbm_smd.yaml by Ian Goodfellow
# Modified in several places by Yoshihiro Tanaka for this project.
#
# Read the README file before reading this file
#
# This is an example of yaml file, which is the main way that an experimenter
# interacts with pylearn2.
#
# A yaml file is very similar to a python dictionary, with a bit of extra
# syntax.
# The !obj tag allows us to create a specific class of object. The text after
# the : indicates what class should be loaded. This is followed by a pair of
# braces containing the arguments to that class's __init__ method.
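#
# For example (an illustrative equivalence, not the exact loading mechanism),
# a tag such as
#   !obj:pylearn2.models.rbm.GaussianBinaryRBM { nvis: 16384, nhid: 80 }
# behaves roughly like the Python call
#   pylearn2.models.rbm.GaussianBinaryRBM(nvis=16384, nhid=80)
# when this file is parsed with pylearn2.config.yaml_parse.load().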
#
# Here, we allocate a Train object, which represents the main loop of the
# training script. The train script will run this loop repeatedly. Each time
# through the loop, the model is trained on data from a training dataset, then
# saved to file.
#
# Author: Yoshihiro Tanaka <feria.primavera@amil.com>
# forked from https://github.com/laughing/grbm_sample
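#
# A typical way to run this file (assuming a standard pylearn2 checkout and
# that this file is saved as grbm.yaml) is:
#   python pylearn2/scripts/train.py grbm.yaml
# The train.py script parses this YAML, builds the Train object defined
# below, and calls its main_loop() method.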
!obj:pylearn2.train.Train {
# Here we construct our training dataset directly with the !obj tag, using
# the custom HogeDataset class to load the data prepared by make_dataset.py
# (see the README for details).
dataset: &data !obj:pylearn2.datasets.hoge_dataset.HogeDataset {
which_set: "train",
base_path: "${PYLEARN2_DATA_PATH}/train_test", # default: "${PYLEARN2_DATA_PATH}/hoge"
image_to_csv: True, # default: False
image_size: 128, # default: 128
color: False # default: False
},
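# Note: "${PYLEARN2_DATA_PATH}" in base_path above is expanded from the
# PYLEARN2_DATA_PATH environment variable when the YAML is preprocessed, so
# the data is expected under $PYLEARN2_DATA_PATH/train_test. For example
# (an assumed shell setup; adjust the path to your own environment):
#   export PYLEARN2_DATA_PATH=/home/user/pylearn2_data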
# Next we make the model to be trained. It is a Gaussian-binary RBM
# (Gaussian visible units, binary hidden units).
model: !obj:pylearn2.models.rbm.GaussianBinaryRBM {
# The RBM needs 16384 visible units: with the dataset settings above
# (image_size = 128, color = False), each input is a 128x128 grayscale
# image, so nvis = 128 * 128 * 1 = 16384.
# In general this parameter must match image_size and color:
# If color is True : nvis = image_size * image_size * 3
# False: nvis = image_size * image_size * 1
nvis : 16384,
# We'll use 80 hidden units for this RBM. That's a small number but we
# want this example script to train quickly.
nhid : 80,
# The elements of the weight matrices of the RBM will be drawn
# independently from U(-0.05, 0.05)
irange : 0.05,
# There are many ways to parameterize a GRBM. Here we use a
# parameterization that makes the correspondence to denoising
# autoencoders more clear.
energy_function_class : !obj:pylearn2.energy_functions.rbm_energy.grbm_type_1 {},
# Some learning algorithms are capable of estimating the standard
# deviation of the visible units of a GRBM successfully, others are not
# and just fix the standard deviation to 1. We're going to show off
# and learn the standard deviation.
learn_sigma : True,
# Learning works better if we provide a smart initialization for the
# parameters. Here we start sigma at .4 , which is about the same
# standard deviation as the training data. We start the biases on the
# hidden units at -2, which will make them have fairly sparse
# activations.
init_sigma : .4,
init_bias_hid : -2.,
# Some GRBM training algorithms can't handle the visible units being
# noisy and just use their mean for all computations. We will show off
# and not use that hack here.
mean_vis : False,
# One hack we will make is to scale back the gradient steps on the
# sigma parameter. This way we don't need to worry about sigma getting
# too small prematurely (if it gets too small too fast the learning
# signal gets weak).
sigma_lr_scale : 1e-3
},
# Next we need to specify the training algorithm that will be used to train
# the model. Here we use stochastic gradient descent.
algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
# The learning rate determines how big of steps the learning algorithm
# takes. Here we use fairly big steps initially because we have a
# learning rate adjustment scheme that will scale them down if
# necessary.
learning_rate : 1e-1,
# Each gradient step will be based on this many examples
batch_size : 16,
# We'll monitor our progress by looking at the first batch of the
# training dataset (monitoring_batches = 1), which gives a rough estimate
# of the training error. To be
# really exhaustive, we could use the entire training set instead,
# or to avoid overfitting, we could use held out data instead.
monitoring_batches : 1,
monitoring_dataset : *data,
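# (*data above is a YAML alias: it refers back to the &data anchor on the
# dataset at the top of this file, so the same dataset object is reused for
# monitoring instead of being constructed a second time.)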
# Here we specify the objective function that stochastic gradient
# descent should minimize. In this case we use denoising score
# matching, which makes this RBM behave as a denoising autoencoder.
# See
# Pascal Vincent. "A Connection Between Score Matching and Denoising
# Autoencoders." Neural Computation, 2011
# for details.
cost : !obj:pylearn2.costs.ebm_estimation.SMD {
# Denoising score matching uses a corruption process to transform
# the raw data. Here we use additive gaussian noise.
corruptor : !obj:pylearn2.corruption.GaussianCorruptor {
stdev : 0.4
},
},
# We'll use the monitoring dataset to figure out when to stop training.
#
# In this case, we stop if there is less than a 1% decrease
# (prop_decrease = 0.01) in the monitored objective over the last
# N = 1000 epochs. You'll notice that the learned features are a bit
# noisy. If you'd like nice smooth features you can increase N so that
# the model trains for longer (at the cost of a much longer run).
termination_criterion : !obj:pylearn2.termination_criteria.MonitorBased {
prop_decrease : 0.01,
N : 1000,
},
# Let's throw a learning rate adjuster into the training algorithm.
# To do this we'll use an "extension," which is basically an event
# handler that can be registered with the Train object.
# This particular one is triggered on each epoch.
# It will shrink the learning rate if the objective goes up and increase
# the learning rate if the objective decreases too slowly. This makes
# our learning rate hyperparameter less important to get right.
# This is not a very mathematically principled approach, but it works
# well in practice.
},
extensions : [!obj:pylearn2.training_algorithms.sgd.MonitorBasedLRAdjuster {}],
# Finally, request that the model be saved after each epoch
save_freq : 1
}
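# After training finishes, the model is pickled to disk. When run with
# pylearn2's train.py and no explicit save_path, the output file is
# typically named after this YAML file (e.g. grbm.pkl). A rough sketch of
# how to inspect it from Python (the filename here is an assumption):
#   from pylearn2.utils import serial
#   model = serial.load("grbm.pkl")
#   print([p.name for p in model.get_params()])
# The learned filters can also be visualized with the bundled script:
#   python pylearn2/scripts/show_weights.py grbm.pkl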