In [1]:
%pylab inline
import matplotlib.pyplot as plt

Populating the interactive namespace from numpy and matplotlib


In [2]:
import skimage.io
from scipy.ndimage.filters import convolve

#note: this requires the starter code for the assignments!
from common.plotting import plot_mat

In [3]:
import os
import sys  # imported explicitly: do not rely on `%pylab` leaking `sys` into the namespace

# Fix a bug in printing SVG: on Windows, replace pydot's Graphviz lookup with
# one that searches a fixed installation directory.
if sys.platform == 'win32':
    print("Monkey-patching pydot")
    import pydot

    def force_find_graphviz(graphviz_root):
        """Build a replacement for ``pydot.find_graphviz``.

        Looks inside ``<graphviz_root>/bin`` for each Graphviz program
        (``dot``, ``twopi``, ``neato``, ``circo``, ``fdp``, ``sfdp``), trying
        the bare name first and the ``.exe`` name second.

        Parameters
        ----------
        graphviz_root : str
            Root directory of the Graphviz installation.

        Returns
        -------
        callable
            A zero-argument function returning a dict that maps each program
            name found to its full path. Programs that are not found are
            left out of the dict.
        """
        binpath = os.path.join(graphviz_root, 'bin')
        progdict = {}
        for prg in 'dot twopi neato circo fdp sfdp'.split():
            # Prefer the bare executable name, fall back to the ".exe" one.
            for candidate in (prg, prg + '.exe'):
                candidate_path = os.path.join(binpath, candidate)
                if os.path.exists(candidate_path):
                    progdict[prg] = candidate_path
                    break
        # The lookup is done once, here; the returned callable only hands
        # back the already-built dict.
        return lambda: progdict

    pydot.find_graphviz = force_find_graphviz('c:/Program Files (x86)/Graphviz2.34/')

Monkey-patching pydot
Couldn't import dot_parser, loading of dot files will not be possible.


In [4]:
import theano
import theano.tensor.signal.downsample

import numpy as np
import theano.tensor as T

Using gpu device 0: GeForce GTX 950M (CNMeM is disabled)


In [5]:
import lasagne

In [14]:
from fuel.datasets.mnist import MNIST
from fuel.transformers import ScaleAndShift, Cast, Flatten, Mapping
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme

# Default preprocessing applied by DataStream.default_stream: scale and shift
# the 'features' source (presumably mapping pixel values from [0, 255] to
# [-1, 1] -- confirm against the raw data) and cast it to float32.
MNIST.default_transformers = (
    (ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
    (Cast, [np.float32], {'which_sources': 'features'}))

# NOTE(review): the recorded output of the next cell shows batches of shape
# (N, 3, 32, 32) and build_cnn expects 3x32x32 inputs, which does not look
# like MNIST -- confirm which dataset this notebook is meant to load.

# The first 40000 training examples are used for training ...
train_set = MNIST(("train",), subset=slice(None, 40000))
# This stream shuffles the training set and returns batches of 25 examples.
train_set_stream = DataStream.default_stream(
    train_set,
    iteration_scheme=ShuffledScheme(train_set.num_examples, 25))

# ... and the remaining training examples are held out for validation.
validation_set = MNIST(("train",), subset=slice(40000, None))

# We will use larger batches for testing and validation
# as these don't do a backward pass and require less RAM.
validation_set_stream = DataStream.default_stream(
    validation_set, iteration_scheme=SequentialScheme(validation_set.num_examples, 50))
test_set = MNIST(("test",))
test_set_stream = DataStream.default_stream(
    test_set, iteration_scheme=SequentialScheme(test_set.num_examples, 100))

In [15]:
print("The streams return batches containing %s" % (train_set_stream.sources,))

# Peek at the first batch of the training stream and of the test stream and
# report the shape and dtype of every array in it.
for header, stream in (
        ("Each trainin batch consits of a tuple containing:", train_set_stream),
        ("Validation/test batches consits of tuples containing:", test_set_stream)):
    print(header)
    first_batch = next(stream.get_epoch_iterator())
    for element in first_batch:
        print(" - an array of size %s containing %s" % (element.shape, element.dtype))

The streams return batches containing (u'features', u'targets')
Each trainin batch consits of a tuple containing:
 - an array of size (25L, 3L, 32L, 32L) containing float32
 - an array of size (25L, 1L) containing uint8
Validation/test batches consits of tuples containing:
 - an array of size (100L, 3L, 32L, 32L) containing float32
 - an array of size (100L, 1L) containing uint8


In [16]:
def build_cnn(input_var=None):
    """Assemble the convolutional classifier as a stack of Lasagne layers.

    Architecture, in order: input (3x32x32) -> 5x5 conv with 60 filters ->
    2x2 max-pool -> 5x5 conv with 200 filters -> 2x2 max-pool ->
    dropout(0.5) -> dense 1200 (ReLU) -> dropout(0.2) -> dense 10 (softmax).

    Parameters
    ----------
    input_var : Theano variable or None
        Passed through to the ``InputLayer`` as its ``input_var``.

    Returns
    -------
    (network, layer2reg)
        ``network`` is the final softmax layer; ``layer2reg`` is the
        1200-unit hidden dense layer, returned separately so that callers
        can attach a regularisation penalty to it.
    """
    layers = lasagne.layers
    relu = lasagne.nonlinearities.rectify

    net_in = layers.InputLayer(shape=(None, 3, 32, 32), input_var=input_var)

    # Spatial size after each stage is given in the trailing comments.
    conv1 = layers.Conv2DLayer(
        net_in,
        num_filters=60,
        filter_size=(5, 5),
        nonlinearity=relu,
        W=lasagne.init.GlorotUniform())                      # 28x28
    pool1 = layers.MaxPool2DLayer(conv1, pool_size=(2, 2))   # 14x14

    conv2 = layers.Conv2DLayer(
        pool1,
        num_filters=200,
        filter_size=(5, 5),
        nonlinearity=relu)                                   # 10x10
    pool2 = layers.MaxPool2DLayer(conv2, pool_size=(2, 2))   # 5x5

    # Hidden fully-connected layer, fed through 50% dropout.
    hidden_in = layers.dropout(pool2, p=.5)
    layer2reg = layers.DenseLayer(
        hidden_in,
        num_units=1200,
        nonlinearity=relu)

    # 10-way softmax output, fed through 20% dropout.
    output_in = layers.dropout(layer2reg, p=.2)
    network = layers.DenseLayer(
        output_in,
        num_units=10,
        nonlinearity=lasagne.nonlinearities.softmax)

    return network, layer2reg

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield aligned ``(inputs, targets)`` minibatches of exactly ``batchsize`` items.

    Parameters
    ----------
    inputs, targets : sequences of equal length
        Both are indexed with the same slice (or, when shuffling, the same
        index array), so corresponding rows stay paired.
    batchsize : int
        Number of examples per yielded batch. A trailing remainder smaller
        than ``batchsize`` is dropped.
    shuffle : bool
        When true, examples are visited in a random order drawn with
        ``np.random.shuffle``; otherwise in their original order.

    Yields
    ------
    tuple
        ``(inputs[batch], targets[batch])`` for each full batch.
    """
    n_examples = len(inputs)
    assert n_examples == len(targets)

    order = None
    if shuffle:
        order = np.arange(n_examples)
        np.random.shuffle(order)

    for lo in range(0, n_examples - batchsize + 1, batchsize):
        hi = lo + batchsize
        # Fancy-index with the permuted ids when shuffling, plain slice otherwise.
        picker = order[lo:hi] if shuffle else slice(lo, hi)
        yield inputs[picker], targets[picker]

In [23]:
import time
from lasagne.regularization import regularize_layer_params_weighted, l2, l1
# Optimisation hyper-parameters.
start_learning_rate = 2.5e-3
num_epochs = 50

# Number of minibatches processed so far, counted across epochs.
total_batch = 0

# Symbolic inputs: a 4-D batch of images, an integer vector of class labels,
# and two float32 scalars so that the learning rate and momentum can be
# supplied on every call of the training function.
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
lrate_var = T.scalar('lrate', dtype='float32')
momentum_var = T.scalar('momentum', dtype='float32')

# Build the network; layer2reg is the hidden dense layer, kept around for the
# (currently disabled) regularisation penalty below.
network, layer2reg = build_cnn(input_var)

# Training objective: mean categorical cross-entropy of the stochastic
# forward pass (dropout active).
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()

# Regularisation decreased accuracy, so it is left disabled:
#penalty = lasagne.regularization.regularize_layer_params(layer2reg, l1) * 1.5e-5
#loss = loss + penalty

# SGD with Nesterov momentum; learning rate and momentum are symbolic inputs.
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(
    loss,
    params,
    learning_rate=lrate_var,
    momentum=momentum_var)

# Evaluation objective: same loss, but computed from a deterministic forward
# pass, which disables the dropout layers.
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.categorical_crossentropy(
    test_prediction, target_var).mean()
# Fraction of examples whose arg-max class equals the target label.
test_acc = T.mean(
    T.eq(T.argmax(test_prediction, axis=1), target_var),
    dtype=theano.config.floatX)

# Training step: returns the training loss and applies the parameter updates.
train_fn = theano.function(
    [input_var, target_var, lrate_var, momentum_var],
    loss,
    updates=updates)

# Evaluation step: returns [loss, accuracy] without touching the parameters.
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

def _evaluate_stream(stream):
    """Return (mean loss, mean accuracy) of ``val_fn`` over all batches of ``stream``.

    Both values are unweighted means of the per-batch results.
    """
    err_sum = 0
    acc_sum = 0
    n_batches = 0
    for inputs, targets in stream.get_epoch_iterator():
        err, acc = val_fn(inputs, targets.flatten())
        err_sum += err
        acc_sum += acc
        n_batches += 1
    return err_sum / n_batches, acc_sum / n_batches


# Finally, launch the training loop.
print("Starting training...")
best_result = 0.0

# The learning rate stays at start_learning_rate for the first K minibatches
# and then decays as K / total_batch.
K = 15000

for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0.0
    train_batches = 0
    start_time = time.time()

    for inputs, targets in train_set_stream.get_epoch_iterator():
        # Cast explicitly to float32, the dtype declared for lrate_var.
        lrate = np.float32(start_learning_rate * K / max(K, total_batch))
        train_err += train_fn(inputs, targets.flatten(), lrate, 0.9)
        train_batches += 1
        total_batch += 1
        if total_batch % 100 == 0:
            # train_err is reset every epoch, so the running mean must be
            # taken over this epoch's batch count, not the global counter.
            print("minibatch err %f" % (1.0 * train_err / train_batches))

    # Then a deterministic pass over the validation data, and another one
    # over the training data to get loss/accuracy with dropout disabled.
    val_loss, val_accuracy = _evaluate_stream(validation_set_stream)
    train_loss, train_accuracy = _evaluate_stream(train_set_stream)

    # Then we print the results for this epoch (the reported time includes
    # the two evaluation passes above):
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss:\t\t{:.6f}".format(train_loss))
    print("  validation loss:\t\t{:.6f}".format(val_loss))
    print("  validation accuracy:\t\t{:.2f} %".format(val_accuracy * 100))
    print("  train accuracy:\t\t{:.2f} %".format(train_accuracy * 100))

    # Store the parameters whenever validation accuracy improves.
    # Note: np.savez appends ".npz", so the file on disk is "net.net.npz".
    if best_result < val_accuracy:
        #np.savez('C:\\Users\\Alek\\Desktop\\CIFAR\\net.net', *lasagne.layers.get_all_param_values(network))
        np.savez('net.net', *lasagne.layers.get_all_param_values(network))
        best_result = val_accuracy


Starting training...


In [116]:
# Accumulate loss and accuracy over every test batch, then report the
# unweighted per-batch means.
test_err, test_acc, test_batches = 0, 0, 0
for inputs, targets in test_set_stream.get_epoch_iterator():
    batch_err, batch_acc = val_fn(inputs, targets.flatten())
    test_err = test_err + batch_err
    test_acc = test_acc + batch_acc
    test_batches = test_batches + 1

mean_test_loss = test_err / test_batches
mean_test_accuracy = test_acc / test_batches * 100

print("Final results:")
print("  test loss:\t\t\t{:.6f}".format(mean_test_loss))
print("  test accuracy:\t\t{:.2f} %".format(mean_test_accuracy))

Final results:
  test loss:			0.651863
  test accuracy:		80.02 %
