In [1]:
import sys
import os
import time
import glob
import PIL.Image as Image
import numpy as np
import matplotlib.pyplot as plt

import theano
import theano.tensor as T

import lasagne
#import lasagne.layers.dnn

import Dcgan

Using cuDNN version 6020 on context None
Mapped name None to device cuda0: Graphics Device (0000:01:00.0)


In [2]:
# Load data
def load_dataset():
    import gzip
    def load_mnist_images(filename):
        if not os.path.exists(filename):
            download(filename)
        with gzip.open(filename, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=16)
        data = data.reshape(-1, 1, 28, 28)
        return data / np.float32(256)

    def load_mnist_labels(filename):
        if not os.path.exists(filename):
            download(filename)
        with gzip.open(filename, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=8)
        return data

    X_train = load_mnist_images('train-images-idx3-ubyte.gz')
    y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
    X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
    y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

    # We reserve the last 10000 training examples for validation.
    X_train, X_val = X_train[:-10000], X_train[-10000:]
    y_train, y_val = y_train[:-10000], y_train[-10000:]

    return X_train, y_train, X_val, y_val, X_test, y_test

In [3]:
# Batch iterator
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

In [4]:
# Main
def main():
    
    # Hyper Params
    num_epochs = 100
    batchsize = 128
    initial_eta = 0.0002
    
    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
       
    # Prepare Theano variables for inputs and targets
    noiseVar = T.matrix('noise')
    inputVar = T.tensor4('inputs')

    # Build Network
    print("Building model and compiling functions...")
    generator = Dcgan.buildGenerator(noiseVar)
    discriminator = Dcgan.buildDiscriminator(inputVar)

    # Output expressions
    realOut = lasagne.layers.get_output(discriminator)
    fakeOut = lasagne.layers.get_output(discriminator,
            lasagne.layers.get_output(generator))
    
    # Loss expressions
    generatorLoss = lasagne.objectives.binary_crossentropy(
                fakeOut, 1).mean()
    discriminatorLoss = (lasagne.objectives.binary_crossentropy(
                    realOut, 1)
                    + lasagne.objectives.binary_crossentropy(
                    fakeOut, 0)).mean()

    # Update expressions 
    learning_rate = theano.shared(lasagne.utils.floatX(initial_eta))
    generatorParams = lasagne.layers.get_all_params(generator, 
                trainable=True)
    discriminatorParams = lasagne.layers.get_all_params(discriminator, 
                trainable=True)
    updates = lasagne.updates.adam(generatorLoss, 
        generatorParams, 
        learning_rate, 
        beta1=0.5)
    updates.update(lasagne.updates.adam(discriminatorLoss, 
        discriminatorParams, 
        learning_rate, 
        beta1=0.5))

    # Train Function
    train_fn = theano.function([noiseVar, inputVar], 
            [(realOut > .5).mean(),(fakeOut < .5).mean()], 
            updates=updates)

    # Data generating function
    gen_fn = theano.function([noiseVar],
        lasagne.layers.get_output(generator,
        deterministic=True))
    
    # Model Load if resumable
    with np.load('mnist_gen.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(generator, param_values)
    with np.load('mnist_disc.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(discriminator, param_values)
    
    # Training Loop
    print("Starting training...")
    # We iterate over epochs
    for epoch in range(num_epochs):
        # Full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, batchsize, shuffle=True):
            inputs, targets = batch
            noise = lasagne.utils.floatX(np.random.rand(len(inputs), 100))

            train_err += np.array(train_fn(noise, inputs))
            train_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{}".format(train_err / train_batches))
        
        
        # And finally, we plot some generated data
        samples = gen_fn(lasagne.utils.floatX(np.random.rand(42, 100)))

        plt.imsave('mnist_samples.png',
                       (samples.reshape(6, 7, 28, 28)
                               .transpose(0, 2, 1, 3)
                               .reshape(6*28, 7*28)),
                       cmap='gray')
            
    # Save model
    np.savez('mnist_gen.npz', *lasagne.layers.get_all_param_values(generator))
    np.savez('mnist_disc.npz', *lasagne.layers.get_all_param_values(discriminator))

In [5]:
if __name__ == '__main__':
        main()

Loading data...
Building model and compiling functions...
Generator output: (None, 1, 28, 28)
num params 4918532
Discriminator output: (None, 1)
num params 3268769
Starting training...
Epoch 1 of 100 took 4.921s
  training loss:		[ 0.98932292  0.98798077]
Epoch 2 of 100 took 4.875s
  training loss:		[ 0.99076522  0.98980369]
Epoch 3 of 100 took 4.993s
  training loss:		[ 0.99485176  0.99356971]
Epoch 4 of 100 took 4.890s
  training loss:		[ 0.98010817  0.97800481]
Epoch 5 of 100 took 5.039s
  training loss:		[ 0.98946314  0.98625801]
Epoch 6 of 100 took 4.927s
  training loss:		[ 0.99811699  0.99723558]
Epoch 7 of 100 took 5.036s
  training loss:		[ 0.99066506  0.98928285]
Epoch 8 of 100 took 4.907s
  training loss:		[ 0.98369391  0.98209135]
Epoch 9 of 100 took 4.934s
  training loss:		[ 0.99625401  0.99523237]
Epoch 10 of 100 took 4.934s
  training loss:		[ 0.98966346  0.98774038]
Epoch 11 of 100 took 5.039s
  training loss:		[ 0.98167067  0.98004808]
Epoch 12 of 100 took 5.032s
  tr

KeyboardInterrupt: 