In [1]:
import theano.tensor as T
import theano
import numpy as np

from utils import get_one_hot
from model_tests import run_test
from vulcanai import mnist_loader
from ops import activations, optimizers
from sklearn.utils import shuffle
import lasagne
from utils import get_class

from utils import get_timestamp
import time
import os

import sys
import json
import cPickle as pickle

Using cuDNN version 7103 on context None
Mapped name None to device cuda: GeForce GTX 960M (0000:01:00.0)
because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



In [2]:
import tensorflow as tf

RuntimeError: module compiled against API version 0xc but this version of numpy is 0xb

RuntimeError: module compiled against API version 0xc but this version of numpy is 0xb

In [20]:

class Network(object):
    """Class to generate networks and train them."""

    def __init__(self, name, dimensions, input_var, y, config,
                 input_network=None, num_classes=None, activation='rectify',
                 pred_activation='softmax', optimizer='adam', stopping_rule='best_validation_error',
                 learning_rate=0.001):

        """
        Initialize network specified.

        Args:
            name: string of network name
            dimensions: the size of the input data matrix
            input_var: theano tensor representing input matrix
            y: theano tensor representing truth matrix
            config: Network configuration (as dict)
            input_network: None or a dictionary containing keys (network, layer).
                network: a Network object
                layer: an integer corresponding to the layer you want output
            num_classes: None or int. how many classes to predict
            activation:  activation function for hidden layers
            pred_activation: the classifying layer activation
            optimizer: which optimizer to use as the learning function
            learning_rate: the initial learning rate
        """
        self.name = name
        self.layers = []
        self.cost = None
        self.val_cost = None
        self.input_dimensions = dimensions
        self.config = config
        self.learning_rate = learning_rate
        self.init_learning_rate = learning_rate
        self.stopping_rule = stopping_rule
        if not optimizers.get(optimizer, False):
            raise ValueError(
                'Invalid optimizer option: {}. '
                'Please choose from:'
                '{}'.format(optimizer, optimizers.keys()))
        if not activations.get(activation, False) or \
           not activations.get(pred_activation, False):
            raise ValueError(
                'Invalid activation option: {} and {}. '
                'Please choose from:'
                '{}'.format(activation, pred_activation, activations.keys()))
        self.activation = activation
        self.pred_activation = pred_activation
        self.optimizer = optimizer
        self.input_var = input_var
        self.y = y
        self.input_network = input_network
        self.input_params = None
        
        self.num_classes = num_classes
        
        
        self.network = self.build_model(
            config=self.config,
            nonlinearity=activations[self.activation]
        )
        
        if self.y is not None:
            self.trainer = self.create_trainer()
            self.validator = self.create_validator()

        self.output = theano.function(
            [i for i in [self.input_var] if i],
            lasagne.layers.get_output(self.network, deterministic=True))
        self.record = None

        try:
            self.timestamp
        except AttributeError:
            self.timestamp = get_timestamp()
        self.minibatch_iteration = 0
        
    def build_model(self, config, nonlinearity):
        
        import jsonschema
        import schemas
        mode = config.get('mode')
        if mode == 'dense':
            jsonschema.validate(config, schemas.dense_network)

            network = self.create_dense_network(
                units=config.get('units'),
                dropouts=config.get('dropouts'),
                nonlinearity=nonlinearity
            )
        elif mode == 'conv':
            jsonschema.validate(config, schemas.conv_network)

            network = self.create_conv_network(
                filters=config.get('filters'),
                filter_size=config.get('filter_size'),
                stride=config.get('stride'),
                pool_mode=config['pool'].get('mode'),
                pool_stride=config['pool'].get('stride'),
                nonlinearity=nonlinearity
            )
        else:
            raise ValueError('Mode {} not supported.'.format(mode))
            
        

        return network    
    
    def create_dense_network(self, units, dropouts, nonlinearity):
        
        if len(units) != len(dropouts):
            raise ValueError(
                "Cannot build network: units and dropouts don't correspond"
            )

        print("Creating {} Network...".format(self.name))
    
        if self.input_network is None:
            print('\tInput Layer:')
            
            network = tf.keras.layers.InputLayer(input_shape=self.input_dimensions,
                                                input_tensor=self.input_var,
                                                name="{}_input".format(
                                                    self.name)).output
            print('\t\t{}'.format(network.shape))
            self.layers.append(network)
        else:
            network = self.input_network['network']. \
                layers[self.input_network['layer']]

            print('Appending layer {} from {} to {}'.format(
                self.input_network['layer'],
                self.input_network['network'].name,
                self.name))

        if nonlinearity.__name__ == 'selu':
            network = lasagne.layers.BatchNormLayer(
                incoming=network,
                name="{}_batchnorm".format(self.name)
            )

        print('\tHidden Layer:')
        for i, (num_units, prob_dropout) in enumerate(zip(units, dropouts)):
            if nonlinearity.__name__ == 'selu':
                weights = tf.Variable(
                    tf.truncated_normal([network.output_shape[1], num_units],
                                        stddev=np.sqrt(1.0 / num_units)),
                    name = 'weights')
                w = lasagne.init.Normal(std=np.sqrt(1.0 / num_units))
                b = lasagne.init.Normal(std=0.0)
            else:
                
                                          
                biases = tf.constant(0.)
                w = lasagne.init.GlorotUniform()
                b = lasagne.init.Constant(0.)
            
            
            
            network = tf.layers.dense(
                inputs=network,
                units=num_units,
                use_bias=False,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                name="layer")

            
            print network
            print "W:", network.shape
            
            self.layers.append(network)

            if nonlinearity.__name__ == 'selu':
                network = AlphaDropoutLayer(
                    incoming=network,
                    name="{}_alphadropout_{}".format(self.name, i))
            else:
                network = lasagne.layers.DropoutLayer(
                    incoming=network,
                    p=prob_dropout,
                    name="{}_dropout_{}".format(self.name, i)
                )

            self.layers.append(network)
            print('\t\t{}'.format(lasagne.layers.get_output_shape(network)))
        return network

    def create_conv_network(self, filters, filter_size, stride,
                            pool_mode, pool_stride, nonlinearity):
        
        conv_dim = len(filter_size[0])
        lasagne_pools = ['max', 'average_inc_pad', 'average_exc_pad']
        if not all(len(f) == conv_dim for f in filter_size):
            raise ValueError('Each tuple in filter_size {} must have a '
                             'length of {}'.format(filter_size, conv_dim))
        if not all(len(s) == conv_dim for s in stride):
            raise ValueError('Each tuple in stride {} must have a '
                             'length of {}'.format(stride, conv_dim))
        if not all(len(p) == conv_dim for p in pool_stride):
            raise ValueError('Each tuple in pool_stride {} must have a '
                             'length of {}'.format(pool_stride, conv_dim))
        if pool_mode not in lasagne_pools:
            raise ValueError('{} pooling does not exist. '
                             'Please use one of: {}'.format(pool_mode, lasagne_pools))

        print("Creating {} Network...".format(self.name))
        if self.input_network is None:
            print('\tInput Layer:')
            network = lasagne.layers.InputLayer(shape=self.input_dimensions,
                                                input_var=self.input_var,
                                                name="{}_input".format(
                                                    self.name))
            print('\t\t{}'.format(lasagne.layers.get_output_shape(network)))
            self.layers.append(network)
        else:
            network = self.input_network['network']. \
                layers[self.input_network['layer']]

            print('Appending layer {} from {} to {}'.format(
                self.input_network['layer'],
                self.input_network['network'].name,
                self.name))

        if conv_dim == 1:
            conv_layer = lasagne.layers.Conv1DLayer
            pool = lasagne.layers.Pool1DLayer
        elif conv_dim == 2:
            conv_layer = lasagne.layers.Conv2DLayer
            pool = lasagne.layers.Pool2DLayer
        elif conv_dim == 3:
            conv_layer = lasagne.layers.Conv3DLayer
            pool = lasagne.layers.Pool3DLayer
        else:
            pool = None   # Linter is stupid
            conv_layer = None
            ValueError("Convolution is only supported for one of the first three dimensions")

        print('\tHidden Layer:')
        for i, (f, f_size, s, p_s) in enumerate(zip(filters,
                                                    filter_size,
                                                    stride,
                                                    pool_stride)):
            network = conv_layer(
                incoming=network,
                num_filters=f,
                filter_size=f_size,
                stride=s,
                pad='same',
                nonlinearity=nonlinearity,
                name="{}_conv{}D_{}".format(
                    self.name, conv_dim, i)
            )
            network.add_param(
                network.W,
                network.W.get_value().shape,
                **{self.name: True}
            )
            network.add_param(
                network.b,
                network.b.get_value().shape,
                **{self.name: True}
            )
            self.layers.append(network)
            print('\t\t{}'.format(lasagne.layers.get_output_shape(network)))
            network = pool(
                incoming=network,
                pool_size=p_s,
                mode=pool_mode,
                name="{}_{}pool".format(
                    self.name, pool_mode)
            )
            self.layers.append(network)
            print('\t\t{}'.format(lasagne.layers.get_output_shape(network)))
        return network


    

    def create_classification_layer(self, network, num_classes,
                                    nonlinearity):
        
        print('\tOutput Layer:')
        network = lasagne.layers.DenseLayer(
            incoming=network,
            num_units=num_classes,
            nonlinearity=nonlinearity,
            name="{}_softmax".format(self.name)
        )
        network.add_param(
            network.W,
            network.W.get_value().shape,
            **{self.name: True}
        )
        network.add_param(
            network.b,
            network.b.get_value().shape,
            **{self.name: True}
        )
        print('\t\t{}'.format(lasagne.layers.get_output_shape(network)))
        self.layers.append(network)
        return network

    def cross_entropy_loss(self, prediction, y):
        """Generate a cross entropy loss function."""
        print("Using categorical cross entropy loss")
        return lasagne.objectives.categorical_crossentropy(prediction,
                                                           y).mean()

    def mse_loss(self, prediction, y):
        """Generate mean squared error loss function."""
        print("Using Mean Squared error loss")
        return lasagne.objectives.squared_error(prediction, y).mean()

    def create_trainer(self):
        
        print("Creating {} Trainer...".format(self.name))
        # get network output
        out = lasagne.layers.get_output(self.network)
        # get all trainable parameters from network

        self.params = lasagne.layers.get_all_params(
            self.network,
            trainable=True,
            **{self.name: True}
        )
        if self.input_params is not None:
            self.params = self.input_params + self.params

        # calculate a loss function which has to be a scalar
        if self.cost is None:
            if self.num_classes is None or self.num_classes == 0:
                self.cost = self.mse_loss(out, self.y)
            else:
                self.cost = self.cross_entropy_loss(out, self.y)

        # calculate updates using ADAM optimization gradient descent
        learning_rate_var = T.scalar(name='learning_rate')
        if self.optimizer == 'adam':
            updates = optimizers[self.optimizer](
                loss_or_grads=self.cost,
                params=self.params,
                learning_rate=learning_rate_var,
                beta1=0.9,
                beta2=0.999,
                epsilon=1e-08
            )
        elif self.optimizer == 'sgd':
            updates = optimizers[self.optimizer](
                loss_or_grads=self.cost,
                params=self.params,
                learning_rate=learning_rate_var
            )
        else:
            updates = None
            ValueError("No optimizer found")

        # omitted (, allow_input_downcast=True)
        return theano.function(
            [i for i in [self.input_var, self.y, learning_rate_var] if i],
            updates=updates
        )

    def create_validator(self):
        
        print("Creating {} Validator...".format(self.name))
        # create prediction
        val_prediction = lasagne.layers.get_output(
            self.network,
            deterministic=True
        )
        # check how much error in prediction
        if self.val_cost is None:
            if self.num_classes is None or self.num_classes == 0:
                self.val_cost = self.mse_loss(val_prediction, self.y)
                val_acc = T.constant(0)
            else:
                self.val_cost = self.cross_entropy_loss(val_prediction, self.y)
                # check the accuracy of the prediction
                if self.num_classes > 1:
                    val_acc = T.mean(T.eq(T.argmax(val_prediction, axis=1),
                                          T.argmax(self.y, axis=1)),
                                     dtype=theano.config.floatX)
                elif self.num_classes == 1:
                    val_acc = T.mean(T.eq(T.round(val_prediction,
                                                  mode='half_away_from_zero'),
                                          self.y),
                                     dtype=theano.config.floatX)

        return theano.function([self.input_var, self.y],
                               [self.val_cost, val_acc])


    


    def train(self, epochs, train_x, train_y, val_x, val_y,
              batch_ratio=0.1, plot=True, change_rate=None):
        
        print('\nTraining {} in progress...\n'.format(self.name))

        if batch_ratio > 1:
            batch_ratio = 1
        batch_ratio = float(batch_ratio)

        self.record = dict(
            epoch=[],
            train_error=[],
            train_accuracy=[],
            validation_error=[],
            validation_accuracy=[]
        )

        if self.stopping_rule == 'best_validation_error':
            best_state = None
            best_epoch = None
            best_error = float('inf')

        elif self.stopping_rule == 'best_validation_accuracy':
            best_state = None
            best_epoch = None
            best_accuracy = 0.0

        output_shape = lasagne.layers.get_output_shape(self.network)
        if output_shape[1:] != train_y.shape[1:]:
            raise ValueError(
                'Shape mismatch: non-batch dimensions don\'t match.'
                '\n\tNetwork output shape: {}'
                '\n\tLabel shape (train_y): {}'.format(
                    output_shape,
                    train_y.shape))

        if train_x.shape[0] * batch_ratio < 1.0:
            batch_ratio = 1.0 / train_x.shape[0]
            print('Warning: Batch ratio too small. Changing to {:.5f}'.format(batch_ratio))
        try:

            for epoch in range(epochs):
                epoch_time = time.time()
                print("--> Epoch: {}/{}".format(
                    epoch,
                    epochs - 1
                ))

                train_x, train_y = shuffle(train_x, train_y, random_state=0)

                for i in range(int(1 / batch_ratio)):
                    size = train_x.shape[0]
                    b_x = train_x[int(size * (i * batch_ratio)):
                                  int(size * ((i + 1) * batch_ratio))]
                    b_y = train_y[int(size * (i * batch_ratio)):
                                  int(size * ((i + 1) * batch_ratio))]

                    self.trainer(b_x, b_y, self.learning_rate)

                    sys.stdout.flush()
                    sys.stdout.write('\r\tDone {:.1f}% of the epoch'.format
                                     (100 * (i + 1) * batch_ratio))

                    if change_rate is not None:
                        if not callable(change_rate):
                            raise ValueError(
                                'Parameter change_rate must be a function '
                                'that returns a new learning rate. '
                                'Learning rate remains unchanged.'
                            )
                        # print('Modifying learning rate from {}'.format(
                        #     self.learning_rate)
                        # ),
                        self.learning_rate = change_rate(
                            self.init_learning_rate,
                            self.minibatch_iteration
                        )
                        # print('to {}'.format(self.learning_rate))
                    self.minibatch_iteration += 1

                train_error, train_accuracy = self.validator(train_x, train_y)
                validation_error, validation_accuracy = self.validator(val_x,
                                                                       val_y)

                if self.stopping_rule == 'best_validation_error' and validation_error < best_error:
                    best_epoch = epoch
                    best_error = validation_error

                elif self.stopping_rule == 'best_validation_accuracy' and validation_accuracy > best_accuracy:
                    best_epoch = epoch
                    best_accuracy = validation_accuracy

                self.record['epoch'].append(epoch)
                self.record['train_error'].append(train_error)
                self.record['train_accuracy'].append(train_accuracy)
                self.record['validation_error'].append(validation_error)
                self.record['validation_accuracy'].append(validation_accuracy)
                epoch_time_spent = time.time() - epoch_time
                print("\n\ttrain error: {:.6f} |"" train accuracy: {:.6f} in {:.2f}s".format(
                    float(train_error),
                    float(train_accuracy),
                    epoch_time_spent))
                print("\tvalid error: {:.6f} | valid accuracy: {:.6f} in {:.2f}s".format(
                    float(validation_error),
                    float(validation_accuracy),
                    epoch_time_spent))

                eta = epoch_time_spent * (epochs - epoch - 1)
                minute, second = divmod(eta, 60)
                hour, minute = divmod(minute, 60)
                print("\tEstimated time left: {}:{}:{} (h:m:s)\n".format(
                    int(hour),
                    int(minute),
                    int(second)))




        except KeyboardInterrupt:
            print("\n\n**********Training stopped prematurely.**********\n\n")
        finally:
            self.timestamp = get_timestamp()

            if self.stopping_rule == 'best_validation_error':
                print("STOPPING RULE: Rewinding to epoch {} which had the lowest validation error: {}\n".format(best_epoch, best_error))
                

            elif self.stopping_rule == 'best_validation_accuracy':
                print("STOPPING RULE: Rewinding to epoch {} which had the highest validation accuracy: {}\n".format(best_epoch, best_accuracy))
                
        
    def forward_pass(self, input_data, convert_to_class=False):
        
        if convert_to_class:
            return get_class(self.output(input_data))
        else:
            return self.output(input_data)

    @classmethod
    def load_model(cls, load_path):
       
        print('Loading model from: {}'.format(load_path))
        with open(load_path, 'rb') as f:
            instance = pickle.load(f)
        return instance










In [18]:
(train_images, train_labels, test_images, test_labels) = mnist_loader.load_fashion_mnist()

train_labels = get_one_hot(train_labels)

data folder already exists
Loading training images...
Loading training labels...
Loading testing images...
Loading testing labels...


In [5]:
network_dense_config = {
    'mode': 'dense',
    'units': [512, 100],
    'dropouts': [0.2, 0.3],
}

In [21]:
import tf_utils
with tf.Graph().as_default():
    input_var, y = tf_utils.init_placeholders(len(train_images[0]), len(train_labels[0]))
    
    dense_net = Network(
    name='3_dense_test',
    dimensions=[None] + list(train_images.shape[1:]),
    input_var=input_var,
    y=y,
    config=network_dense_config,
    input_network=None,
    num_classes=10,
    activation='rectify',
    pred_activation='softmax',
    optimizer='adam')


Creating 3_dense_test Network...
	Input Layer:
		(?, 784)
	Hidden Layer:
Tensor("layer/MatMul:0", shape=(?, 512), dtype=float32)
W: (?, 512)


AttributeError: 'Tensor' object has no attribute 'output_shape'

In [None]:
# # Use to load model from disk
# # dense_net = Network.load_model('models/20170704194033_3_dense_test.network')
dense_net.train(
    epochs=2,
    train_x=train_images[:50000],
    train_y=train_labels[:50000],
    val_x=train_images[50000:60000],
    val_y=train_labels[50000:60000],
    batch_ratio=0.05,
    plot=True
)



In [None]:
l1 = tf.keras.layers.InputLayer(input_shape = [None] + list(train_images.shape[1:]), input_tensor=input_var, name='input_layer').output
print l1.shape
l2 = tf.layers.dense(
      inputs=l1,
      units = 50)
print l2.shape

In [None]:
tf.keras.layers.InputLayer(input_tensor=input_var, name='input_layer').output

In [None]:
train_labels_tf = data_tf.test.images

In [None]:
len(train_labels_tf)

In [None]:
input_var = T.fmatrix('input')
print input_var

In [None]:
input_var.get_shape().as_list()[-1]

In [None]:
tf.glorot_uniform_initializer () 

In [None]:
v1 = tf.Variable([], name="v1")
v1.initializer


In [None]:
v1.initial_value

In [None]:
my_variable = tf.get_variable("my_variable", [1, 2, 3])

In [None]:
weights = tf.get_variable("weights", [784, 512])

In [None]:
tf.reset_default_graph() 

In [None]:
def cnn_network(units, n_layers, filter_width):
        """Assemble Convolutional neural network

        Args:
            units: input units to be convolved with kernels
            n_layers: number of layers
            filter_width: width of the filter (kernel)

        Returns:
            units: output units of the CNN
            auxiliary_outputs: auxiliary outputs from every layer
        """
        n_filters = units.get_shape().as_list()[-1]
        auxiliary_outputs = []
        for n_layer in range(n_layers):
            units = tf.layers.conv1d(units,
                                     n_filters,
                                     filter_width,
                                     padding='same',
                                     name='Layer_' + str(n_layer),
                                     activation=None,
                                     kernel_initializer=tf.contrib.layers.xavier_initializer())
            auxiliary_outputs.append(units)
            units = tf.nn.relu(units)
        return units, auxiliary_outputs 

In [None]:
cnn_network(input_var, 20, (5,))