In [1]:
import argparse
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils 
import sklearn.metrics as metrics

In [2]:
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-do',  help='Dropout param [default: 0.5]')
    parser.add_argument('-a',   help='Conv Layers LeakyReLU alpha param [if alpha set to 0 LeakyReLU is equivalent with ReLU] [default: 0.3]')
    parser.add_argument('-k',   help='Feature maps k multiplier [default: 4]')
    parser.add_argument('-cl',  help='Number of Convolutional Layers [default: 5]')
    parser.add_argument('-s',   help='Input Image rescale factor [default: 1]')
    parser.add_argument('-pf',  help='Percentage of the pooling layer: [0,1] [default: 1]')
    parser.add_argument('-pt',  help='Pooling type: \'Avg\', \'Max\' [default: Avg]')
    parser.add_argument('-fp',  help='Feature maps policy: \'proportional\',\'static\' [default: proportional]')
    parser.add_argument('-opt', help='Optimizer: \'SGD\',\'Adagrad\',\'Adam\' [default: Adam]')
    parser.add_argument('-obj', help='Minimization Objective: \'mse\',\'ce\' [default: ce]')
    parser.add_argument('-pat', help='Patience parameter for early stoping [default: 200]')
    parser.add_argument('-tol', help='Tolerance parameter for early stoping [default: 1.005]')
    parser.add_argument('-csv', help='csv results filename alias [default: res]')
    args = parser.parse_args()

    return args

In [3]:
def load_data():
    # loading mnist dataset
    (X_train, y_train), (X_val, y_val) = mnist.load_data()

    # adding a singleton dimension and rescale to [0,1]
    X_train = np.asarray(np.expand_dims(X_train,1))/float(255)
    X_val = np.asarray(np.expand_dims(X_val,1))/float(255)

    # labels to categorical vectors
    uniquelbls = np.unique(y_train)
    nb_classes = uniquelbls.shape[0]
    zbn = np.min(uniquelbls) # zero based numbering
    y_train = np_utils.to_categorical(y_train-zbn, nb_classes)
    y_val = np_utils.to_categorical(y_val-zbn, nb_classes)

    return (X_train, y_train), (X_val, y_val)

In [4]:
def evaluate(actual,pred):
    fscore = metrics.f1_score(actual, pred, average='macro')
    acc = metrics.accuracy_score(actual, pred)
    cm = metrics.confusion_matrix(actual,pred)

    return fscore, acc, cm

In [5]:
# import helpers as H
# import cnn_model as CNN


In [6]:
args         = parse_args()                          # Function for parcing command-line arguments
train_params = {
     'do' : float(args.do) if args.do else 0.5,        # Dropout Parameter
     'a'  : float(args.a) if args.a else 0.3,          # Conv Layers LeakyReLU alpha param [if alpha set to 0 LeakyReLU is equivalent with ReLU]
     'k'  : int(args.k) if args.k else 4,              # Feature maps k multiplier
     's'  : float(args.s) if args.s else 1,            # Input Image rescale factor
     'pf' : float(args.pf) if args.pf else 1,          # Percentage of the pooling layer: [0,1]
     'pt' : args.pt if args.pt else 'Avg',             # Pooling type: Avg, Max
     'fp' : args.fp if args.fp else 'proportional',    # Feature maps policy: proportional, static
     'cl' : int(args.cl) if args.cl else 5,            # Number of Convolutional Layers
     'opt': args.opt if args.opt else 'Adam',          # Optimizer: SGD, Adagrad, Adam
     'obj': args.obj if args.obj else 'ce',            # Minimization Objective: mse, ce
     'patience' : args.pat if args.pat else 200,       # Patience parameter for early stoping
     'tolerance': args.tol if args.tol else 1.005,     # Tolerance parameter for early stoping [default: 1.005, checks if > 0.5%]
     'res_alias': args.csv if args.csv else 'res'      # csv results filename alias
}

In [7]:
# loading mnist data as example
(X_train, y_train), (X_val, y_val) = load_data()

In [8]:
y_train

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.]])

In [17]:
import sys
import cv2
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten, Activation
from keras.layers.convolutional import Convolution2D, MaxPooling2D,AveragePooling2D
from keras.layers.advanced_activations import LeakyReLU


In [10]:
def get_FeatureMaps(L, policy, constant=17):
    return {
        'proportional': (L+1)**2,
        'static': constant,
    }[policy]

In [11]:
def get_Obj(obj):
    return {
        'mse': 'MSE',
        'ce': 'categorical_crossentropy',
    }[obj]

In [12]:
train_params

{'a': 0.3,
 'cl': 5,
 'do': 0.5,
 'fp': '/Users/peterhirt/Library/Jupyter/runtime/kernel-972781be-088a-4622-a1a6-de11e62d0959.json',
 'k': 4,
 'obj': 'ce',
 'opt': 'Adam',
 'patience': 200,
 'pf': 1,
 'pt': 'Avg',
 'res_alias': 'res',
 's': 1,
 'tolerance': 1.005}

In [13]:
def train(x_train, y_train, x_val, y_val, params):
    ''' TODO: documentation '''

    
    # Parameters String used for saving the files
    parameters_str = str('_d' + str(params['do']).replace('.', '') +
                         '_a' + str(params['a']).replace('.', '') + 
                         '_k' + str(params['k']).replace('.', '') + 
                         '_c' + str(params['cl']).replace('.', '') + 
                         '_s' + str(params['s']).replace('.', '') + 
                         '_pf' + str(params['pf']).replace('.', '') + 
                         '_pt' + params['pt'] +
                         '_fp' + str(params['fp']).replace('.', '') +
                         '_opt' + params['opt'] +
                         '_obj' + params['obj'])

    # Printing the parameters of the model
    print('[Dropout Param] \t->\t'+str(params['do']))
    print('[Alpha Param] \t\t->\t'+str(params['a']))
    print('[Multiplier] \t\t->\t'+str(params['k']))
    print('[Patience] \t\t->\t'+str(params['patience']))
    print('[Tolerance] \t\t->\t'+str(params['tolerance']))
    print('[Input Scale Factor] \t->\t'+str(params['s']))
    print('[Pooling Type] \t\t->\t'+ params['pt'])
    print('[Pooling Factor] \t->\t'+str(str(params['pf']*100)+'%'))
    print('[Feature Maps Policy] \t->\t'+ params['fp'])
    print('[Optimizer] \t\t->\t'+ params['opt'])
    print('[Objective] \t\t->\t'+ get_Obj(params['obj']))
    print('[Results filename] \t->\t'+str(params['res_alias']+parameters_str+'.txt'))

    # Rescale Input Images
    if params['s'] != 1:
        print('\033[93m'+'Rescaling Patches...'+'\033[0m')
        x_train = np.asarray(np.expand_dims([cv2.resize(x_train[i, 0, :, :], (0,0), fx=params['s'], fy=params['s']) for i in xrange(x_train.shape[0])], 1))
        x_val = np.asarray(np.expand_dims([cv2.resize(x_val[i, 0, :, :], (0,0), fx=params['s'], fy=params['s']) for i in xrange(x_val.shape[0])], 1))
        print('\033[92m'+'Done, Rescaling Patches'+'\033[0m')
        print('[New Data Shape]\t->\tX: '+str(x_train.shape))

    model = get_model(x_train.shape, y_train.shape, params)

    # Counters-buffers
    maxf         = 0
    maxacc       = 0
    maxit        = 0
    maxtrainloss = 0
    maxvaloss    = np.inf
    p            = 0
    it           = 0
    best_model   = model

    # Open file to write the results
    open(params['res_alias']+parameters_str+'.csv', 'a').write('Epoch, Val_fscore, Val_acc, Train_loss, Val_loss\n')
    open(params['res_alias']+parameters_str+'-Best.csv', 'a').write('Epoch, Val_fscore, Val_acc, Train_loss, Val_loss\n')
    
    while p < params['patience']:
        p += 1

        # Fit the model for one epoch
        print('Epoch: ' + str(it))
        history = model.fit(x_train, y_train, batch_size=128, nb_epoch=1, validation_data=(x_val,y_val), shuffle=True)

        # Evaluate models
        y_score = model.predict(x_val, batch_size=1050)
        fscore, acc, cm = H.evaluate(np.argmax(y_val, axis=1), np.argmax(y_score, axis=1))
        print('Val F-score: '+str(fscore)+'\tVal acc: '+str(acc))

        # Write results in file
        open(params['res_alias']+parameters_str+'.csv', 'a').write(str(str(it)+', '+str(fscore)+', '+str(acc)+', '+str(np.max(history.history['loss']))+', '+str(np.max(history.history['val_loss']))+'\n'))

        # check if current state of the model is the best and write evaluation metrics to file
        if fscore > maxf*params['tolerance']:  # if fscore > maxf*params['tolerance']:
            p            = 0  # restore patience counter
            best_model   = model  # store current model state
            maxf         = fscore 
            maxacc       = acc
            maxit        = it
            maxtrainloss = np.max(history.history['loss'])
            maxvaloss    = np.max(history.history['val_loss'])

            print(np.round(100*cm/np.sum(cm,axis=1).astype(float)))
            open(params['res_alias']+parameters_str+'-Best.csv', 'a').write(str(str(maxit)+', '+str(maxf)+', '+str(maxacc)+', '+str(maxtrainloss)+', '+str(maxvaloss)+'\n'))

        it += 1

    print('Max: fscore:', maxf, 'acc:', maxacc, 'epoch: ', maxit, 'train loss: ', maxtrainloss, 'validation loss: ', maxvaloss)

    return best_model

In [33]:
def get_model(input_shape, output_shape, params):

    print('compiling model...')
        
    # Dimension of The last Convolutional Feature Map (eg. if input 32x32 and there are 5 conv layers 2x2 fm_size = 27)
    fm_size = input_shape[-1] - params['cl']
    
    # Tuple with the pooling size for the last convolutional layer using the params['pf']
    pool_siz = (np.round(fm_size*params['pf']).astype(int), np.round(fm_size*params['pf']).astype(int))
    
    # Initialization of the model
    model = Sequential()
    
    # keras.layers.convolutional.Convolution2D(
    # nb_filter, : Number of convolution filters to use.
    # nb_row,     : Number of rows in the convolution kernel.
    # nb_col,     : Number of columns in the convolution kernel. 
    # init='glorot_uniform',  : name of initialization function for the weights of the layer 
    #                          (see initializations), or alternatively, Theano function to use for weights initialization. 
    #                           This parameter is only relevant if you don't pass a weights argument.
    # activation='linear',     : name of activation function to use (see activations), or alternatively, elementwise Theano function. 
    #                            If you don't specify anything, no activation is applied 
    # weights=None,            : list of numpy arrays to set as initial weights.
    # border_mode='valid',     : 'valid' or 'same'.
    # subsample=(1, 1),        : tuple of length 2. Factor by which to subsample output. Also called strides elsewhere.
    # dim_ordering='th',    : 'th' or 'tf'. In 'th' mode, the channels dimension (the depth) is at index 1, in 'tf' mode is it at index 3.
    # W_regularizer=None,    : : instance of WeightRegularizer (eg. L1 or L2 regularization), applied to the main weights matrix.
    # b_regularizer=None,    : instance of WeightRegularizer, applied to the bias.
    # activity_regularizer=None,  : instance of ActivityRegularizer, applied to the network output.
    # W_constraint=None,          : instance of the constraints module (eg. maxnorm, nonneg), applied to the main weights matrix.
    # b_constraint=None)          : instance of the constraints module, applied to the bias.
    
    # params['fp'] = {
    # "stdin_port": 55807, 
    # "ip": "127.0.0.1", 
    # "control_port": 55808, 
    # "hb_port": 55809, 
    # "signature_scheme": "hmac-sha256", 
    # "key": "1fe89813-e1e0-4524-873b-eb2d9e6882d7", 
    # "kernel_name": "", 
    # "shell_port": 55805, 
    # "transport": "tcp", 
    # "iopub_port": 55806
    # }
    
    
    model.add(Convolution2D(16, 2, 2, init='orthogonal', activation=LeakyReLU(0.3), 
                            input_shape=input_shape[1:]))
    
    model.add(AveragePooling2D(pool_size=pool_siz))
    
    model.add(Flatten())
    model.add(Dropout(params['do']))
    
    model.add(Dense(int(params['k']*get_FeatureMaps(params['cl'], params['fp']))/params['pf']*6, init='he_uniform', activation=LeakyReLU(0)))
    model.add(Dropout(params['do']))
    model.add(Dense(int(params['k']*get_FeatureMaps(params['cl'], params['fp']))/params['pf']*2, init='he_uniform', activation=LeakyReLU(0)))
    model.add(Dropout(params['do']))
    model.add(Dense(output_shape[1], init='he_uniform', activation='softmax'))
    
    
    

    
    
    return model

In [22]:
def train1(x_train, y_train, x_val, y_val, params):
    ''' TODO: documentation '''

    
    # Parameters String used for saving the files
    parameters_str = str('_d' + str(params['do']).replace('.', '') +
                         '_a' + str(params['a']).replace('.', '') + 
                         '_k' + str(params['k']).replace('.', '') + 
                         '_c' + str(params['cl']).replace('.', '') + 
                         '_s' + str(params['s']).replace('.', '') + 
                         '_pf' + str(params['pf']).replace('.', '') + 
                         '_pt' + params['pt'] +
                         '_fp' + str(params['fp']).replace('.', '') +
                         '_opt' + params['opt'] +
                         '_obj' + params['obj'])

    # Printing the parameters of the model
    print('[Dropout Param] \t->\t'+str(params['do']))
    print('[Alpha Param] \t\t->\t'+str(params['a']))
    print('[Multiplier] \t\t->\t'+str(params['k']))
    print('[Patience] \t\t->\t'+str(params['patience']))
    print('[Tolerance] \t\t->\t'+str(params['tolerance']))
    print('[Input Scale Factor] \t->\t'+str(params['s']))
    print('[Pooling Type] \t\t->\t'+ params['pt'])
    print('[Pooling Factor] \t->\t'+str(str(params['pf']*100)+'%'))
    print('[Feature Maps Policy] \t->\t'+ params['fp'])
    print('[Optimizer] \t\t->\t'+ params['opt'])
    print('[Objective] \t\t->\t'+ get_Obj(params['obj']))
    print('[Results filename] \t->\t'+str(params['res_alias']+parameters_str+'.txt'))
    
    # Rescale Input Images
    if params['s'] != 1:
        print('\033[93m'+'Rescaling Patches...'+'\033[0m')
        x_train = np.asarray(np.expand_dims([cv2.resize(x_train[i, 0, :, :], (0,0), fx=params['s'], fy=params['s']) for i in xrange(x_train.shape[0])], 1))
        x_val = np.asarray(np.expand_dims([cv2.resize(x_val[i, 0, :, :], (0,0), fx=params['s'], fy=params['s']) for i in xrange(x_val.shape[0])], 1))
        print('\033[92m'+'Done, Rescaling Patches'+'\033[0m')
        print('[New Data Shape]\t->\tX: '+str(x_train.shape))
        
    model = get_model(x_train.shape, y_train.shape, params)
    
    

In [34]:
model = train1(X_train, y_train, X_val, y_val, train_params)

[Dropout Param] 	->	0.5
[Alpha Param] 		->	0.3
[Multiplier] 		->	4
[Patience] 		->	200
[Tolerance] 		->	1.005
[Input Scale Factor] 	->	1
[Pooling Type] 		->	Avg
[Pooling Factor] 	->	100%
[Feature Maps Policy] 	->	/Users/peterhirt/Library/Jupyter/runtime/kernel-972781be-088a-4622-a1a6-de11e62d0959.json
[Optimizer] 		->	Adam
[Objective] 		->	categorical_crossentropy
[Results filename] 	->	res_d05_a03_k4_c5_s1_pf1_ptAvg_fp/Users/peterhirt/Library/Jupyter/runtime/kernel-972781be-088a-4622-a1a6-de11e62d0959json_optAdam_objce.txt
compiling model...


KeyError: '/Users/peterhirt/Library/Jupyter/runtime/kernel-972781be-088a-4622-a1a6-de11e62d0959.json'