In [1]:
# Load IPython's `autoreload` extension so that edited modules are re-imported
# automatically before code typed at the prompt is executed.
# Mode 2 asks autoreload to reload all modules, not only explicitly marked ones (see the IPython docs).
%load_ext autoreload
%autoreload 2

In [2]:
import os
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so assigning it here cannot
# change hashing in the already-running kernel; only processes started later inherit it.
os.environ['PYTHONHASHSEED'] = '0'
import gpustat

# Device pinned for this experiment. Set to None to auto-select the GPU whose
# memory.used / memory.total ratio is currently the lowest.
FORCED_GPU = 1

if FORCED_GPU is not None:
    # Explicit override: no need to query the driver at all.
    bestGPU = FORCED_GPU
else:
    # Query the cards once and keep (index, memory usage ratio) pairs in a list,
    # so the selection does not depend on iterating lazy map objects in lock-step.
    stats = gpustat.GPUStatCollection.new_query()
    usage = [
        (int(gpu.entry['index']),
         float(gpu.entry['memory.used'])/float(gpu.entry['memory.total']))
        for gpu in stats
    ]
    if not usage:
        raise RuntimeError("setGPU: gpustat did not report any GPU")
    bestGPU = min(usage, key=lambda pair: pair[1])[0]

print("setGPU: Setting GPU to: {}".format(bestGPU))
# Both variables must be set before the deep-learning backend is imported.
# PCI_BUS_ID ordering is presumably chosen so CUDA's numbering matches the
# indices reported by gpustat -- TODO confirm on the target machine.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = str(bestGPU)

# Optional (disabled): cap the fraction of GPU memory this process may allocate.
# Kept as comments rather than a string literal so the cell does not echo it as output.
#
# from keras.backend.tensorflow_backend import set_session
# import tensorflow as tf
# config = tf.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.33
# set_session(tf.Session(config=config))

setGPU: Setting GPU to: 1


'\n#set memory usage to 0.5\nfrom keras.backend.tensorflow_backend import set_session\nimport tensorflow as tf\nconfig = tf.ConfigProto()\nconfig.gpu_options.per_process_gpu_memory_fraction = 0.33\nset_session(tf.Session(config=config))'

In [3]:
#!pip install -q sklearn
import collections
import numpy as np, numpy
from keract import get_activations, display_activations
import keras
from keras import backend as K
from keras import optimizers, regularizers
from keras.callbacks import LearningRateScheduler
from keras.datasets import cifar10, cifar100 # we can use also cifar100
from keras.layers import Input, BatchNormalization, AveragePooling2D, ZeroPadding2D, LeakyReLU, GlobalAveragePooling2D, Dense, Flatten, Activation, Dropout
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.models import Sequential, load_model, Model
from keras.regularizers import l2
from keras.utils import plot_model, to_categorical
from keras.utils.vis_utils import model_to_dot
import matplotlib.pyplot as plt
import scipy
import sklearn
import sklearn.metrics
import sys
#sys.executable
#sys.path
import time
from IPython.display import SVG

from layca_optimizers import SGD

Using TensorFlow backend.


In [4]:
def normalize(x, mean, std):
    """Standardise ``x`` with the supplied statistics.

    Computes ``(x - mean) / (std + eps)`` where ``eps`` is ``K.epsilon()``;
    the epsilon keeps the division defined when ``std`` is zero.

    Args:
        x: value(s) to normalise.
        mean: value subtracted from ``x``.
        std: value ``x - mean`` is divided by (after adding epsilon).

    Returns:
        The normalised value(s).
    """
    # Guard against a zero standard deviation.
    denominator = std + K.epsilon()
    centered = x - mean
    return centered / denominator
  
def import_cifar(dataset):
    """Load CIFAR-10 or CIFAR-100, normalise the images and one-hot encode the labels.

    Args:
        dataset: ``10`` for CIFAR-10 or ``100`` for CIFAR-100 (fine labels). The same
            value is used as the number of classes for the one-hot encoding.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``: images cast to float32 and
        normalised with the training-set mean and standard deviation, labels
        converted with ``to_categorical``.

    Raises:
        ValueError: if ``dataset`` is neither 10 nor 100.
    """
    # Reject unsupported values up front; previously they fell through both
    # branches and failed later with an UnboundLocalError on x_train.
    if dataset not in (10, 100):
        raise ValueError(
            "dataset must be 10 (CIFAR-10) or 100 (CIFAR-100), got {!r}".format(dataset))

    if dataset == 10:
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    else:
        (x_train, y_train), (x_test, y_test) = cifar100.load_data(label_mode='fine')

    # By default, they are uint8 but we need them float to normalize them
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    # A single scalar mean/std over every axis of the training data.
    # The test set is deliberately normalised with the *training* statistics.
    mean = np.mean(x_train, axis=(0, 1, 2, 3))
    std = np.std(x_train, axis=(0, 1, 2, 3))

    x_train = normalize(x_train, mean, std)
    x_test = normalize(x_test, mean, std)

    y_train = to_categorical(y_train, num_classes=dataset)
    y_test = to_categorical(y_test,  num_classes=dataset)

    return (x_train, y_train), (x_test, y_test)

In [5]:
# Load the dataset.
# num_classes is passed to import_cifar, which accepts 10 (CIFAR-10) or 100 (CIFAR-100)
# and also uses it as the one-hot label width.
num_classes = 10
(x_train, y_train), (x_test, y_test) = import_cifar(num_classes)

In [6]:
# Architecture taken from https://github.com/geifmany/cifar-vgg
# Weight decay and Dropout have been removed
# BatchNormalization before activations
def VGG16_Vanilla_beta(input_shape, num_classes):
    """Build the un-regularised VGG16-style classifier as a Keras ``Sequential``.

    The network has five convolutional stages with 64, 128, 256, 512 and 512
    filters (2, 2, 3, 3 and 3 layers respectively). Every convolution is 3x3
    with 'same' padding and is followed by ``BatchNormalization(scale=False,
    center=False)`` and a ReLU; each stage ends with a 2x2 max-pool. The head
    is ``Flatten -> Dense(512) -> BN -> ReLU -> Dense(num_classes) -> BN -> softmax``.

    Args:
        input_shape: shape given to the first convolution's ``input_shape``.
        num_classes: number of units in the final dense layer.

    Returns:
        The (uncompiled) ``Sequential`` model.
    """
    # (filters, number of conv layers) per stage.
    stage_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    stack = []
    for filters, depth in stage_plan:
        for _ in range(depth):
            conv_kwargs = {'padding': 'same'}
            if not stack:
                # Only the very first layer of the network declares the input shape.
                conv_kwargs['input_shape'] = input_shape
            stack.append(Conv2D(filters, (3, 3), **conv_kwargs))
            stack.append(BatchNormalization(scale=False, center=False))
            stack.append(Activation('relu'))
        # Down-sample once at the end of every stage.
        stack.append(MaxPooling2D(pool_size=(2, 2)))

    stack.append(Flatten())

    # Classifier head: both dense layers are batch-normalised before their activation.
    for units, activation in ((512, 'relu'), (num_classes, 'softmax')):
        stack.append(Dense(units))
        stack.append(BatchNormalization(scale=False, center=False))
        stack.append(Activation(activation))

    return Sequential(stack)

# Architecture taken from https://github.com/geifmany/cifar-vgg
# BatchNormalization before activations
def VGG16_beta(input_shape, num_classes, weight_decay):
    """Build the L2-regularised VGG16-style classifier as a Keras ``Sequential``.

    Same layout as the un-regularised variant: five convolutional stages with
    64, 128, 256, 512 and 512 filters (2, 2, 3, 3 and 3 layers), every 3x3
    'same' convolution followed by ``BatchNormalization(scale=False,
    center=False)`` and a ReLU, a 2x2 max-pool after each stage, then
    ``Flatten -> Dense(512) -> BN -> ReLU -> Dense(num_classes) -> BN -> softmax``.
    Every convolution and dense kernel gets its own ``regularizers.l2(weight_decay)``.
    No Dropout layers are used.

    Args:
        input_shape: shape given to the first convolution's ``input_shape``.
        num_classes: number of units in the final dense layer.
        weight_decay: factor passed to ``regularizers.l2`` for every kernel.

    Returns:
        The (uncompiled) ``Sequential`` model.
    """
    # (filters, number of conv layers) per stage.
    stage_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    stack = []
    for filters, depth in stage_plan:
        for _ in range(depth):
            # A fresh regulariser object per layer, as in a hand-written layer list.
            conv_kwargs = {
                'padding': 'same',
                'kernel_regularizer': regularizers.l2(weight_decay),
            }
            if not stack:
                # Only the very first layer of the network declares the input shape.
                conv_kwargs['input_shape'] = input_shape
            stack.append(Conv2D(filters, (3, 3), **conv_kwargs))
            stack.append(BatchNormalization(scale=False, center=False))
            stack.append(Activation('relu'))
        # Down-sample once at the end of every stage.
        stack.append(MaxPooling2D(pool_size=(2, 2)))

    stack.append(Flatten())

    # Classifier head: both dense layers are batch-normalised before their activation.
    for units, activation in ((512, 'relu'), (num_classes, 'softmax')):
        stack.append(Dense(units, kernel_regularizer=regularizers.l2(weight_decay)))
        stack.append(BatchNormalization(scale=False, center=False))
        stack.append(Activation(activation))

    return Sequential(stack)

In [7]:
# Build the three models that are compared.
# The output width follows `num_classes` from the dataset-loading cell, so the
# models stay consistent with the one-hot labels if the dataset is switched.
modelLAY = VGG16_Vanilla_beta(input_shape=(32,32,3), num_classes=num_classes) #without Weight Decay but batchNorm before activation
modelSGD = VGG16_Vanilla_beta(input_shape=(32,32,3), num_classes=num_classes) #without Weight Decay but batchNorm before activation
modelWD = VGG16_beta(input_shape=(32,32,3), num_classes=num_classes, weight_decay=0.005) #with Weight Decay, and batchNorm before activation

In [8]:
## OPTIONAL
# get the initial weights
#weightsSGD_init = modelSGD.get_weights()
#weightsWD_init = modelWD.get_weights()
#weightsLAY_init = modelLAY.get_weights()

In [9]:
# Optimizer for the Layca run: Nesterov-momentum SGD from layca_optimizers
# with its `layca` flag switched on.
sgd = SGD(
    lr=0.0045,
    momentum=0.9,
    nesterov=True,
    layca=True,
)
modelLAY.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Optimizer for the plain-SGD baseline: same learning rate and momentum as the
# Layca run, but with the `layca` flag switched off.
sgd = SGD(
    lr=0.0045,
    momentum=0.9,
    nesterov=True,
    layca=False,
)
modelSGD.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Optimizer for the weight-decay model: Nesterov-momentum SGD without Layca,
# using a smaller learning rate (0.002).
sgd = SGD(
    lr=0.002,
    momentum=0.9,
    nesterov=True,
    layca=False,
)
modelWD.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [12]:
# Snapshot the freshly initialised Layca model so a run can be restarted from
# the same starting point. The target directory is created first because
# save_weights does not create missing parent directories.
os.makedirs('weights', exist_ok=True)
modelLAY.save_weights('weights/modelLAY_weights_init.hdf5')

In [13]:
# Snapshot the freshly initialised plain-SGD model (path is relative to the working directory).
modelSGD.save_weights('weights/modelSGD_weights_init.hdf5')

In [14]:
# Snapshot the freshly initialised weight-decay model (path is relative to the working directory).
modelWD.save_weights('weights/modelWD_weights_init.hdf5')

In [12]:
# Upper bound on the number of training epochs per model; the training loops
# stop earlier once the training accuracy reaches 100%.
epochs = 100

In [None]:
# Train the Layca model one epoch per fit() call so progress can be printed
# and training can stop as soon as the training accuracy reaches 100%.
# NOTE(review): the test split is used as validation data here.
for epoch in range(epochs):
    fit = modelLAY.fit(
        x_train, y_train,
        batch_size=256,
        epochs=1,
        shuffle=True,
        verbose=0,
        validation_data=(x_test, y_test),
    )
    train_acc = fit.history['acc'][0]
    val_acc = fit.history['val_acc'][0]
    print('epoch : ',str(epoch+1), '   acc : ',str(train_acc*100), '   val_acc : ',str(val_acc*100))
    if train_acc >= 1. :
        break

epoch :  1    acc :  58.849999996185296    val_acc :  66.96
epoch :  2    acc :  76.5180000038147    val_acc :  71.54
epoch :  3    acc :  82.80799999809265    val_acc :  74.63
epoch :  4    acc :  86.63800000190734    val_acc :  77.05


In [None]:
# Train the plain-SGD baseline one epoch per fit() call so progress can be
# printed and training can stop as soon as the training accuracy reaches 100%.
# NOTE(review): the test split is used as validation data here.
for epoch in range(epochs):
    fit = modelSGD.fit(
        x_train, y_train,
        batch_size=256,
        epochs=1,
        shuffle=True,
        verbose=0,
        validation_data=(x_test, y_test),
    )
    train_acc = fit.history['acc'][0]
    val_acc = fit.history['val_acc'][0]
    print('epoch : ',str(epoch+1), '   acc : ',str(train_acc*100), '   val_acc : ',str(val_acc*100))
    if train_acc >= 1. :
        break

In [None]:
# Train the weight-decay model one epoch per fit() call so progress can be
# printed and training can stop as soon as the training accuracy reaches 100%.
# NOTE(review): the test split is used as validation data here.
for epoch in range(epochs):
    fit = modelWD.fit(
        x_train, y_train,
        batch_size=256,
        epochs=1,
        shuffle=True,
        verbose=0,
        validation_data=(x_test, y_test),
    )
    train_acc = fit.history['acc'][0]
    val_acc = fit.history['val_acc'][0]
    print('epoch : ',str(epoch+1), '   acc : ',str(train_acc*100), '   val_acc : ',str(val_acc*100))
    if train_acc >= 1. :
        break

In [None]:
# Final evaluation of the Layca model on the test split.
print('\n LAYCA models:')

# evaluate() returns [loss, accuracy] for a model compiled with metrics=['accuracy'].
scores = modelLAY.evaluate(x_test, y_test, verbose=1)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)
print("Error: %.2f%%" % (100-test_accuracy*100))

In [None]:
# EVALUATION
# Final evaluation of the plain-SGD baseline on the test split.
print('\n SGD models:')

# evaluate() returns [loss, accuracy] for a model compiled with metrics=['accuracy'].
scores = modelSGD.evaluate(x_test, y_test, verbose=1)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)
print("Error: %.2f%%" % (100-test_accuracy*100))

In [None]:
# Final evaluation of the weight-decay model on the test split.
print('\n Weight Decay models:')

# evaluate() returns [loss, accuracy] for a model compiled with metrics=['accuracy'].
scores = modelWD.evaluate(x_test, y_test, verbose=1)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)
print("Error: %.2f%%" % (100-test_accuracy*100))

In [24]:
# Save the trained weights, one model per cell.
# NOTE(review): `%test` and `%train` in the file name are literal characters -- no
# string formatting is applied here. They look like placeholders for the reached
# test/train accuracy that were meant to be filled in by hand; confirm before relying on the name.
modelLAY.save_weights('weights/modelLAY_%test_%train_epoch_0.0045lr.hdf5')

In [25]:
# Save the trained plain-SGD weights.
# NOTE(review): `%test` and `%train` are literal characters in this path (no formatting is applied).
modelSGD.save_weights('weights/modelSGD_%test_%train_epoch_0.0045lr.hdf5')

In [None]:
# Save the trained weight-decay model weights.
# NOTE(review): `%test` and `%train` are literal characters in this path (no formatting is applied).
# The `0005` suffix presumably encodes the weight decay; the model was built with
# weight_decay=0.005 -- confirm the intended naming.
modelWD.save_weights('weights/modelWD_%test_%train_epoch_0.002lr_0005.hdf5')