# Binary neural network

On veux tester la binarisation des poids du réseau neuronal durant l'apprentissage.
On le test sur CIFAR-10.

## Import Modules

In [2]:
# Use GPU for Theano, comment to use CPU instead of GPU
# Tensorflow uses GPU by default
import os
os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=gpu,floatX=float32"

In [3]:
import time
import matplotlib.pyplot as plt
import numpy as np
% matplotlib inline
np.random.seed(2017) 
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils

Using TensorFlow backend.


In [4]:
# To visiualize models
from keras.utils import plot_model

# If using tensorflow, set image dimensions order
from keras import backend as K
if K.backend()=='tensorflow':
    K.set_image_dim_ordering("th")

## Binary nn

In [5]:
from keras.optimizers import SGD, Adam, RMSprop
from keras.callbacks import LearningRateScheduler
# Evolution of learning rate

from binary_ops import binary_tanh as binary_tanh_op
from binary_layers import BinaryDense, BinaryConv2D, Clip

class DropoutNoScale(Dropout):
    '''Keras Dropout does scale the input in training phase, which is undesirable here.
    '''
    def call(self, inputs, training=None):
        if 0. < self.rate < 1.:
            noise_shape = self._get_noise_shape(inputs)

            def dropped_inputs():
                return K.dropout(inputs, self.rate, noise_shape,
                                 seed=self.seed) * (1 - self.rate)
            return K.in_train_phase(dropped_inputs, inputs,
                                    training=training)
        return inputs

def binary_tanh(x):
    return binary_tanh_op(x)

## Pre-processings : 

- Global contrast normalization
- ZCA whitening

In [19]:
import numpy as np
import scipy
import scipy.misc
from PIL import Image
from __future__ import division

def normalization(X, up):
    """ Normalize X beetween its minimum and up """
    return (X - np.min(X)) * up / (np.max(X) - np.min(X))

def global_contrast_normalization(Y):
    """ Apply GCN on image Y """
    X = Y.copy()
    average = np.mean(X)
    dev_stand = np.std(X)
    
    for i in range(0, len(X)):
        av = X[i] - average
        dev = av / dev_stand
        X[i] = normalization(dev, 254.0)
    return X
    
def zca_whiten(x_train, y_train):
    """
    Function to compute ZCA whitening matrix (aka Mahalanobis whitening).
    INPUT:  X_train and y_train
    OUTPUT: Whiten X_train
    """
    # Attention, fonction sale
    datagen = ImageDataGenerator(zca_whitening=True)

    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    datagen.fit(x_train)

    flow = datagen.flow(x_train, y_train, batch_size=1, shuffle=False)

    whiten_images = []

    for _ in range(x_train.shape[0]):
        x, y = flow.next()
        whiten_images.append(x[0])
        
    return whiten_images

## Function to load outside CIFAR-10 images

In [20]:
import cv2

#Choisit le test set
#Source = blurred, normal, etc..
classes = ['airplanes', 'automobiles', 'birds', 'cats', 'deers', 'dogs', 'frogs', 'horses', 'ships', 'trucks']

def pick_test_set(source):
    images = []
    labels = []
    for dossier, sous_dossiers, fichiers in os.walk("Images/" + source):
        split = dossier.split("/")
        classe = ""
        if (len(split) >=3):
            classe = split[2]
            #print("Processing " + classe + "...")
        
        #Pour l'instant on ne garde que les vraies classes
        if (classe != "birdogs" and classe != "birdsdogs" and classe != "catsdogs" and classe != "others"):
            print("Processing " + classe + "...")
            
            for fichier in fichiers: #Pour chaque image

                path = dossier + "/"+ fichier
                img = cv2.imread(path)
                img = img[:,:,::-1] # BGR to RGB
                # print(img.shape)
                # TODO
                image = np.transpose(np.reshape(img,(32,32,3)), (2,0,1)) # To CIFAR format
                
                labels.append([classes.index(classe)])
                images.append(image)
                
    return np.array(images), np.array(labels)

## Load CIFAR10 Dataset

In [21]:
from keras.datasets import cifar10
(train_features, train_labels), (test_features, test_labels) = cifar10.load_data()

# USE OUR TEST DATA
test_features, test_labels = pick_test_set("Resized")

num_train, img_channels, img_rows, img_cols =  train_features.shape
num_test, _, _, _ =  test_features.shape
num_classes = len(np.unique(train_labels))

Processing ...
Processing ships...
Processing deers...
Processing frogs...
Processing trucks...
Processing cats...
Processing dogs...
Processing birds...
Processing airplanes...
Processing automobiles...
Processing horses...


## Data pre-processing

In [22]:
train_features = train_features.astype('float32')/255
test_features = test_features.astype('float32')/255
# convert class labels to binary class labels
train_labels = np_utils.to_categorical(train_labels, num_classes)
test_labels = np_utils.to_categorical(test_labels, num_classes)

# Mieux vaut utiliser la fonction zca_whiten pour quelques exemples dans le rapport, mais utiliser les flows pour passer les données
# pré traiter à Keras direct lors de l'apprentissage -> moins d'embrouilles. Limite dans la fonction train
# voir train_processing

# ZCA whitening
#print("ZCA whitening...")
#train_features = zca_whiten(train_features, train_labels) # labels facultatifs.. Mais 5h du mat'
#test_features = zca_whiten(test_features, train_labels)

# Global Contrast Normalization
print("Global contrast normalization...")
for i in range(0, len(train_features)):
    train_features[i] = global_contrast_normalization(train_features[i])
    
for i in range(0, len(test_features)):
    test_features[i] = global_contrast_normalization(test_features[i])

Global contrast normalization...


## Show Examples from Each Class

In [24]:
def show_examples(features, labels):

    class_names = ['airplane','automobile','bird','cat','deer',
                   'dog','frog','horse','ship','truck']
    fig = plt.figure(figsize=(8,3))
    for i in range(num_classes):
        ax = fig.add_subplot(2, 5, 1 + i, xticks=[], yticks=[])
        idx = np.where(labels[:]==i)[0]
        features_idx = features[idx,::]
        img_num = np.random.randint(features_idx.shape[0])
        im = np.transpose(features_idx[img_num,::], (1, 2, 0))
        ax.set_title(class_names[i])
        plt.imshow(im)
    plt.show()
    
show_examples(train_features, train_labels)
show_examples(test_features, test_labels)

SyntaxError: invalid syntax (<ipython-input-24-a049f05cc591>, line 16)

In [None]:
i = 15

img = wimages[i]
#img = normalization(img, 1.0)
img = np.transpose(img, (1, 2, 0))
plt.imshow(img)
plt.show()

img = train_features[i]
img = np.transpose(img, (1, 2, 0))
plt.imshow(img)
plt.show()

## Function to plot model accuracy and loss 

In [None]:
def plot_model_history(model_history):
    fig, axs = plt.subplots(1,2,figsize=(15,5))
    # summarize history for accuracy
    axs[0].plot(range(1,len(model_history.history['acc'])+1),model_history.history['acc'])
    axs[0].plot(range(1,len(model_history.history['val_acc'])+1),model_history.history['val_acc'])
    axs[0].set_title('Model Accuracy')
    axs[0].set_ylabel('Accuracy')
    axs[0].set_xlabel('Epoch')
    axs[0].set_xticks(np.arange(1,len(model_history.history['acc'])+1),len(model_history.history['acc'])/10)
    axs[0].legend(['train', 'val'], loc='best')
    # summarize history for loss
    axs[1].plot(range(1,len(model_history.history['loss'])+1),model_history.history['loss'])
    axs[1].plot(range(1,len(model_history.history['val_loss'])+1),model_history.history['val_loss'])
    axs[1].set_title('Model Loss')
    axs[1].set_ylabel('Loss')
    axs[1].set_xlabel('Epoch')
    axs[1].set_xticks(np.arange(1,len(model_history.history['loss'])+1),len(model_history.history['loss'])/10)
    axs[1].legend(['train', 'val'], loc='best')
    plt.show()

## Funtion to compute test accuracy

In [None]:
def accuracy(test_x, test_y, model):
    result = model.predict(test_x)
    predicted_class = np.argmax(result, axis=1)
    true_class = np.argmax(test_y, axis=1)
    num_correct = np.sum(predicted_class == true_class) 
    accuracy = float(num_correct)/result.shape[0]
    return (accuracy * 100)

## Function to train the network

In [None]:
def train(model, callbacks=None, nb_epoch=10, batch_size=128, verbose=1, lr_start=1e-3, lr_end=1e-4):
    # Train the model
    
    lr_decay = (lr_end / lr_start)**(1. / nb_epoch)
    
    if callbacks==None:
        lr_scheduler = LearningRateScheduler(lambda e: lr_start * lr_decay ** e)
        callbacks = [lr_scheduler]
    
    start = time.time()
    model_info = model.fit(train_features, train_labels, 
                           batch_size=batch_size, nb_epoch=nb_epoch, 
                           validation_data = (test_features, test_labels), 
                           verbose=verbose, callbacks=callbacks)
    end = time.time()
    # plot model history
    plot_model_history(model_info)
    print "Model took %0.2f seconds to train"%(end - start)
    # compute test accuracy
    print "Accuracy on test data is: %0.2f"%accuracy(test_features, test_labels, model)
    
    
def train_processing(model, callbacks=None, nb_epoch=10, batch_size=128, verbose=1, lr_start=1e-3, lr_end=1e-4, zca_whitening=True):
    # Train the model
    
    lr_decay = (lr_end / lr_start)**(1. / nb_epoch)
    
    if callbacks==None:
        lr_scheduler = LearningRateScheduler(lambda e: lr_start * lr_decay ** e)
        callbacks = [lr_scheduler]
    
    print("Pre-processing data...")
    datagen = ImageDataGenerator(zca_whitening=zca_whitening)
    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    datagen.fit(train_features)
    
    start = time.time()
    model_info = # fits the model on batches with real-time data augmentation:
    model.fit_generator(datagen.flow(train_features, train_labels, batch_size=batch_size),
                        steps_per_epoch=len(train_features) / nb_epoch, epochs=nb_epoch,
                        verbose=verbose, callbacks=callbacks)
    end = time.time()
    # plot model history
    plot_model_history(model_info)
    print "Model took %0.2f seconds to train"%(end - start)
    # compute test accuracy
    print "Accuracy on test data is: %0.2f"%accuracy(test_features, test_labels, model)

    

## Save and load trained model

In [None]:
def save_weights(model, h5_file):
    # serialize weights to HDF5
    model.save_weights(h5_file)
    print("Saved model to disk")
    
def load_weights(model, h5_file):
    # load weights into new model
    model.load_weights(h5_file)
    print("Loaded model from disk")
    #return model

## Simple network, without binarization

In [None]:
num_unit = 100
num_hidden = 3
lr_start = 1e-3

# Define the model
model = Sequential()

# Simple layer
model.add(Flatten(input_shape=(3, 32, 32)))
for _ in range(num_hidden):
    model.add(Dense(num_unit, activation='tanh'))

# Output layer
model.add(Dense(num_classes, activation='softmax')) # 10SVM (L2-SVM) coming soon

# Compile the model
model.compile(loss='squared_hinge', optimizer=Adam(lr=lr_start), metrics=['acc'])
#model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
#train(model)

# plot_model(model, to_file='simple_model.png')

## Paper's architecture, without binarization

In [None]:
h5_file = "paper_nb.h5"
from keras.regularizers import l2

# Define the model
model = Sequential()

# (2 * 128C3)
model.add(Convolution2D(128, 3, 3, border_mode='same', input_shape=(3, 32, 32), activation='relu'))
model.add(Convolution2D(128, 3, 3, activation='relu'))

# MP2
model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25)) # test

# (2 * 256C3)
model.add(Convolution2D(256, 3, 3, border_mode='same', activation='relu'))
model.add(Convolution2D(256, 3, 3, activation='relu'))

# MP2
model.add(MaxPooling2D(pool_size=(2, 2)))

# (2 * 512C3)
model.add(Convolution2D(512, 3, 3, border_mode='same', activation='relu'))
model.add(Convolution2D(512, 3, 3, activation='relu'))

# MP2
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten()) # Flat before FC

# (2 * 1024FC)
model.add(Dense(1024, activation='relu'))
model.add(Dense(1024, activation='relu'))

# Output layer
model.add(Dense(num_classes, activation='linear', W_regularizer=l2(0.01))) # 10SVM (L2-SVM) coming soon

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
#train(model)

# Load the model
# model = load(model, h5_file)

# Save the model
#save(model, h5_file)

model.summary()

## Simple network, with binarization

In [None]:
deterministic = True # deterministic or stochastic binarization

H = 'Glorot'

# network
num_unit = 100
num_hidden = 3
use_bias = False

# BN
epsilon = 1e-6
momentum = 0.9

# dropout
drop_in = 0.2
drop_hidden = 0.5

# lr
lr_start = 1e-3

model = Sequential()
model.add(DropoutNoScale(drop_in, input_shape=(3, 32, 32), name='drop0'))
model.add(Flatten())
for i in range(num_hidden):
    model.add(BinaryDense(num_unit, H=H, use_bias=use_bias, # deterministic=deterministic
              name='dense{}'.format(i+1)))
    model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn{}'.format(i+1)))
    model.add(Activation(binary_tanh, name='act{}'.format(i+1)))
    model.add(DropoutNoScale(drop_hidden, name='drop{}'.format(i+1)))
# Output layer
model.add(BinaryDense(10, H=H, use_bias=use_bias, # deterministic=deterministic
          name='dense'))
model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn'))

model.summary()
 
model.compile(loss='squared_hinge', optimizer=Adam(lr=lr_start), metrics=['acc'])
#model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
#train(model, lr_start=lr_start)

model = load_weights(model, "test.h5")

## Binary weights complex

In [None]:
# Define the model
#model = Sequential()

"""
model.add(BinaryConv2D(128, deterministic=deterministic, kernel_size=kernel_size, input_shape=(channels, img_rows, img_cols),
                       data_format='channels_first',
                       H=H, kernel_lr_multiplier=kernel_lr_multiplier, 
                       padding='same', use_bias=use_bias, name='conv1'))
model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn1'))
"""

# Load the model
# model = load(model, h5_file)

# Save the model
#save(model, yaml_file, h5_file)

## Add Data Augmentation

In [None]:
from keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(zoom_range=0.2, 
                             horizontal_flip=True)


# train the model
start = time.time()
# Train the model
model_info = model.fit_generator(datagen.flow(train_features, train_labels, batch_size = 128),
                                 samples_per_epoch = train_features.shape[0], nb_epoch = 200, 
                                 validation_data = (test_features, test_labels), verbose=0)
end = time.time()
print "Model took %0.2f seconds to train"%(end - start)
# plot model history
plot_model_history(model_info)
# compute test accuracy
print "Accuracy on test data is: %0.2f"%accuracy(test_features, test_labels, model)