In [1]:
import keras
import cv2
import numpy as np
import pandas as pd
import os
import scipy
import tensorflow as tf
from tensorflow.keras import initializers
from itertools import count
from sklearn.metrics import accuracy_score
from keras.datasets import mnist
from keras.applications.vgg16 import VGG16
from keras import layers
from keras.layers import Dense, Dropout, Flatten, Activation, Input, Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D
from keras.models import Model
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping, CSVLogger
from scipy.stats import pearsonr
from tqdm import tqdm
from IPython.display import clear_output

Using TensorFlow backend.


In [0]:
BATCH_SIZE = 128
EPOCHS = 9999
IMAGE_SIZE = 28
NUM_CLASSES = 10
NUM_CHANNELS = 1
MODEL_NAME = "MNIST_weight_init"
PATH = ""
NR_OF_RUNS = 10

# Preprocess

In [0]:
def preprocess(imgs):
    
    return imgs.reshape(imgs.shape[0], IMAGE_SIZE, IMAGE_SIZE, 1)

In [4]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = preprocess(x_train)
x_test = preprocess(x_test)

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [0]:
# Convert class vectors to binary class matrices.
y_trainc = keras.utils.to_categorical(y_train, NUM_CLASSES)
y_testc = keras.utils.to_categorical(y_test, NUM_CLASSES)

In [0]:
x_train_full = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train_full /= 255
x_test /= 255

# Model

In [0]:
def MNISTmodel(imsize, num_classes, num_channels):
    inputs = Input((imsize,imsize,num_channels))
    x = Conv2D(filters = 32, kernel_size = (3,3), activation = 'relu', strides = 2)(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size = (2,2), strides=(2,2), padding = "same")(x)
    x = Conv2D(filters=32, kernel_size=(1,1), activation='relu', padding='valid')(x)
    x = Conv2D(filters = 10, kernel_size = (1,1),strides = (1,1), padding = 'valid')(x)
    x = GlobalAveragePooling2D()(x)
    outputs = Activation('softmax')(x)
    
    model = Model(inputs=inputs, outputs=outputs)
    
    optimizer = keras.optimizers.Adam(learning_rate = 1e-04)

    model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer,
                      metrics=['accuracy'])
    return model

# Predict

In [0]:
def hard_voting(models, X):
    predictions = []

    for m in models:
        predictions.append(np.argmax(m.predict(X), axis=1))

    prediction = np.transpose(predictions)
    prediction = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=prediction)

    return prediction

def soft_voting(models, X):
    predictions = np.empty((len(X),0,NUM_CLASSES))

    for m in models:
        pred = np.expand_dims(m.predict(X), axis=1)
        predictions = np.append(predictions, pred, axis=1)

    predictions = np.apply_along_axis(np.transpose, axis=1, arr=predictions)
    predictions = np.mean(predictions, axis=1)
    prediction = np.argmax(predictions, axis=1)

    return prediction

def predict(models, X, Y,voting = 'hard'):
    
    if voting == "soft":
      prediction = soft_voting(models, X)
    elif voting == "hard":
      prediction = hard_voting(models, X)
    else:
      raise ValueError(f"Voting mechanism: {VOTING} not supported")

    return accuracy_score(prediction, np.argmax(Y, axis=1))

# Train

In [9]:
for run in range(1, NR_OF_RUNS+1):
    
    # Split the data
    x_train, x_val, y_train, y_val = train_test_split(x_train_full, y_trainc, test_size=0.20, shuffle= True)
    
    models = []
    accuracies = []
    predictions = []
    initializer = ["Zero","Ones","Random Normal","Random Uniform","Identity","Orthogonal","Glorot Normal","Glorot Uniform"]
    for i in range(len(initializer)):
        print(f"\n ===== Train model: Weight init method: {initializer[i]}  =====")
        
        # Set the seeds
        np.random.seed(run*i)
        tf.random.set_seed(run*i)

        # Create directories
        os.makedirs(PATH + MODEL_NAME + f"/{run}/history", exist_ok=True)
        os.makedirs(PATH + MODEL_NAME + f"/{run}/weights", exist_ok=True)
        
        # weight init method
        model = MNISTmodel(IMAGE_SIZE,NUM_CLASSES,NUM_CHANNELS)
    
        for layer in model.layers: 
            if hasattr(layer, 'kernel_initializer'):
                if(initializer[i] == "Zero"):
                    layer.kernel_initializer = initializers.Zeros()
                elif(initializer[i] == "Ones"):
                    layer.kernel_initializer = initializers.Ones()
                elif(initializer[i] == "Random Normal"):
                    layer.kernel_initializer = initializers.RandomNormal()
                elif(initializer[i] == "Random Unifrom"):
                    layer.kernel_initializer = initializers.RandomUniform()
                elif(initializer[i] == "Identity"):
                    layer.kernel_initializer = initializers.Identity()
                elif(initializer[i] == "Orthogonal"):
                    layer.kernel_initializer = initializers.Orthogonal()
                elif(initializer[i] == "Glorot Normal"):
                    layer.kernel_initializer = initializers.GlorotNormal()
                elif(initializer[i] == "Glorot Unifrom"):
                    layer.kernel_initializer = initializers.GlorotUnifrom()
            
        #save weights 
        weights_path = PATH + MODEL_NAME + f"/{run}/weights/weights-{initializer[i]}.h5"
            
        #save weights 
        weights_path = PATH + MODEL_NAME + f"/{run}/weights/weights-{initializer[i]}.h5"
        if os.path.exists(weights_path):
            print(f"Skipping training of model {initializer[i]}: weights exists")
            model.load_weights(weights_path)
        else:
            # initiate early stopping
            es = EarlyStopping(min_delta=0.01, patience=3)
            csv_logger = CSVLogger(PATH + MODEL_NAME + f"/{run}/history/history-{initializer[i]}.csv", separator=';')
            #train
            model.fit(x_train,y_train,
                      batch_size = BATCH_SIZE,
                      epochs = EPOCHS,
                      validation_data = (x_val,y_val),
                      shuffle = True,
                      callbacks=[es, csv_logger])
            model.save_weights(weights_path)
            
        models.append(model)
        y_prob = model.predict(x_test) 
        predictions.append(y_prob.argmax(axis=-1))
        acc = model.evaluate(x_test,y_testc)[1]
        accuracies.append(acc)

        print(f"Model: {initializer[i]} added. Resulting score: {acc}")
        
    # Results  
        
    # Accuracy vs Weight initialization method
        
    print("\n ===== Accuracy vs weight init methods =====")
    accuracy_df = pd.DataFrame(accuracies, columns=["Accuracy"])
    accuracy_df["weight_init_method"] = initializer
    display(accuracy_df)
    accuracy_df.to_csv(PATH + MODEL_NAME + f"/{run}/accuracy.csv")
        
    print("\n ===== Converting Binary classification =====")
    classified = []
    for prediction in tqdm(predictions):
        classified.append([1 if i==j else 0 for i,j in zip(prediction,y_test)])
        
    ## Correlation between models
    print("\n ===== Correlation =====")  
    correlation_matrix = []

    for ix, x in enumerate(classified):
        row = []
  
        for iy, y in enumerate(classified):
            if (ix == iy):
                row.append(np.nan)
            else:
                row.append(pearsonr(x,y)[0])

        correlation_matrix.append(row)

    correlation_matrix = np.array(correlation_matrix)
    correlation_matrix_df = pd.DataFrame(correlation_matrix)
    correlation_matrix_df.columns = initializer
    correlation_matrix_df.index = initializer
    correlation_matrix_df.to_csv(PATH + MODEL_NAME + f"/{run}/correlation_matrix.csv")
    display(correlation_matrix_df)
    correlation = np.nanmean(correlation_matrix.flatten())
    print("Average correlation: " + str(correlation))
    
    
    print("\n ===== Computing ensemble accuracy =====")  
    # Ensemble accuracy
    accuracy_hard = predict(models, x_test, y_testc,voting = 'hard')
    print("Accuracy of ensemble using hard voting: " + str(accuracy_hard))
    accuracy_soft = predict(models, x_test, y_testc,voting = 'soft')
    print("Accuracy of ensemble using soft voting: " + str(accuracy_soft))
    
    
    print("\n ===== Computing ensemble accuracy =====")  
    # Save the results
    file = PATH + MODEL_NAME + f"/results_.csv"
    df = pd.DataFrame([[run,correlation,accuracy_hard,accuracy_soft]])

    if not os.path.isfile(file):
        df.to_csv(file, header=["run", "correlation","accuracy_hard_voting","accuracy_soft_voting"], index=False)
    else: # else it exists so append without writing the header
        df.to_csv(file, mode='a', header=False, index=False)

    clear_output(wait=True)


 ===== Train model: Weight init method: Zero  =====
Train on 48000 samples, validate on 12000 samples
Epoch 1/9999
Epoch 2/9999
Epoch 3/9999
Epoch 4/9999
Epoch 5/9999
Epoch 6/9999
Epoch 7/9999
Epoch 8/9999
Epoch 9/9999
Epoch 10/9999
Epoch 11/9999
Epoch 12/9999
Epoch 13/9999
Epoch 14/9999
Epoch 15/9999
Epoch 16/9999
Epoch 17/9999
Epoch 18/9999
Epoch 19/9999
Epoch 20/9999
Epoch 21/9999
Epoch 22/9999
Epoch 23/9999
Epoch 24/9999
Epoch 25/9999
Epoch 26/9999
Epoch 27/9999
Epoch 28/9999
Epoch 29/9999
Epoch 30/9999
Epoch 31/9999
Epoch 32/9999
Epoch 33/9999
Epoch 34/9999
Epoch 35/9999
Epoch 36/9999
Epoch 37/9999
Epoch 38/9999
Epoch 39/9999
Epoch 40/9999
Epoch 41/9999
Epoch 42/9999
Epoch 43/9999
Epoch 44/9999
Epoch 45/9999
Epoch 46/9999
Epoch 47/9999
Epoch 48/9999
Epoch 49/9999
Epoch 50/9999
Epoch 51/9999
Epoch 52/9999
Epoch 53/9999
Epoch 54/9999
Epoch 55/9999
Epoch 56/9999
Epoch 57/9999
Epoch 58/9999
Epoch 59/9999
Epoch 60/9999
Epoch 61/9999
Epoch 62/9999
Epoch 63/9999
Epoch 64/9999
Epoch 65/9

Unnamed: 0,Accuracy,weight_init_method
0,0.7388,Zero
1,0.7715,Ones
2,0.7639,Random Normal
3,0.752,Random Uniform
4,0.746,Identity
5,0.7538,Orthogonal
6,0.7817,Glorot Normal
7,0.7483,Glorot Uniform


100%|██████████| 8/8 [00:00<00:00, 280.54it/s]


 ===== Converting Binary classification =====

 ===== Correlation =====





Unnamed: 0,Zero,Ones,Random Normal,Random Uniform,Identity,Orthogonal,Glorot Normal,Glorot Uniform
Zero,,0.587803,0.616055,0.626345,0.686928,0.634591,0.612675,0.587243
Ones,0.587803,,0.648594,0.627244,0.638305,0.638795,0.627977,0.616771
Random Normal,0.616055,0.648594,,0.640386,0.644964,0.626787,0.623366,0.632491
Random Uniform,0.626345,0.627244,0.640386,,0.659673,0.639875,0.622561,0.659369
Identity,0.686928,0.638305,0.644964,0.659673,,0.669593,0.642048,0.645626
Orthogonal,0.634591,0.638795,0.626787,0.639875,0.669593,,0.658887,0.620618
Glorot Normal,0.612675,0.627977,0.623366,0.622561,0.642048,0.658887,,0.593793
Glorot Uniform,0.587243,0.616771,0.632491,0.659369,0.645626,0.620618,0.593793,


Average correlation: 0.6331914074507132

 ===== Computing ensemble accuracy =====
Accuracy of ensemble using hard voting: 0.7865
Accuracy of ensemble using soft voting: 0.7939

 ===== Computing ensemble accuracy =====


In [10]:
!zip -r /content/MNIST_weight_init.zip /content/MNIST_weight_init

  adding: content/MNIST_weight_init/ (stored 0%)
  adding: content/MNIST_weight_init/9/ (stored 0%)
  adding: content/MNIST_weight_init/9/correlation_matrix.csv (deflated 65%)
  adding: content/MNIST_weight_init/9/accuracy.csv (deflated 33%)
  adding: content/MNIST_weight_init/9/weights/ (stored 0%)
  adding: content/MNIST_weight_init/9/weights/weights-Identity.h5 (deflated 71%)
  adding: content/MNIST_weight_init/9/weights/weights-Orthogonal.h5 (deflated 71%)
  adding: content/MNIST_weight_init/9/weights/weights-Ones.h5 (deflated 71%)
  adding: content/MNIST_weight_init/9/weights/weights-Glorot Uniform.h5 (deflated 71%)
  adding: content/MNIST_weight_init/9/weights/weights-Zero.h5 (deflated 71%)
  adding: content/MNIST_weight_init/9/weights/weights-Glorot Normal.h5 (deflated 71%)
  adding: content/MNIST_weight_init/9/weights/weights-Random Uniform.h5 (deflated 71%)
  adding: content/MNIST_weight_init/9/weights/weights-Random Normal.h5 (deflated 71%)
  adding: content/MNIST_weight_init