In [0]:
import keras
import cv2
import numpy as np
import tensorflow as tf
from sklearn.metrics import accuracy_score
from keras.datasets import mnist
from keras.applications.vgg16 import VGG16
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from tqdm import tqdm

In [0]:
batch_size = 64
epochs = 35
IMAGE_WIDTH = 32
IMAGE_HEIGHT = 32
NUM_CLASSES = 10
NUM_MODELS = 5
TRAIN_SET_SIZE = 2500

# Set seeds

In [0]:
np.random.seed(1)
tf.random.set_seed(1)

# Preprocess

In [0]:
def preprocess(imgs):
    
    processed = []
    
    for img in tqdm(imgs):
        processed.append(cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT), interpolation = cv2.INTER_AREA))
    
    proccessed = np.array(processed)
    return proccessed.reshape(proccessed.shape[0], IMAGE_WIDTH, IMAGE_HEIGHT, 1)

In [0]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Decrease the training set size
idx = np.random.choice(len(x_train), size=TRAIN_SET_SIZE, replace=False)
x_train = x_train[idx]
y_train = y_train[idx]

x_train = preprocess(x_train)
x_test = preprocess(x_test)

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

100%|██████████| 60000/60000 [00:00<00:00, 128918.72it/s]
100%|██████████| 10000/10000 [00:00<00:00, 141659.74it/s]

x_train shape: (60000, 32, 32, 1)
60000 train samples
10000 test samples





In [0]:
# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, NUM_CLASSES)
y_test = keras.utils.to_categorical(y_test, NUM_CLASSES)

In [0]:
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# Model

In [0]:
def create_model():
    conv_base = VGG16(input_shape = (IMAGE_HEIGHT,IMAGE_WIDTH, 1),
                            include_top = False, weights = None, classes=NUM_CLASSES)

    conv_base.trainable = True

    model = Sequential()

    model.add(conv_base)
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(NUM_CLASSES))
    model.add(Activation('softmax'))

    optimizer = keras.optimizers.Adam(learning_rate=0.0001)

    model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['categorical_accuracy']
              )
    
    return model

# Train

In [0]:
models = []

for i in range(NUM_MODELS):

    print(f"Train model {i}")
    idx = np.random.choice(len(x_train), size=len(x_train), replace=True)

    x_train_model = x_train[idx]
    y_train_model = y_train[idx]

    model = create_model()
    
    es = EarlyStopping(monitor='val_categorical_accuracy', mode='max', min_delta=0.01, patience=3)
    model.fit(x_train_model,y_train_model,
              batch_size = batch_size,
              epochs = epochs,
              validation_data = (x_test,y_test),
              shuffle = True,
              callbacks=[es])
    models.append(model)

Train model 0
Train on 60000 samples, validate on 10000 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Train model 1
Train on 60000 samples, validate on 10000 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Train model 2
Train on 60000 samples, validate on 10000 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Train model 3
Train on 60000 samples, validate on 10000 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Train model 4
Train on 60000 samples, validate on 10000 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35


# Predict

In [0]:
predictions = []

for m in tqdm(models):
    predictions.append(np.argmax(m.predict(x_test), axis=1))

100%|██████████| 5/5 [00:25<00:00,  5.10s/it]


In [0]:
prediction = np.transpose(predictions)
prediction = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=prediction)

print('Test accuracy:', accuracy_score(prediction, np.argmax(y_test, axis=1)))

Test accuracy: 0.993


# Correlation between models

In [0]:
from scipy.stats import pearsonr
import pandas as pd

correlation_matrix = []

for ix, x in enumerate(predictions):
  row = []
  
  for iy, y in enumerate(predictions):
    if (ix == iy):
      row.append(np.nan)
    else:
      row.append(pearsonr(x,y)[0])

  correlation_matrix.append(row)

correlation_matrix = np.array(correlation_matrix)
display(pd.DataFrame(correlation_matrix))
print("Average correlation: " + str(np.nanmean(correlation_matrix.flatten())))

Unnamed: 0,0,1,2,3,4
0,,0.980346,0.987144,0.984345,0.982658
1,0.980346,,0.980168,0.982352,0.980815
2,0.987144,0.980168,,0.985582,0.983056
3,0.984345,0.982352,0.985582,,0.986408
4,0.982658,0.980815,0.983056,0.986408,


Average correlation: 0.9832875621470725
