# MC Dropout

In [1]:
%load_ext autoreload

In [2]:
import os, sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import  Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten,Conv2D, MaxPooling2D, Input
from tensorflow.keras.datasets import mnist
import tensorflow.keras.backend as K

In [3]:
BASE_PATH = os.path.join(os.getcwd(), "..", "..")
MODULES_PATH = os.path.join(BASE_PATH, "modules")

sys.path.append(MODULES_PATH)
from models import setup_growth

In [4]:
setup_growth()

1 Physical GPU's,  1 Logical GPU's


In [5]:
batch_size = 128
num_classes = 10
epochs = 12

# Input image dimensions
img_rows, img_cols = 28, 28

In [6]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [7]:
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

In [8]:
x_train = x_train.astype("float32")
x_test = x_test.astype("float32")
x_train /= 255
x_test /= 255

print("x_train shape: ", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

x_train shape:  (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [9]:
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [10]:
def get_dropout(input_tensor, p=0.5, mc=False):
    if mc:
        return Dropout(p)(input_tensor, training=True)
    else:
        return Dropout(p)(input_tensor)


def get_model(mc=False, act="relu"):
    inp = Input(input_shape)
    x = Conv2D(32, kernel_size=(3, 3), activation=act)(inp)
    x = Conv2D(64, kernel_size=(3, 3), activation=act)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = get_dropout(x, p=0.25, mc=mc)
    x = Flatten()(x)
    x = Dense(128, activation=act)(x)
    x = get_dropout(x, p=0.5, mc=mc)
    out = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inp, outputs=out)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])
    return model


In [11]:
model = get_model()
mc_model = get_model(mc=True)

In [12]:
model.fit(x_train, y_train, batch_size=batch_size, epochs=10, verbose=1, validation_data=(x_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f20240a2a90>

In [13]:
score = model.evaluate(x_test, y_test, verbose=0)

print("Test loss: ", score[0])
print("Test accuracy: ", score[1])

Test loss:  1.1152267456054688
Test accuracy:  0.7894999980926514


In [14]:
h_mc = mc_model.fit(x_train, y_train, batch_size=batch_size, epochs=10, verbose=1, validation_data=(x_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
import tqdm

mc_predictions = []
for i in tqdm.tqdm(range(500)):
    y_p = mc_model.predict(x_test, batch_size=1000)
    mc_predictions.append(y_p)
    

100%|██████████| 500/500 [01:18<00:00,  6.36it/s]


In [16]:
accs = []
for y_p in mc_predictions:
    acc = accuracy_score(y_test.argmax(axis=1), y_p.argmax(axis=1))
    accs.append(acc)
    
print("MC accuracy: {:.1%}".format(sum(accs)/len(accs)))

MC accuracy: 68.5%


In [17]:
mc_predictions[0].shape

(10000, 10)

In [18]:
stacked = np.stack(mc_predictions, axis=2)
stacked.shape

(10000, 10, 500)

In [19]:
stacked.shape

(10000, 10, 500)

In [20]:
maxes = stacked.argmax(axis=1)
maxes.shape

(10000, 500)

In [21]:
stacked.argmax(axis=1).shape

(10000, 500)

In [22]:
means = np.mean(stacked, axis=2)

In [23]:
accuracy_score(y_test.argmax(axis=1), means.argmax(axis=1))

0.8177

In [24]:
mc_model.evaluate(x_test, y_test)



[1.1333953142166138, 0.6891000270843506]

In [25]:
batches = np.split(stacked, 500, axis=2)

In [26]:
all_accs = []
for batch in batches:
    reshaped = batch.reshape(len(batch), 10)
    new_score = accuracy_score(y_test.argmax(axis=1), reshaped.argmax(axis=1))
    all_accs.append(new_score)

In [27]:
sum(all_accs)/len(all_accs)

0.6854134000000008

# Debugg Mc Dropout

In [28]:
%autoreload 2
from active_learning import TrainConfig
from bayesian import McDropout

In [29]:
%autoreload 2
wp_mc_model = McDropout(mc_model, TrainConfig())

In [31]:
predictions = wp_mc_model.demo_call(x_test, batch_size=1000)

In [32]:
predictions.shape

(10000, 10, 10)

In [138]:
%autoreload 2
batch_prediction = wp_mc_model(x_test[:20], n_times=1, batch_size=8)
batch_prediction.shape

Batches: 7
Batch-shape: (3, 28, 28, 1)
Batch-shape: (3, 28, 28, 1)
Batch-shape: (3, 28, 28, 1)
Batch-shape: (3, 28, 28, 1)
Batch-shape: (3, 28, 28, 1)
Batch-shape: (3, 28, 28, 1)
Batch-shape: (2, 28, 28, 1)


(20, 10)

In [114]:
label_predictions = np.argmax(batch_prediction, axis=-1)

In [116]:
label_predictions.shape

(10, 10)

array([[7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
       [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
       [9, 9, 9, 9, 9, 9, 9, 9, 9, 9],
       [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
       [9, 9, 9, 9, 9, 9, 9, 9, 9, 9]])

In [121]:
np.mean(true_labels.T[:10] == label_predictions)

0.71

In [66]:
label_predictions = np.argmax(predictions, axis=-1)
label_predictions.shape

(10000, 10)

In [47]:
true_labels = np.argmax(y_test, axis=1)

In [54]:
true_labels = np.vstack([true_labels]*label_predictions.shape[-1])

In [55]:
true_labels.shape

(10, 10000)

In [59]:
true_labels.T

array([[7, 7, 7, ..., 7, 7, 7],
       [2, 2, 2, ..., 2, 2, 2],
       [1, 1, 1, ..., 1, 1, 1],
       ...,
       [4, 4, 4, ..., 4, 4, 4],
       [5, 5, 5, ..., 5, 5, 5],
       [6, 6, 6, ..., 6, 6, 6]])

In [61]:
label_predictions

array([[3, 2, 1, ..., 0, 9, 7],
       [3, 5, 1, ..., 0, 8, 9],
       [3, 5, 1, ..., 0, 8, 7],
       ...,
       [3, 4, 5, ..., 0, 8, 2],
       [3, 4, 9, ..., 0, 5, 2],
       [3, 4, 5, ..., 0, 1, 2]])

In [71]:
np.mean(label_predictions == true_labels.T)

0.14983

In [36]:
%autoreload 2
np.argmax(wp_mc_model(x_test[:10]), axis=1)

array([7, 2, 1, 0, 4, 1, 4, 9, 4, 7])

In [41]:
res1 = np.argmax(wp_mc_model(x_test[:10]), axis=1)
res1.shape

(10,)

In [42]:
res2 = np.argmax(wp_mc_model(x_test[:10]), axis=1)

In [43]:
res3 = np.argmax(wp_mc_model(x_test[:10]), axis=1)

In [44]:
accuracy_score(res1, np.argmax(y_test[:10], axis=1))

0.8

In [45]:
accuracy_score(res2, np.argmax(y_test[:10], axis=1))

0.5

In [46]:
accuracy_score(res3, np.argmax(y_test[:10], axis=1))

0.8

In [None]:
results = wp_mc_model.batch_prediction(x_test, n_times=10, batch_size=1000)

In [None]:
batches = np.split(results, 10, axis=1)

In [None]:
accs = []
for batch in batches:
    reshaped = batch.reshape(len(batch), 10)
    score = accuracy_score(y_test.argmax(axis=1), reshaped.argmax(axis=1))
    print(score)
    accs.append(score)

In [None]:
sum(accs)/len(accs)

In [None]:
g_model = wp_mc_model.get_model()

In [None]:
g_model.evaluate(x_test, y_test)

In [None]:
all_split = np.split(x_test, 100, axis=0)

In [None]:
all_split[0].shape

In [None]:
all_split = np.split(x_test, 1, axis=0)

In [None]:
all_split[0].shape