# Deep ensemble for uncertainty estimation

https://medium.com/@albertoarrigoni/paper-review-code-deep-ensembles-nips-2017-c5859070b8ce
https://github.com/vvanirudh/deep-ensembles-uncertainty/blob/master/model.py
https://github.com/muupan/deep-ensemble-uncertainty/blob/master/train_ensemble.ipynb


## Import & preprocess the data

In [7]:
import sys
import os
import pandas as pd
import numpy as np

module_path = os.path.abspath(os.path.join('../..'))

sys.path.insert(1, module_path + '/src')

from sklearn import preprocessing
from keras.utils import np_utils
from sktime.utils.data_io import load_from_tsfile_to_dataframe
from sktime.utils.data_processing import from_nested_to_2d_array

In [38]:
le = preprocessing.LabelEncoder()

#X_train, y_train = load_from_tsfile_to_dataframe(module_path + '/features/extracted_features_ts_files/uit_MiniROCKET_TRAIN.ts')
#X_test, y_test = load_from_tsfile_to_dataframe(module_path + '/features/extracted_features_ts_files/uit_MiniROCKET_TEST.ts')

X_train, y_train_ = load_from_tsfile_to_dataframe(module_path + '/data/ts_files/UiT_5s_noOverlap_TRAIN.ts')
X_test, y_test_ = load_from_tsfile_to_dataframe(module_path + '/data/ts_files/UiT_5s_noOverlap_TEST.ts')
X_val, y_val_ = load_from_tsfile_to_dataframe(module_path + '/data/ts_files/UiT_5s_noOverlap_VAL.ts')


X_train = from_nested_to_2d_array(X_train)
X_test = from_nested_to_2d_array(X_test)
X_val = from_nested_to_2d_array(X_val)

X_train.columns = np.arange(len(X_train.columns))
X_test.columns = np.arange(len(X_test.columns))
X_val.columns = np.arange(len(X_val.columns))

y_train_ = pd.Series(y_train_)
y_test_ = pd.Series(y_test_)
y_val_ = pd.Series(y_val_)

min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
scaler = min_max_scaler.fit(X_train)
X_train = min_max_scaler.transform(X_train)
X_test = min_max_scaler.transform(X_test)
X_val = min_max_scaler.transform(X_val)

X_train = X_train.reshape(X_train.shape[0],X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1], 1)
X_val = X_val.reshape(X_val.shape[0],X_val.shape[1], 1)

In [9]:
le = preprocessing.LabelEncoder()

le.fit(y_train_)
num_classes = len(le.classes_)
y_train = le.transform(y_train_)
y_test = le.transform(y_test_)
y_val = le.transform(y_val_)

y_train = np_utils.to_categorical(y_train, num_classes = num_classes)
y_test = np_utils.to_categorical(y_test, num_classes = num_classes)
y_val = np_utils.to_categorical(y_val, num_classes = num_classes)

## Modified Layers in NN

A proper scoring rule needs to be used. In this case the final layer of the neural network should output the mean and the variance, to take into account the predictive uncertainty. 

The custom loss function treats the observed value as a sample from a Gaussian distribution. Using the predicted mean and variance the negative log loss is calculated as the custom loss function

In [332]:
from tensorflow import math
from keras import backend as K
from keras.layers import Input, Dense, Layer, Dropout, Conv1D, Flatten, MaxPooling1D
from keras.models import Model
from keras.initializers import glorot_normal
import numpy as np


def custom_loss(sigma):
    def gaussian_loss(y_true, y_pred):
        fir = 0.5*math.log(sigma)
        sec = 0.5*math.divide(math.square(y_true - y_pred), sigma)
        res = math.reduce_mean(fir + sec, axis = -1) + 1e-6
        return res
    return gaussian_loss



class GaussianLayer(Layer):
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(GaussianLayer, self).__init__(**kwargs)
    def build(self, input_shape):
        self.kernel_1 = self.add_weight(name='kernel_1', 
                                      shape=(30, self.output_dim),
                                      initializer=glorot_normal(),
                                      trainable=True)
        self.kernel_2 = self.add_weight(name='kernel_2', 
                                      shape=(30, self.output_dim),
                                      initializer=glorot_normal(),
                                      trainable=True)
        self.bias_1 = self.add_weight(name='bias_1',
                                    shape=(self.output_dim, ),
                                    initializer=glorot_normal(),
                                    trainable=True)
        self.bias_2 = self.add_weight(name='bias_2',
                                    shape=(self.output_dim, ),
                                    initializer=glorot_normal(),
                                    trainable=True)
        super(GaussianLayer, self).build(input_shape) 
    def call(self, x):
        print(self.kernel_1.shape)
        output_mu  = K.dot(x, self.kernel_1) + self.bias_1
        output_sig = K.dot(x, self.kernel_2) + self.bias_2
        output_sig_pos = K.log(1 + K.exp(output_sig)) + 1e-06  
       
        return [output_mu, output_sig_pos]
    def compute_output_shape(self, input_shape):
        
        return [(input_shape[0], self.output_dim), (input_shape[0], self.output_dim)]


## Single network

In [333]:
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

input_shape=(X_train.shape[1], 1)

inputs = Input(shape=input_shape)
x = Conv1D(filters=10, kernel_size=3, activation='relu')(inputs)
x = MaxPooling1D(pool_size=50)(x)
x = Flatten()(x)
x = Dense(1500, activation='relu')(x)
x = Dense(500, activation='relu')(x)
x = Dense(30, activation='relu')(x)
mu, sigma = GaussianLayer(num_classes, name='main_output')(x)
model = Model(inputs, mu)
model.compile(loss=custom_loss(sigma), optimizer='adam')

(30, 5)


In [334]:
model.summary()

Model: "model_65"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_68 (InputLayer)        [(None, 25000, 1)]        0         
_________________________________________________________________
conv1d_52 (Conv1D)           (None, 24998, 10)         40        
_________________________________________________________________
max_pooling1d_51 (MaxPooling (None, 499, 10)           0         
_________________________________________________________________
flatten_51 (Flatten)         (None, 4990)              0         
_________________________________________________________________
dense_227 (Dense)            (None, 1500)              7486500   
_________________________________________________________________
dense_228 (Dense)            (None, 500)               750500    
_________________________________________________________________
dense_229 (Dense)            (None, 30)                150

Need to fix the batch problem in the custom loss function here!!!!

In [335]:
model.fit(X_train, y_train, epochs=50, batch_size = 10)

Train on 80 samples


<tensorflow.python.keras.callbacks.History at 0x7f839b5b2470>

In [86]:
layer_name = 'main_output' # Where to extract the output from
get_intermediate = K.function(inputs=[model.input], outputs=model.get_layer(layer_name).output)

In [113]:
np.array([X_train[:80][1]]).shape

(1, 25000, 1)

In [128]:
preds, sigmas = [], []
for j in range(len(X_test[:80])):
    mu, sigma = get_intermediate(np.array([X_test[:80][j]]))
    preds.append(mu[0])
    sigmas.append(sigma[0])
    
preds = np.array(preds)
sigmas = np.array(sigmas)

In [136]:
np.argmax(y_test[10])

4

In [137]:
np.argmax(preds[10].mean(axis = 0))

4

## Ensemble network

In [336]:
def create_trained_network(X_train, y_train):
    input_shape=(X_train.shape[1], 1)

    inputs = Input(shape=input_shape)
    x = Conv1D(filters=10, kernel_size=3, activation='relu')(inputs)
    x = MaxPooling1D(pool_size=50)(x)
    x = Flatten()(x)
    x = Dense(1500, activation='relu')(x)
    x = Dense(500, activation='relu')(x)
    x = Dense(30, activation='relu')(x)
    mu, sigma = GaussianLayer(num_classes, name='main_output')(x)
    model = Model(inputs, mu)
    model.compile(loss=custom_loss(sigma), optimizer='adam')
    model.fit(X_train, y_train, epochs=1, batch_size = 10)
    layer_name = 'main_output' # Where to extract the output from
    get_intermediate = K.function(inputs=[model.input], outputs=model.get_layer(layer_name).output)
    return get_intermediate



prediction_fns = []
for i in range(10):
    prediction_fns.append(create_trained_network(X_train, y_train))

(30, 5)
Train on 2322 samples
(30, 5)
Train on 2322 samples
(30, 5)
Train on 2322 samples
(30, 5)
Train on 2322 samples
(30, 5)
Train on 2322 samples
(30, 5)
Train on 2322 samples
(30, 5)
Train on 2322 samples
(30, 5)
Train on 2322 samples
(30, 5)
Train on 2322 samples
(30, 5)
Train on 2322 samples


In [341]:
preds, sigmas = [], []
for i in range(10):
    pred = prediction_fns[i](np.array([X_test[0]]))[0]
    sigma = prediction_fns[i](np.array([X_test[0]]))[1]
    
    preds.append(pred)
    sigmas.append(sigma)
    
preds = np.array(preds)
sigmas = np.array(sigmas)

In [365]:
def create_trained_network_with_adv(X_train, y_train):
    input_shape=(X_train.shape[1], 1)

    inputs = Input(shape=input_shape)
    outputs = Input(shape=input_shape) 
    x = Conv1D(filters=10, kernel_size=3, activation='relu')(inputs)
    x = MaxPooling1D(pool_size=50)(x)
    x = Flatten()(x)
    x = Dense(1500, activation='relu')(x)
    x = Dense(500, activation='relu')(x)
    x = Dense(30, activation='relu')(x)
    mu, sigma = GaussianLayer(num_classes, name='main_output')(x)
    model = Model(inputs, mu)
    model.compile(loss=custom_loss(sigma), optimizer='adam')
    model.fit(X_train, y_train, epochs=1, batch_size = 10)
    
    def gaussian_loss(y_true, y_pred, sigma):
        """
        Util function used to derive gradients w.r.t. to input data (for adversarial examples generation)
        """
        return math.reduce_mean(0.5*math.log(sigma) + 0.5*math.divide(math.square(y_true - y_pred), sigma)) + 1e-6
    
    
    #### ADVERSARIAL TRAINING EXAMPLES GENERATION
    loss_calc = gaussian_loss(outputs, mu, sigma)
    
    loss_gradients = tf.gradients(loss_calc, inputs)
    gr_sign = tf.sign(loss_gradients)
    adversarial_input_data = tf.add(inputs, 0.4 * gr_sign)
    
    print(adversarial_input_data[0])
    ####
    sess = tf.compat.v1.Session()
    init_op = tf.compat.v1.global_variables_initializer()
    sess.run(init_op)
    
    adversarial_input_data = sess.run(adversarial_input_data[0], feed_dict={inputs: X_train, outputs: y_train})[0]
    
    augmented_train_x = np.concatenate([X_train, adversarial_input_data.reshape(X_train.shape[0], 1)])
    augmented_train_y = np.concatenate([y_train, y_train])
    
    model.fit(augmented_train_x, augmented_train_y, epochs=1, verbose=0)
    
    get_intermediate = K.function(inputs=[model.input], outputs=model.get_layer(layer_name).output)
    
    return get_intermediate

In [358]:
model.summary()

Model: "model_65"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_68 (InputLayer)        [(None, 25000, 1)]        0         
_________________________________________________________________
conv1d_52 (Conv1D)           (None, 24998, 10)         40        
_________________________________________________________________
max_pooling1d_51 (MaxPooling (None, 499, 10)           0         
_________________________________________________________________
flatten_51 (Flatten)         (None, 4990)              0         
_________________________________________________________________
dense_227 (Dense)            (None, 1500)              7486500   
_________________________________________________________________
dense_228 (Dense)            (None, 500)               750500    
_________________________________________________________________
dense_229 (Dense)            (None, 30)                150

In [366]:
prediction_fns = []
for i in range(2):
    prediction_fns.append(create_trained_network_with_adv(X_train, y_train))

(30, 5)
Train on 2322 samples
Tensor("strided_slice_1:0", shape=(None, 25000, 1), dtype=float32)


ValueError: Cannot feed value of shape (2322, 5) for Tensor 'input_93:0', which has shape '(None, 25000, 1)'