# Predictive Uncertainty Estimation using Deep Ensemble (Regression)

This algorithm is implementation of paper [Simple and Scalable Predictive Uncertainty Estimation using Deep Ensembles](https://arxiv.org/abs/1612.01474). In this jupyter notebook, I will implement regression part of this paper using [Concrete Compressive Strength Dataset](https://archive.ics.uci.edu/ml/datasets/Concrete+Compressive+Strength)

## Import Modules

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import scipy.io
import cv2
import random
import pandas

## Parameters

In [2]:
# Parameters of training
Learning_rate = 0.00025
epsilon = 1e-8

num_iter = 5000
batch_size = 128

test_ratio = 0.1
gpu_fraction = 0.5

# Ensemble networks
networks = ['network1', 'network2', 'network3', 'network4', 'network5']

# Import Excel File
data = pandas.read_excel('Concrete_Data.xls')
column_names = data.columns

num_rows = len(data)
num_columns = len(column_names) 
num_data = num_columns - 1

# Dense [input size, output size]
dense1 = [num_data, 100]
dense2 = [100, 100]
dense_mu  = [100, 1]
dense_sig = [100, 1]

## Get Concrete Dataset

In [3]:
data_x = np.zeros([num_rows, num_columns - 1])
data_y = np.zeros([num_rows, 1])

for i in range(num_rows):
    for j in range(num_columns - 1):
        data_x[i, j] = data[column_names[j]][i]
    data_y[i,0] = data[column_names[-1]][i]

num_train_data = int(num_rows * (1 - test_ratio))
num_test_data  = num_rows - num_train_data

train_x = data_x[:num_train_data, :]
train_y = data_y[:num_train_data, :]
test_x  = data_x[num_train_data:, :]
test_y  = data_y[num_train_data:, :]

print("Train data shape: " + str(train_x.shape))
print("Test data shape: " + str(test_x.shape))

Train data shape: (927, 8)
Test data shape: (103, 8)


## Functions

In [4]:
tf.reset_default_graph()

def weight_variable(name, shape):
    return tf.get_variable(name, shape = shape, initializer = tf.contrib.layers.xavier_initializer())

def bias_variable(name, shape):
    return tf.get_variable(name, shape = shape, initializer = tf.contrib.layers.xavier_initializer())

# Get networks
def get_network(network_name):
    input_x = tf.placeholder(tf.float32, shape = [None, num_data])
    
    with tf.variable_scope(network_name):
        # Densely connect layer variables
        w_fc1 = weight_variable(network_name + '_w_fc1', dense1)
        b_fc1 = bias_variable(network_name + '_b_fc1', [dense1[1]])
        
        w_fc2 = weight_variable(network_name + '_w_fc2', dense2)
        b_fc2 = bias_variable(network_name + '_b_fc2', [dense2[1]])
        
        w_fc_mu = weight_variable(network_name + '_w_fc_mu', dense_mu)
        b_fc_mu = bias_variable(network_name + '_b_fc_mu', [dense_mu[1]])

        w_fc_sig = weight_variable(network_name + '_w_fc_sig', dense_sig)
        b_fc_sig = bias_variable(network_name + '_b_fc_sig', [dense_sig[1]])

    # Network
    fc1 = tf.nn.relu(tf.matmul(input_x, w_fc1) + b_fc1)
    fc2 = tf.nn.relu(tf.matmul(fc1, w_fc2) + b_fc2)
    output_mu  = tf.matmul(fc2, w_fc_mu) + b_fc_mu
    output_sig = tf.matmul(fc2, w_fc_sig) + b_fc_sig
    
    y = tf.placeholder(tf.float32, shape = [None, 1])
    
    # Negative Log Likelihood(NLL) 
#     loss = tf.reduce_mean(tf.div(tf.log(tf.square(output_sig)), 2) + tf.div(tf.square(y - output_mu), 2 * tf.square(output_sig) + 1e-10)) 
    loss = tf.reduce_mean(tf.div(tf.log(tf.square(output_sig)), 2) + tf.div(tf.square(y - output_mu), 2 * tf.square(output_sig) + 1e-10)) 

    # Get trainable variables
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, network_name) 
    
    train_opt = tf.train.AdamOptimizer(Learning_rate).minimize(loss, var_list = train_vars)
    
    return input_x, y, output_mu, output_sig, loss, train_opt, train_vars


# Make batch data 
def making_batch(data_size, sample_size, data_x, data_y):
    
    # Making batches(testing)
    batch_idx = np.random.choice(data_size, sample_size)
    
    batch_x = np.zeros([sample_size, num_data])
    batch_y = np.zeros([sample_size, 1])
        
    for i in range(batch_idx.shape[0]):
        batch_x[i,:] = data_x[batch_idx[i], :]
        batch_y[i,:] = data_y[batch_idx[i], :] 
        
    return batch_x, batch_y   

## Initialize Ensemble Networks

In [5]:
x_list = []
y_list = []
output_mu_list = []
output_sig_list = []
loss_list = []
train_list = []
train_var_list = []

# Train each ensemble network
for i in range(len(networks)):
    x_input, y, output_mu, output_sig, loss, train_opt, train_vars = get_network(networks[i])

    x_list.append(x_input)
    y_list.append(y)
    output_mu_list.append(output_mu)
    output_sig_list.append(output_sig)
    loss_list.append(loss)
    train_list.append(train_opt)
    train_var_list.append(train_vars)


## Create Session

In [6]:
# Create Session
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = gpu_fraction

sess = tf.InteractiveSession(config=config)
sess.run(tf.global_variables_initializer())

## Training

In [8]:
# Set parameters for printing and testing
num_print = 100
test_size = 10

train_data_num = train_x.shape[0]
test_data_num  = test_x.shape[0]

loss_train = np.zeros([len(networks)])
out_mu     = np.zeros([test_size, len(networks)])
out_sig    = np.zeros([test_size, len(networks)])

for iter in range(num_iter):
    # Making batches(testing)
    batch_x_test, batch_y_test = making_batch(test_data_num, test_size, test_x, test_y)
        
    for i in range(len(networks)):
        # Making batches(training)
        batch_x, batch_y = making_batch(train_data_num, batch_size, train_x, train_y)
       
        # Training
        _, loss, mu, sig = sess.run([train_list[i], loss_list[i], output_mu_list[i], output_sig_list[i]], 
                                 feed_dict = {x_list[i]: batch_x, y_list[i]: batch_y})
  
      
        # Testing
        loss_test, mu_test, sig_test = sess.run([loss_list[i], output_mu_list[i], output_sig_list[i]], 
                                         feed_dict = {x_list[i]: batch_x_test, y_list[i]: batch_y_test})
        
     
        loss_train[i] += loss
        out_mu[:, i] = np.reshape(mu_test, (test_size))
        out_sig[:, i] = np.reshape(sig_test, (test_size))
        
    # Get final test result
    out_mu_final = np.mean(out_mu, axis = 1)
    out_sig_final = np.mean(np.square(out_sig) + np.square(out_mu), axis = 1) - np.square(out_mu_final)
    
    if iter % num_print == 0 and iter != 0:
        print(('-------------------------') + ' Iteration: ' + str(iter) + ' -------------------------')
        print('Average Loss(NLL): ' + str(loss_train / num_print))
        print(out_mu_final[0])
        print(out_sig_final[0])
        print(batch_y_test[0])
#         print('Testing Accuracy: ' + str(acc_check_test / num_print))
#         print('Final Testing Accuracy: ' + str(acc_check_test_final / num_print))
        print('\n')
        
        loss_train = np.zeros(len(networks))

# plt.show()

------------------------- Iteration: 100 -------------------------
Average Loss(NLL): [ 5.72897389  5.17768682  5.82681759  4.28654823  5.46311496]
17.3536041737
46038.7768427
[ 52.82696164]


------------------------- Iteration: 200 -------------------------
Average Loss(NLL): [ 5.60411545  5.11667846  5.76882472  4.10027392  5.39878491]
14.2268810749
43609.3591583
[ 41.54230795]


------------------------- Iteration: 300 -------------------------
Average Loss(NLL): [ 5.50474209  5.10688853  5.76732743  4.00469336  5.38891404]
12.2560874939
41553.1356283
[ 33.39959639]


------------------------- Iteration: 400 -------------------------
Average Loss(NLL): [ 5.34824973  5.09637959  5.7671202   3.9220329   5.37609635]
15.0874347687
37782.8637372
[ 37.43165204]


------------------------- Iteration: 500 -------------------------
Average Loss(NLL): [ 5.03759049  5.08700925  5.76459293  3.83983398  5.36208223]
11.9280004501
39210.9609666
[ 38.46103971]


------------------------- Iteration

KeyboardInterrupt: 