In [1]:
import numpy as np
import gzip
from sklearn.model_selection import train_test_split
from tensorflow.python.platform import gfile
from sklearn.preprocessing import normalize
import os
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
import logging
from __future__ import print_function, division, absolute_import, unicode_literals
from typing import Union, Any
import shutil
from tensorflow.keras.datasets import mnist
from tensorflow.keras.datasets import cifar10

from matplotlib import pyplot as plt
import scipy.ndimage as nd

%matplotlib inline
import pylab as pl
from IPython import display
from keras import backend as K_1, regularizers
from keras.engine.training import Model
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, \
    BatchNormalization, Activation, Input

In [2]:
class DataSet(object):
    def __init__(self, images, labels, probs, indices, reshape=True):
        
        if reshape:
            mean = np.mean(images,axis=(0,1,2,3))
            std =  np.std(images,axis=(0,1,2,3))
            images = (images - mean)/std
        
        self._images = images
        self._num_examples = images.shape[0]
        self._labels = labels
        self._probs = probs
        self._epochs_completed = 0
        self._index_in_epoch = 0
        self._counter = np.zeros(self._num_examples)
        self._indices = indices
        self._subset_size = images.shape[0]
        self._subset_ids = np.arange(self._num_examples)
        self._data_limit = None
        self._train_order = np.arange(self._num_examples)
        self._epoch_ids = None
        self._precent = 0.25
        self._cur_precent = 0.25

    @property
    def images(self):
        return self._images

    @property
    def labels(self):
        return self._labels

    @property
    def probs(self):
        return self._probs

    @property
    def indices(self):
        return self._indices

    @property
    def num_examples(self):
        return self._num_examples

    @property
    def epochs_completed(self):
        return self._epochs_completed

    @property
    def counter(self):
        return self._counter
    
    @property
    def subset_size(self):
        return self._subset_size

    @property
    def subset_ids(self):
        return self._subset_ids
    
    def train_order(self):
        pp = np.array(self._probs)
        self._train_order = np.argsort(pp)
        return self._train_order
    
    def revert_indicte(self):
        self._indices = np.zeros(self._num_examples)
    
    def subset_step_indicate(self, epoch, increase_amount):
        if epoch == 0:
            self._data_limit = int(np.ceil(self._num_examples * self._precent))
            self._epoch_ids = self._train_order[:self._data_limit]
            self._indices[self._epoch_ids]  =  np.ones(self._data_limit)
        else:
            self._precent = min(self._cur_precent * increase_amount,1)
            if self._precent != self._cur_precent:
                self._cur_precent = self._precent
                self._data_limit = int(np.ceil(self._num_examples * self._precent))
                self._epoch_ids = self._train_order[:self._data_limit]
                self._indices[self._epoch_ids]  =  np.ones(self._data_limit)
            else:
                self._data_limit = int(np.ceil(self._num_examples * self._precent))
                self._epoch_ids = self._train_order[:self._data_limit]
                self._indices[self._epoch_ids]  =  np.ones(self._data_limit)
                
                
    def next_batch_train(self, batch_size):
        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
            self._epochs_completed += 1
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._num_examples
        end = self._index_in_epoch
        return self._images[self._train_order][start:end], \
                self._labels[self._train_order][start:end], \
                self._probs[self._train_order][start:end], \
                self._indices[self._train_order][start:end]    
    
    def next_batch_test(self, batch_size):    
        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
            # Finished epoch
            self._epochs_completed += 1

            # Shuffle the data
            np.random.seed(1)
            perm = np.arange(self._num_examples)
            np.random.shuffle(perm)
            self._images = self._images[perm]
            self._labels = self._labels[perm]
            self._probs = self._probs[perm]
            self._indices = self._indices[perm]

            # Start next epoch
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._num_examples
        end = self._index_in_epoch
        return self._images[start:end], self._labels[start:end], self._probs[start:end], self._indices[start:end]

    def change_probs(self, new_values):
        self._probs[self._train_order] = new_values

In [3]:
def read_data_sets(datasetname, init_probs=[], one_hot=False):
    
    class DataSets(object):
        pass
    data_sets = DataSets()
    
    if datasetname == 'cifar10_keras':
        (train_images,train_labels),(test_images,test_labels) = cifar10.load_data()
    elif datasetname == 'mnist_keras':
        (train_images,train_labels),(test_images,test_labels) = mnist.load_data()
        num_rows_train = train_images.shape[1]
        num_clos_train = train_images.shape[2]
        num_rows_test = test_images.shape[1]
        num_clos_test = test_images.shape[2]
        train_images = train_images.reshape(train_images.shape[0],num_rows_train,num_clos_train,1)
        test_images = test_images.reshape(test_images.shape[0],num_rows_train,num_clos_train,1)
        if K_1.image_data_format()=='channels_last':
            train_images = train_images.transpose(0,1,2,3)
            test_images = test_images.transpose(0,1,2,3)
    else:
        raise NotImplementedError('dataset not supported')
    
    if one_hot:
        train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10)
        test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10)
  
    n_test = test_images.shape[0]
    n_train = train_images.shape[0]
    
    if not init_probs:
        print('RANDOM INIT PROBABILITIES')
        np.random.seed(3)
        probs = np.random.rand(n_train)
    else:
        init_probs = np.asarray(init_probs)
        probs_class = np.asarray(1.0 * init_probs / np.sum(init_probs), np.float32)
        dense_train_labels = np.argmax(train_labels, axis=1)
        probs = np.zeros_like(dense_train_labels, np.float32)
        for k in range(0, np.unique(dense_train_labels).max()+1):
            i = np.where(dense_train_labels == k)[0]
            probs[i] = probs_class[k]

    train_probs = np.squeeze(probs)
    test_probs = np.squeeze(normalize(np.expand_dims(np.ones(n_test, np.float32), 1), axis=0, norm='l1'))
    
    train_indices = np.zeros(n_train)
   
    test_indices = np.zeros(n_test)
    
    data_sets.train = DataSet(train_images, train_labels, train_probs, train_indices)
    
    data_sets.test = DataSet(test_images, test_labels, test_probs, test_indices)
    
    return data_sets

In [4]:
dataset_1 = read_data_sets('cifar10_keras', init_probs=[],one_hot=True)
train_set = dataset_1.train
test_set = dataset_1.test
K = 10

RANDOM INIT PROBABILITIES


In [5]:
def relu_evidence(logits):
    return tf.nn.relu(logits)

def exp_evidence(logits): 
    return tf.exp(tf.clip_by_value(logits/10,-10,10))

def softplus_evidence(logits):
    return tf.nn.softplus(logits)
def softsign_evidence(logits):
    return tf.nn.softsign(logits)

In [6]:
def var(name, shape, init=None):
    init = tf.truncated_normal_initializer(stddev=(1 / shape[0]) ** 0.5) if init is None else init
    return tf.get_variable(name=name, shape=shape, dtype=tf.float32, initializer=init)

In [7]:
def DEAL_dense_layer(x, units, rate, activation, scope='DEAL_dense_0'):
    with tf.variable_scope(scope):
        W3 = var('W3', [x.get_shape()[1], 1000])
        b3 = var('b3', [1000])
        out3 = Activation(activation=activation)(tf.matmul(x, W3) + b3)
        out3 = Dropout(rate=rate)(out3)

        W4 = var('W4', [1000, units])
        b4 = var('b4', [units])
        logits = tf.matmul(out3, W4) + b4
        return logits, W3, W4
        

In [8]:
def network(x, units=10,logits2evidence=softplus_evidence,activation='elu', dropout_1_rate=0.25, dropout_2_rate=0.5,
            reg_factor=50e-4, bias_reg_factor=None, batch_norm=False):
    
    l2_reg = regularizers.l2(reg_factor)  # K.variable(K.cast_to_floatx(reg_factor))
    l2_bias_reg = None
    if bias_reg_factor:
        l2_bias_reg = regularizers.l2(bias_reg_factor)  # K.variable(K.cast_to_floatx(bias_reg_factor))

    x = Conv2D(filters=32, kernel_size=(3, 3), padding='same', kernel_regularizer=l2_reg,
               bias_regularizer=l2_bias_reg)(x)
    if batch_norm:
        x = BatchNormalization()(x)
    x = Activation(activation=activation)(x)
    x = Conv2D(filters=32, kernel_size=(3, 3), padding='same', kernel_regularizer=l2_reg,
                   bias_regularizer=l2_bias_reg)(x)
    if batch_norm:
        x = BatchNormalization()(x)
    x = Activation(activation=activation)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(rate=dropout_1_rate)(x)

    x = Conv2D(filters=64, kernel_size=(3, 3), padding='same', kernel_regularizer=l2_reg,
                bias_regularizer=l2_bias_reg)(x)
    if batch_norm:
        x = BatchNormalization()(x)
    x = Activation(activation=activation)(x)
    x = Conv2D(filters=64, kernel_size=(3, 3), padding='same', kernel_regularizer=l2_reg,
                   bias_regularizer=l2_bias_reg)(x)
    if batch_norm:
        x = BatchNormalization()(x)
    x = Activation(activation=activation)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(rate=dropout_1_rate)(x)

    x = Conv2D(filters=128, kernel_size=(3, 3), padding='same', kernel_regularizer=l2_reg,
                   bias_regularizer=l2_bias_reg)(x)
    if batch_norm:
        x = BatchNormalization()(x)
    x = Activation(activation=activation)(x)
    x = Conv2D(filters=128, kernel_size=(3, 3), padding='same', kernel_regularizer=l2_reg,
                   bias_regularizer=l2_bias_reg)(x)
    if batch_norm:
        x = BatchNormalization()(x)
    x = Activation(activation=activation)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(rate=dropout_1_rate)(x)

    x = Conv2D(filters=256, kernel_size=(2, 2), padding='same', kernel_regularizer=l2_reg,
                bias_regularizer=l2_bias_reg)(x)
    if batch_norm:
        x = BatchNormalization()(x)
    x = Activation(activation=activation)(x)
    x = Conv2D(filters=256, kernel_size=(2, 2), padding='same', kernel_regularizer=l2_reg,
                   bias_regularizer=l2_bias_reg)(x)
    if batch_norm:
        x = BatchNormalization()(x)
    x = Activation(activation=activation)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(rate=dropout_1_rate)(x)

    x = Flatten()(x)
    x,W3,W4 = DEAL_dense_layer(x, units, rate=dropout_2_rate, activation=activation, scope='logit')
#     x = Dense(units=128, kernel_regularizer=l2_reg, bias_regularizer=l2_bias_reg)(x)
#     if batch_norm:
#         x = BatchNormalization()(x)
#     x = Activation(activation=activation)(x)

#     x = Dropout(rate=dropout_2_rate)(x)
#     x = Dense(units=units, kernel_regularizer=l2_reg, bias_regularizer=l2_bias_reg)(x)
    evidence = logits2evidence(x)
    return x, evidence, W3, W4


In [9]:
def KL(alpha, K):
    beta = tf.constant(np.ones((1, K)), dtype=tf.float32)
    S_alpha = tf.reduce_sum(alpha, axis=1, keepdims=True)

    KL = tf.reduce_sum((alpha - beta) * (tf.digamma(alpha) - tf.digamma(S_alpha)), axis=1, keepdims=True) + \
         tf.lgamma(S_alpha) - tf.reduce_sum(tf.lgamma(alpha), axis=1, keepdims=True) + \
         tf.reduce_sum(tf.lgamma(beta), axis=1, keepdims=True) - tf.lgamma(tf.reduce_sum(beta, axis=1, keepdims=True))
    return KL

In [10]:
def loss_eq4(p, alpha, K, global_step, annealing_step, v):
    loglikelihood = tf.reduce_mean(
    tf.reduce_sum(p * (tf.digamma(tf.reduce_sum(alpha, axis=1, keepdims=True)) - tf.digamma(alpha)), 1,
                    keepdims=True))
    KL_reg = tf.minimum(1.0, tf.cast(global_step / annealing_step, tf.float32)) * KL((alpha - 1) * (1 - p) + 1, K)
    loss = loglikelihood + KL_reg  # UAS_full
    final_loss = tf.multiply(loss, v) #UAS_exp
    return loss

In [11]:
bsize = 100 #batch size
n_batches = np.ceil(1.0 * train_set.num_examples / bsize).astype(np.int32)
n_batches_1 = np.ceil(1.0 * test_set.num_examples / bsize).astype(np.int32)

In [12]:
def lr_shd(epoch):
    if epoch >=0 and epoch <=15:
        lr = 0.001
    else:
        lr = 0.0001
    return lr

In [13]:
def Resent_EDL(units=10,logits2evidence=softplus_evidence,activation='elu',reg_factor=50e-4, 
               bias_reg_factor=None, batch_norm=False,loss_function=loss_eq4,lmb=0.005):
    g = tf.Graph()
    with g.as_default():
        
        X = tf.placeholder(shape=[None,32,32,3], dtype=tf.float32)
        Y = tf.placeholder(shape=[None,10], dtype=tf.float32)
        keep_prob_1 = tf.placeholder(dtype=tf.float32)
        keep_prob_2 = tf.placeholder(dtype=tf.float32)
        global_step = tf.Variable(initial_value=0, name='global_step', trainable=False)
        annealing_step = tf.placeholder(dtype=tf.int32) 
        v = tf.placeholder(dtype=tf.float32)
        lr = tf.placeholder(dtype=tf.float32)
        
        logits, evidence,W3,W4 = network(X, units=10,logits2evidence=softplus_evidence,activation='elu', dropout_1_rate=keep_prob_1, 
                                          dropout_2_rate=keep_prob_2,reg_factor=50e-4, bias_reg_factor=None, batch_norm=False)   
        alpha = evidence + 1        
        u = K / tf.reduce_sum(alpha, axis=1, keepdims=True) #uncertainty
        prob = alpha/tf.reduce_sum(alpha, 1, keepdims=True) 
        
        loss = tf.reduce_mean(loss_function(Y, alpha,K ,global_step, annealing_step, v))
        l2_loss = (tf.nn.l2_loss(W3)+tf.nn.l2_loss(W4)) * lmb
        total_loss = loss + l2_loss
        step = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)
        
        # Calculate accuracy
        pred = tf.argmax(prob, 1)
        truth = tf.argmax(Y, 1)
        match = tf.reshape(tf.cast(tf.equal(pred, truth), tf.float32),(-1,1))
        acc = tf.reduce_mean(match)

        
        return g, step, X, Y, v, annealing_step, prob, acc, loss, u, keep_prob_1, keep_prob_2,lr, evidence

In [14]:
g2, step2, X2, Y2, v2, annealing_step, prob2, acc2, loss2,  u, keep_prob_1,keep_prob_2,lr,evidence = Resent_EDL()

In [15]:
sess2 = tf.Session(graph=g2)
with g2.as_default():
    sess2.run(tf.global_variables_initializer())

In [16]:
L_train_acc1=[]
L_test_acc1=[]


best_acc = 0
for epoch in range(50):  
    loss_trains = []
    acc_train = []
    acc_test = []
    uncertainty = np.zeros(train_set.num_examples)
    learning_rate = lr_shd(epoch)
    train_set.train_order()
    train_set.subset_step_indicate(epoch,increase_amount=1.2)
    

    start = 0
    end = bsize
    for i in range(n_batches):
        data, label, probs, indices = train_set.next_batch_train(bsize)
        feed_dict={X2:data, Y2:label, v2:indices, keep_prob_1:0.25, keep_prob_2:0.5, lr:learning_rate, annealing_step:50*n_batches}
        _, train_loss, unc, train_acc1 = sess2.run([step2,loss2,u,acc2],feed_dict)
        loss_trains.append(train_loss)
        acc_train.append(train_acc1)
        print('epoch %d - %d%%) '% (epoch+1, (100*(i+1))//n_batches), end='\r' if i<n_batches-1 else '')
        unc = np.array(unc).reshape((-1,))
        if end > train_set.num_examples:
            end = train_set.num_examples
            unc = unc[:(end-start)]
            
        uncertainty[start:end] = unc
        start += data.shape[0]
        end += data.shape[0]
    train_set.change_probs(uncertainty)
    train_set.revert_indicte()
    
    for m in range(n_batches_1):
        data2, label2, probs2, indices2 = test_set.next_batch_test(bsize)
        feed_dict={X2:data2, Y2:label2,keep_prob_1:1.,keep_prob_2:1.}
        test_acc1 = sess2.run(acc2, feed_dict)
        acc_test.append(test_acc1)
 
    
    
    
    loss_train = np.mean(loss_trains)
    train_acc = np.mean(acc_train)
    test_acc = np.mean(acc_test)

    
    L_train_acc1.append(train_acc)
    L_test_acc1.append(test_acc)

    print('training:  acc: %2.4f loss: %2.4f \t testing: acc: %2.4f' % 
          (train_acc, loss_train, test_acc))
    
    if test_acc > best_acc:
        best_acc = test_acc
        best_epoch = epoch + 1
    print('best epoch: %d best acc: %2.4f' % (best_epoch, best_acc))
    

epoch 1 - 100%) training:  acc: 0.4248 loss: 1.7251 	 testing: acc: 0.5632
best epoch: 1 best acc: 0.5632
epoch 2 - 100%) training:  acc: 0.6519 loss: 1.2742 	 testing: acc: 0.6847
best epoch: 2 best acc: 0.6847
epoch 3 - 100%) training:  acc: 0.7392 loss: 1.0798 	 testing: acc: 0.7364
best epoch: 3 best acc: 0.7364
epoch 4 - 100%) training:  acc: 0.7845 loss: 0.9762 	 testing: acc: 0.7627
best epoch: 4 best acc: 0.7627
epoch 5 - 100%) training:  acc: 0.8220 loss: 0.8646 	 testing: acc: 0.7712
best epoch: 5 best acc: 0.7712
epoch 6 - 100%) training:  acc: 0.8403 loss: 0.8290 	 testing: acc: 0.7855
best epoch: 6 best acc: 0.7855
epoch 7 - 100%) training:  acc: 0.8564 loss: 0.7862 	 testing: acc: 0.7711
best epoch: 6 best acc: 0.7855
epoch 8 - 100%) training:  acc: 0.8790 loss: 0.7009 	 testing: acc: 0.7918
best epoch: 8 best acc: 0.7918
epoch 9 - 100%) training:  acc: 0.8848 loss: 0.6930 	 testing: acc: 0.7770
best epoch: 8 best acc: 0.7918
epoch 10 - 100%) training:  acc: 0.8948 loss: 