In [6]:
NB_TRAINING_SAMPLES=-1 # -1 means "all"
NB_TEST_SAMPLES=-1
NB_EPOCHS=1
VERBOSITY=0

# LOADING DATA

In [9]:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import datasets, layers, models, Model
import numpy as np
import warnings
warnings.filterwarnings('ignore')

(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

if NB_TRAINING_SAMPLES!=-1:
    train_images=train_images[:NB_TRAINING_SAMPLES]
    train_labels=train_labels[:NB_TRAINING_SAMPLES]
if NB_TEST_SAMPLES!=-1:
    test_images=test_images[:NB_TEST_SAMPLES]
    test_labels=test_labels[:NB_TEST_SAMPLES]

train_images, test_images = train_images / 255.0, test_images / 255.0
train_labels=to_categorical(train_labels).astype(np.float32)
test_labels=to_categorical(test_labels).astype(np.float32)

print(train_images.shape)
print(test_images.shape)
print(train_labels.shape)
print(test_labels.shape)

# Util functions

TODO: https://github.com/hollance/reliability-diagrams/blob/master/reliability_diagrams.py 

In [12]:
import tensorflow_probability as tfp
import sklearn
import tensorflow as tf
from tensorflow.keras import Input, Model, optimizers, layers
import numpy as np

In [31]:
def CE(target,output,epsilon=1e-7):
    bce = target * np.log( output +epsilon) + (1-target) * np.log(  1-output+epsilon)
    return -np.mean(bce)
def softmax(x):
  return np.exp(x) /  np.sum(np.exp(x), axis=-1, keepdims=True)

  
def brier_score(y,y_pred):
    b=(y_pred-y)**2
    return np.mean( np.sum( b , axis=-1 ) )
def evaluate(y,y_pred):
  
  labels_predicted=np.argmax(y_pred,axis=1).astype(np.int32)
  labels_gt=np.argmax(y,axis=1).astype(np.int32)

  acc=np.mean(labels_predicted==labels_gt)
  nll=CE(y,y_pred)
  
  try:
    num_bins=15 #usual value in papers
    ece=tfp.stats.expected_calibration_error(
      num_bins, logits=y_pred, labels_true=labels_gt, name=None
    ).numpy()
  except:
    ece=np.nan

  try:
    brier=brier_score(y,y_pred)
  except:
    brier=np.nan
  return np.round([acc,nll, ece, brier],4)


In [18]:
def keras_model_builder(train_images, train_labels, drop_rate=0):
    # Same than https://www.tensorflow.org/tutorials/images/cnn
    inpx=layers.Input(shape=(32, 32, 3))
    x=layers.Conv2D(32, (3, 3), activation='relu')(inpx)
    if drop_rate>0:
      x=layers.Dropout(drop_rate)(x,training=True)
    x=layers.MaxPooling2D((2, 2))(x)
    x=layers.Conv2D(64, (3, 3), activation='relu')(x)
    if drop_rate>0:
      x=layers.Dropout(drop_rate)(x,training=True)
    x=layers.MaxPooling2D((2, 2))(x)
    x=layers.Conv2D(64, (3, 3), activation='relu')(x)
    x=layers.Flatten()(x)
    if drop_rate>0:
      x=layers.Dropout(drop_rate)(x,training=True)
    x=layers.Dense(64, activation='relu')(x)
    if drop_rate>0:
      x=layers.Dropout(drop_rate)(x,training=True)
    x=layers.Dense(10, activation='softmax')(x)
    model=Model(inpx,x)
    model.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy())
    history = model.fit(train_images, train_labels, epochs=NB_EPOCHS, 
                    validation_data=(test_images, test_labels),verbose=VERBOSITY)
    return model

# Simple DNN

In [21]:
model=keras_model_builder(train_images, train_labels)
test_y_pred=model(test_images)
score_info=evaluate(test_labels,test_y_pred)
print(score_info)
del model

[0.5368 0.2005 0.3844 0.5932]


# MC DROPOUT<br>
URL: https://arxiv.org/pdf/1506.02142.pdf

In [19]:
for d in [0.5, 0.2, 0.02, 0.002]: # Value used in the original paper 0.1 0.05 0.005
  model=keras_model_builder(train_images, train_labels, drop_rate=d)
  iters=16 # original paper (10)
  test_y_pred=np.zeros(test_labels.shape)
  for i in range(iters):
      test_y_pred+=model(test_images)
      if (i+1)==4 or (i+1)==8 or (i+1)==16:
        test_y_pred/=iters
        score_info=evaluate(test_labels,test_y_pred)
        print(f"d:{d} iter:{i+1} score_info:{score_info}")
  del model

d:0.5 iter:4 score_info:[0.4546 0.3083 0.3475 0.8731]
d:0.5 iter:8 score_info:[0.4534 0.3033 0.3459 0.8666]
d:0.5 iter:16 score_info:[0.4582 0.2551 0.3432 0.772 ]
d:0.2 iter:4 score_info:[0.5074 0.2927 0.3974 0.8424]
d:0.2 iter:8 score_info:[0.5115 0.2871 0.4008 0.8336]
d:0.2 iter:16 score_info:[0.512  0.238  0.3903 0.7209]
d:0.02 iter:4 score_info:[0.5507 0.2774 0.439  0.8172]
d:0.02 iter:8 score_info:[0.5491 0.2722 0.4367 0.8076]
d:0.02 iter:16 score_info:[0.5508 0.2222 0.4252 0.6791]
d:0.002 iter:4 score_info:[0.5575 0.2774 0.4462 0.8196]
d:0.002 iter:8 score_info:[0.558  0.2724 0.446  0.8099]
d:0.002 iter:16 score_info:[0.558  0.2225 0.4332 0.6817]


# ENSEMBLE OF DNN<br>
URL: https://proceedings.neurips.cc/paper/2017/file/9ef2ed4b7fd2c810847ffa5fa85bce38-Paper.pdf

In [None]:
import numpy as np
max_ensemble_size=16
global_preds=np.zeros(test_labels.shape)
for i in range(max_ensemble_size):
  model=keras_model_builder(train_images, train_labels) # create independant training
  global_preds+=model(test_images)
  del model
  ensemble_i_pred=global_preds/(i+1)
  score_info=evaluate(test_labels, ensemble_i_pred)
  print(f"ensemble_size:{i+1} res:{score_info}")

ensemble_size:1 res:[0.5424 0.2005 0.3893 0.59  ]
ensemble_size:2 res:[0.5535 0.1959 0.4012 0.577 ]
ensemble_size:3 res:[0.5693 0.1915 0.4201 0.5648]
ensemble_size:4 res:[0.5765 0.1897 0.4271 0.5595]
ensemble_size:5 res:[0.581  0.1897 0.4336 0.5586]
ensemble_size:6 res:[0.5865 0.1888 0.4393 0.556 ]
ensemble_size:7 res:[0.5848 0.1888 0.4377 0.5563]
ensemble_size:8 res:[0.585  0.1887 0.4379 0.5561]


# Variational inference with TF PROBA

In [36]:
import tensorflow_probability as tfp
from tensorflow_probability.python.layers import util as tfp_layers_util
from tensorflow_probability.python.distributions import kullback_leibler as kl_lib
from tensorflow_probability import distributions as tfd

kernel_posterior_scale_mean=-9.0
kernel_posterior_scale_stddev=0.1
kernel_posterior_scale_constraint=0.2

def nll(y_true, y_pred):
    return -y_pred.log_prob(y_true)
def approximate_kl(q, p, q_tensor):
    return tf.reduce_mean(q.log_prob(q_tensor) - p.log_prob(q_tensor))
def _untransformed_scale_constraint(t):
     # value used: https://github.com/tensorflow/probability/blob/main/tensorflow_probability/examples/models/bayesian_vgg.py
    return tf.clip_by_value(t, -1000,tf.math.log(kernel_posterior_scale_constraint))

total_samples = len(train_images)
divergence_fn = lambda q, p, q_tensor : approximate_kl(q, p, q_tensor) / total_samples

def prior_trainable(kernel_size, bias_size=0, dtype=None):
  # Specify the prior over keras.layers.Dense kernel and bias.
  n = kernel_size + bias_size
  return tf.keras.Sequential([
      tfp.layers.VariableLayer(n, dtype=dtype),
      tfp.layers.DistributionLambda(lambda t: tfd.Independent(
          tfd.Normal(loc=t, scale=1),
          reinterpreted_batch_ndims=1)),
  ])
# Specify the surrogate posterior over `keras.layers.Dense` `kernel` and `bias`.
def posterior_mean_field(kernel_size, bias_size=0, dtype=None):
  n = kernel_size + bias_size
  c = np.log(np.expm1(1.))
  return tf.keras.Sequential([
      tfp.layers.VariableLayer(2 * n, dtype=dtype),
      tfp.layers.DistributionLambda(lambda t: tfd.Independent(
          tfd.Normal(loc=t[..., :n],
                     scale=1e-5 + tf.nn.softplus(c + t[..., n:])),
          reinterpreted_batch_ndims=1)),
  ])
kernel_posterior_fn = tfp.layers.default_mean_field_normal_fn(
    untransformed_scale_initializer=tf.compat.v1.initializers.random_normal(
        mean=kernel_posterior_scale_mean,
        stddev=kernel_posterior_scale_stddev),
    untransformed_scale_constraint=_untransformed_scale_constraint)
  
def tfp_model(nb_output=10,two_layer=True):
    #model = models.Sequential()# https://www.tensorflow.org/tutorials/images/cnn
    inpx=layers.Input(shape=(32, 32, 3))
    x=layers.Conv2D(32, (3, 3), activation='relu')(inpx)
    x=layers.MaxPooling2D((2, 2))(x)
    x=layers.Conv2D(64, (3, 3), activation='relu')(x)
    x=layers.MaxPooling2D((2, 2))(x)
    x=layers.Conv2D(64, (3, 3), activation='relu')(x)
    x=layers.Flatten()(x)
    
    # 2 mode: variatonal mode and stand mode
    if two_layer:
      x=tfp.layers.DenseReparameterization(
          units = 64, activation = 'LeakyReLU',
          kernel_posterior_fn = tfp.layers.default_mean_field_normal_fn(is_singular=False),
          kernel_prior_fn = tfp.layers.default_multivariate_normal_fn,
          bias_prior_fn = tfp.layers.default_multivariate_normal_fn,
          bias_posterior_fn = tfp.layers.default_mean_field_normal_fn(is_singular=False),
          kernel_divergence_fn = divergence_fn,
          bias_divergence_fn = divergence_fn
      )(x)
    else:
      x=layers.Dense(64, activation='relu')(x)
    
    x=tfp.layers.DenseReparameterization( #https://towardsdatascience.com/uncertainty-in-deep-learning-bayesian-cnn-tensorflow-probability-758d7482bef6
        units = tfp.layers.OneHotCategorical.params_size(nb_output), activation = None,
        kernel_posterior_fn = tfp.layers.default_mean_field_normal_fn(is_singular=False),
        kernel_prior_fn = tfp.layers.default_multivariate_normal_fn,
        bias_prior_fn = tfp.layers.default_multivariate_normal_fn,
        bias_posterior_fn = tfp.layers.default_mean_field_normal_fn(is_singular=False),
        kernel_divergence_fn = divergence_fn,
        bias_divergence_fn = divergence_fn
    )(x)
    model=Model(inpx,x)
    
    def myloss(y,y_):
      neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
      labels=y, logits=y_)
      kl=sum(model.losses)
      loss = neg_log_likelihood + 0.001*kl
      return loss
    model.compile(optimizer=tf.optimizers.Adam(), loss=myloss, metrics=['accuracy'])
    history = model.fit(train_images, train_labels, epochs=NB_EPOCHS, validation_data=(test_images, test_labels), verbose=VERBOSITY)
    return model

In [37]:
for two_varia_layer in [True, False]:
  # train
  model=tfp_model(two_layer=two_varia_layer)

  # predict
  global_preds=np.zeros(test_labels.shape)
  for i in range(16):
    logit=model.predict(test_images,verbose=VERBOSITY)
    global_preds+=softmax(logit)
    if (i+1)==4 or (i+1)==8 or (i+1)==16:
      print(f"i:{i+1} two_varia:{two_varia_layer} eval:{evaluate(test_labels,global_preds/(i+1))}")

i:4 two_varia:True eval:[0.5438 0.1971    nan 0.5858]
i:8 two_varia:True eval:[0.5482 0.1965    nan 0.584 ]
i:16 two_varia:True eval:[0.5492 0.1964    nan 0.5835]
i:4 two_varia:False eval:[0.5162 0.2064    nan 0.6113]
i:8 two_varia:False eval:[0.5189 0.2057    nan 0.6092]
i:16 two_varia:False eval:[0.5189 0.2055    nan 0.6084]


# TEMPERATURE SCALING<br>
URL:https://arxiv.org/pdf/1706.04599.pdf

In [35]:
for training_rate in [0.1, 0.01, 0.001]:
  # get training/validation split data
  p=int(len(train_images)*(1-training_rate))
  model=keras_model_builder(train_images[:p], train_labels[:p])
  
  # get predictions
  valid_raw_y_pred=model.predict(train_images[p:],verbose=VERBOSITY)
  test_raw_y_pred=model.predict(test_images,verbose=VERBOSITY)

  del model

  # fit the temperature parameter
  id_criterion=1 # LLN criteria, like the linked paper on temperature scale
  valid_scores=[]
  info=[]
  for i in range(-10,20+1,1):
    t=1.1**float(i)
    valid_y_pred=softmax(valid_raw_y_pred*t)
    test_y_pred=softmax(test_raw_y_pred*t)
    
    valid_info=evaluate(train_labels[p:], valid_y_pred)
    test_info=evaluate(test_labels, test_y_pred)
    if not np.isnan(valid_info[id_criterion]):
      valid_scores.append( valid_info[id_criterion] )
    info.append( f"valid/train split:{training_rate} t:{t} valid:{valid_info} test:{test_info}" )
  
  # display the result
  best_temp_id=np.argmin(valid_scores)
  print(info[best_temp_id])

valid/train split:0.1 t:5.1597803519999985 valid:[0.55   0.2129 0.3953 0.6011] test:[0.5343 0.2162 0.3794 0.6119]
valid/train split:0.01 t:5.1597803519999985 valid:[0.526  0.2189 0.3693 0.6139] test:[0.5303 0.2191 0.3754 0.6195]
valid/train split:0.001 t:5.1597803519999985 valid:[0.52   0.2414 0.3677 0.6794] test:[0.537  0.2147 0.3831 0.6082]
