In [1]:
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
import tensorflow_probability as tfp
import time

In [2]:
# Load the MNIST train/test image and label arrays via the Keras dataset helper.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

def preprocess_images(images):
  """Lay images out as (N, 28, 28) and divide every pixel value by 255.

  Args:
    images: array whose first axis indexes samples and whose remaining
      elements total 28 * 28 per sample.

  Returns:
    A floating-point array of shape (N, 28, 28).
  """
  n_images = images.shape[0]
  as_grid = images.reshape((n_images, 28, 28))
  return as_grid / 255.


# Rescale pixels and fix the (N, 28, 28) layout for both splits.
train_images = preprocess_images(train_images)
test_images = preprocess_images(test_images)

# Labels become column vectors of shape (N, 1).
train_labels = np.expand_dims(train_labels, axis=-1)
test_labels = np.expand_dims(test_labels, axis=-1)

# Split sizes are read from the loaded data instead of being hard-coded
# (60000 / 10000 for MNIST), so they cannot drift from the arrays in use.
train_size = train_labels.shape[0]
test_size = test_labels.shape[0]
batch_size = 1024

# Append a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1).
train_images = tf.expand_dims(train_images, axis=-1)
test_images = tf.expand_dims(test_images, axis=-1)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Batched tf.data pipelines over (image, label) pairs.
# drop_remainder=True discards the final partial batch, so every batch has
# exactly `batch_size` elements.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_images, train_labels))
    .shuffle(train_size, reshuffle_each_iteration=True)
    .batch(batch_size, drop_remainder=True)
)
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices((test_images, test_labels))
    .shuffle(test_size)
    .batch(batch_size, drop_remainder=True)
)

In [4]:
# Sanity check: labels are a column vector, images carry a trailing channel axis.
train_labels.shape, train_images.shape

((60000, 1), TensorShape([60000, 28, 28, 1]))

In [5]:
# NOTE(review): tensorflow is already imported as `tf` in the first cell, so
# this re-import is redundant.
import tensorflow as tf
# Restrict TensorFlow's Python logger to messages at ERROR level.
tf.get_logger().setLevel('ERROR')

In [6]:

class CustomDropout(tf.keras.layers.Layer):
    """Gate a stack of decision-maker predictions with a learned sigmoid mask.

    `inputs` is expected to have shape (batch, n_decision_makers, n_classes).
    One gate value per decision maker is predicted from the flattened inputs
    and multiplied onto that decision maker's class scores. During training
    the layer also builds a 0/1 reference mask marking the (sample,
    decision-maker) pairs whose cross-entropy is at or below the `1 - rate`
    percentile of all such losses in the batch.

    NOTE(review): a later cell defines `CustomDropout` again (with an extra
    `lda` argument); once that cell runs it replaces this definition.

    Args:
        rate: fraction of (sample, decision-maker) pairs marked as dropped in
            the reference mask; the lowest-loss `1 - rate` fraction is kept.
        input_dim: size of the flattened input, i.e.
            n_decision_makers * n_classes.
        **kwargs: forwarded to `tf.keras.layers.Layer`.
    """

    def __init__(self, rate, input_dim, **kwargs):
        super(CustomDropout, self).__init__(**kwargs)
        # Stored as the keep fraction; used directly as the percentile in call().
        self.rate = 1-rate
        self.input_dim = input_dim
        # Snapshot the notebook-level global once so call() and the weight
        # shapes are guaranteed to agree.
        self.n_makers = n_decision_makers
        self.n_classes = input_dim // n_decision_makers
        self.mask_w = self.add_weight(shape=(self.input_dim, self.n_makers), trainable=True)
        self.mask_b = self.add_weight(shape=(self.n_makers,), initializer="zeros", trainable=True)

    def _predicted_gate(self, inputs):
        """Return the learned gate as a flat (batch, n_makers * n_classes) tensor."""
        flat_inputs = tf.reshape(inputs, (-1, self.input_dim))
        gate = tf.nn.sigmoid(tf.matmul(flat_inputs, self.mask_w) + self.mask_b)
        # Repeat each decision maker's gate contiguously ([g0]*C, [g1]*C, ...)
        # so that reshaping to (n_makers, n_classes) puts gate i on row i.
        # tf.tile followed by that reshape would instead place gate
        # (n_classes*i + j) % n_makers at position [i, j].
        return tf.repeat(gate, repeats=self.n_classes, axis=-1)

    def call(self, inputs, label, training=None):
        """Apply the learned gate and build the reference mask.

        Args:
            inputs: (batch, n_makers, n_classes) class probabilities.
            label: (batch, 1) integer class labels; only read when training.
            training: truthy to build the loss-based reference mask.

        Returns:
            Tuple of (gated inputs, float32 reference mask shaped like
            `inputs`, flat predicted gate of shape (batch, n_makers * n_classes)).
        """
        gate_flat = self._predicted_gate(inputs)
        gate = tf.reshape(gate_flat, (-1, self.n_makers, self.n_classes))

        if training:
            scce = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
            # (batch, 1) -> (batch, n_makers, 1): every decision maker is
            # scored against the same label.
            per_maker_label = tf.tile(tf.expand_dims(label, axis=1), [1, self.n_makers, 1])
            loss = scce(per_maker_label, inputs)  # (batch, n_makers)

            # Keep the lowest-loss `1 - rate` fraction of all pairs in the batch.
            threshold = tfp.stats.percentile(loss, q=self.rate*100)
            keep = loss <= threshold
            true_mask = tf.cast(
                tf.tile(tf.expand_dims(keep, axis=-1), [1, 1, self.n_classes]), 'float32')
        else:
            # Follow the actual batch size rather than the global `batch_size`,
            # so partial or differently sized batches get a correctly shaped mask.
            true_mask = tf.ones_like(inputs, dtype=tf.float32)

        return tf.multiply(gate, inputs), true_mask, gate_flat

In [7]:

class CustomDropout(tf.keras.layers.Layer):
    """Gate a stack of decision-maker predictions with a learned sigmoid mask.

    `inputs` is expected to have shape (batch, n_decision_makers, n_classes).
    One gate value per decision maker is predicted from the flattened inputs
    and multiplied onto that decision maker's class scores. During training
    the gate is additionally scaled by `lda`, and a 0/1 reference mask is
    built marking the (sample, decision-maker) pairs whose cross-entropy is at
    or below the `1 - rate` percentile of all such losses in the batch.

    Args:
        rate: fraction of (sample, decision-maker) pairs marked as dropped in
            the reference mask; the lowest-loss `1 - rate` fraction is kept.
        input_dim: size of the flattened input, i.e.
            n_decision_makers * n_classes.
        lda: multiplier applied to the predicted gate in the training branch.
        **kwargs: forwarded to `tf.keras.layers.Layer`.
    """

    def __init__(self, rate, input_dim, lda=1.0, **kwargs):
        super(CustomDropout, self).__init__(**kwargs)
        # Stored as the keep fraction; used directly as the percentile in call().
        self.rate = 1-rate
        self.input_dim = input_dim
        # Snapshot the notebook-level global once so call() and the weight
        # shapes are guaranteed to agree.
        self.n_makers = n_decision_makers
        self.n_classes = input_dim // n_decision_makers
        self.mask_w = self.add_weight(shape=(self.input_dim, self.n_makers), trainable=True)
        self.mask_b = self.add_weight(shape=(self.n_makers,), initializer="zeros", trainable=True)
        self.lda = lda

    def _predicted_gate(self, inputs):
        """Return the learned gate as a flat (batch, n_makers * n_classes) tensor."""
        flat_inputs = tf.reshape(inputs, (-1, self.input_dim))
        gate = tf.nn.sigmoid(tf.matmul(flat_inputs, self.mask_w) + self.mask_b)
        # Repeat each decision maker's gate contiguously ([g0]*C, [g1]*C, ...)
        # so that reshaping to (n_makers, n_classes) puts gate i on row i.
        # tf.tile followed by that reshape would instead place gate
        # (n_classes*i + j) % n_makers at position [i, j].
        return tf.repeat(gate, repeats=self.n_classes, axis=-1)

    def call(self, inputs, label, training=None):
        """Apply the learned gate and build the reference mask.

        Args:
            inputs: (batch, n_makers, n_classes) class probabilities.
            label: (batch, 1) integer class labels; only read when training.
            training: truthy to build the loss-based reference mask and apply
                the `lda` scaling.

        Returns:
            Tuple of (gated inputs, float32 reference mask shaped like
            `inputs`, flat gate of shape (batch, n_makers * n_classes)).
        """
        gate_flat = self._predicted_gate(inputs)

        if training:
            scce = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
            # (batch, 1) -> (batch, n_makers, 1): every decision maker is
            # scored against the same label.
            per_maker_label = tf.tile(tf.expand_dims(label, axis=1), [1, self.n_makers, 1])
            loss = scce(per_maker_label, inputs)  # (batch, n_makers)

            # Keep the lowest-loss `1 - rate` fraction of all pairs in the batch.
            threshold = tfp.stats.percentile(loss, q=self.rate*100)
            keep = loss <= threshold
            true_mask = tf.cast(
                tf.tile(tf.expand_dims(keep, axis=-1), [1, 1, self.n_classes]), 'float32')

            # NOTE(review): `lda` scales the gate only while training; the
            # inference branch uses the unscaled gate. Kept as-is -- confirm
            # this train/inference asymmetry is intended.
            gate_flat = gate_flat * self.lda
        else:
            # Follow the actual batch size rather than the global `batch_size`,
            # so partial or differently sized batches get a correctly shaped mask.
            true_mask = tf.ones_like(inputs, dtype=tf.float32)

        gate = tf.reshape(gate_flat, (-1, self.n_makers, self.n_classes))
        return tf.multiply(gate, inputs), true_mask, gate_flat

In [8]:

# Number of parallel 10-way "decision maker" heads. Read as a global by both
# CustomDropout and MyModel.
n_decision_makers = 4  #100
class MyModel(tf.keras.Model):
    """Two-stage CNN whose final prediction is built from gated decision makers.

    The second conv stage feeds `dense5`, whose output is reshaped to
    (n_decision_makers, 10), soft-maxed, gated by `CustomDropout`, flattened
    and mapped to 10 class probabilities by `dense7`.

    The model is called with a two-element list `[images, labels]`.

    Args:
        lda: forwarded to `CustomDropout` as its `lda` argument.
        **kwargs: forwarded to `tf.keras.Model`.
    """
    def __init__(self, lda=1.0,**kwargs):
      """Create all layers; see the class docstring for how they are wired."""
      super(MyModel,self).__init__(**kwargs)

      # NOTE(review): call() only uses flat1, flat4, flat5 and reshape1;
      # flat2, flat3, flat6 and reshape2 are created but never applied there.
      self.flat1 = tf.keras.layers.Flatten()
      self.flat2 = tf.keras.layers.Flatten()
      self.flat3 = tf.keras.layers.Flatten()
      self.flat4 = tf.keras.layers.Flatten()
      self.flat5 = tf.keras.layers.Flatten()
      self.flat6 = tf.keras.layers.Flatten()
      self.reshape1 = tf.keras.layers.Reshape((n_decision_makers,10))
      self.reshape2 = tf.keras.layers.Reshape((n_decision_makers,10))
      self.lda = lda



      # Learned gate over the decision makers (rate 0.3, flattened input size
      # n_decision_makers*10), plus standard dropout after each conv stage.
      self.dropout1 = CustomDropout(0.3,n_decision_makers*10, lda=self.lda)
      self.dropout4 = tf.keras.layers.Dropout(0.2)
      self.dropout5 = tf.keras.layers.Dropout(0.2)

      self.pool1 = tf.keras.layers.MaxPooling2D((2, 2))
      self.pool2 = tf.keras.layers.MaxPooling2D((2, 2))

      # Stage 1 layers. In call(), conv11 is applied before conv1.
      self.conv1 = tf.keras.layers.Conv2D(64, 3, activation='relu',padding='same',kernel_regularizer=tf.keras.regularizers.l1(l=0.01),kernel_initializer='he_uniform',)
      self.conv11 = tf.keras.layers.Conv2D(128, 3, activation='relu',padding='same',kernel_regularizer=tf.keras.regularizers.l1(l=0.01),kernel_initializer='he_uniform',)
      self.dense1 = tf.keras.layers.Dense(10,activation=tf.nn.softmax)
      self.batchnorm1 = tf.keras.layers.BatchNormalization()

      # Stage 2 layers. In call(), conv22 is applied before conv2.
      self.conv2 = tf.keras.layers.Conv2D(128, 3, activation='relu',padding='same',kernel_regularizer=tf.keras.regularizers.l1(l=0.01),kernel_initializer='he_uniform',)
      self.conv22 = tf.keras.layers.Conv2D(64, 3, activation='relu',padding='same',kernel_regularizer=tf.keras.regularizers.l1(l=0.01),kernel_initializer='he_uniform',)
      self.dense2 = tf.keras.layers.Dense(10,activation=tf.nn.softmax)
      self.batchnorm2 = tf.keras.layers.BatchNormalization()

      # dense5 produces the raw scores of all decision makers; dense7 is the
      # final 10-way classifier over the gated decision-maker outputs.
      self.dense5 = tf.keras.layers.Dense(n_decision_makers*10,activation=tf.nn.relu)
      self.dense7 = tf.keras.layers.Dense(10, activation=tf.nn.softmax)

    def call(self, input):
      """Run the forward pass.

      Args:
        input: two-element list `[images, labels]`; the labels are handed to
          `CustomDropout`.

      Returns:
        The 10-way softmax output of `dense7`.
      """

      [input, label] = input
      # Stage 1: conv11 -> conv1 -> pool -> batchnorm -> dropout.
      hidden_conv1 = self.dropout4(self.batchnorm1(self.pool1(self.conv1(self.conv11(input)))))
      hidden_conv1_reshape = self.flat4(hidden_conv1)
      # NOTE(review): hidden_conv1_out is computed but not used or returned here.
      hidden_conv1_out = self.dense1(hidden_conv1_reshape)

      # Stage 2: conv22 -> conv2 -> pool -> batchnorm -> dropout.
      hidden_conv2 = self.dropout5(self.batchnorm2(self.pool2(self.conv2(self.conv22(hidden_conv1)))))
      hidden_conv2_reshape = self.flat5(hidden_conv2)
      # NOTE(review): hidden_conv2_out is computed but not used or returned here.
      hidden_conv2_out = self.dense2(hidden_conv2_reshape)

      # Decision makers: (batch, n_decision_makers*10) -> (batch, n_decision_makers, 10),
      # soft-maxed over the last axis.
      hidden1 = self.dense5(hidden_conv2_reshape)
      hidden1_reshape = self.reshape1(hidden1)
      hidden1_softmax = tf.nn.softmax(hidden1_reshape)
      # `training` is not passed explicitly; presumably Keras propagates the
      # outer training mode to this layer -- TODO confirm.
      # Only the gated output is used below; both mask outputs are discarded.
      hidden1_out,hidden1_true_mask,hidden1_pred_mask = self.dropout1(hidden1_softmax,label)

      outputs = self.dense7(self.flat1(hidden1_out)) #leader outputs

      return outputs

In [9]:
# Colab-only: mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
txt_path = '/content/drive/MyDrive/Research/DPhil Research/ICLR_2024/' + 'MNIST_LFNN_BF_lambda'
txt_path = txt_path + '.txt'

# Sweep the CustomDropout gate multiplier `lda`: train a fresh model for each
# value and append its full Keras training history to the results file.
with open(txt_path, 'w') as file:

    for lda in [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 0]:
        # Release the Keras global state left by the previous model so memory
        # does not grow across the sweep.
        tf.keras.backend.clear_session()

        # One header per run (it was previously written twice).
        print("Lambda: ", lda, file=file)
        print("-----------------------------------------", file=file)

        print(txt_path)
        model = MyModel(lda=lda)
        # Call once on dummy [images, labels] so the weights are created
        # before summary().
        model([tf.zeros((batch_size, 28, 28, 1)), tf.zeros((batch_size, 1))])
        model.summary()

        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])

        # Validation feeds zeros as the label input: CustomDropout reads the
        # labels only in its training branch.
        history = model.fit(
            [train_images, train_labels], train_labels,
            epochs=100,
            batch_size=batch_size,
            validation_data=([test_images, np.zeros(test_labels.shape)], test_labels),
        )

        for key, values in history.history.items():
            file.write(f"{key}: {values}\n")
        # Persist this run's results now so an interrupted sweep keeps them.
        file.flush()


/content/drive/MyDrive/Research/DPhil Research/ICLR_2024/MNIST_LFNN_BF_lambda.txt
Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           multiple                  0         
                                                                 
 flatten_1 (Flatten)         multiple                  0 (unused)
                                                                 
 flatten_2 (Flatten)         multiple                  0 (unused)
                                                                 
 flatten_3 (Flatten)         multiple                  0         
                                                                 
 flatten_4 (Flatten)         multiple                  0         
                                                                 
 flatten_5 (Flatten)         multiple                  0 (unused)
                                          

local_loss