In [None]:
import tensorflow as tf
import numpy as np

from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, Activation, Input, Flatten, Conv2D, AveragePooling2D

In [None]:
# Query TensorFlow for its GPU device name and stop the notebook early unless
# it is exactly the first GPU ('/device:GPU:0').
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [None]:
# Load the MNIST train/test splits shipped with Keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Divide the images by 255 (true division, so the results are floating point).
x_train = x_train / 255
x_test = x_test / 255

# Turn the integer labels into one-hot vectors of depth 10, as expected by the
# categorical cross-entropy loss and categorical accuracy metric used later.
y_train = tf.one_hot(y_train, 10)
y_test = tf.one_hot(y_test, 10)

In [None]:
# Mini-batch size used for training.
batch_size = 32

# Training pipeline: (image, one-hot label) pairs grouped into mini-batches.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_ds = train_ds.batch(batch_size)

# Evaluation pipeline: batches of 10000 examples.
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_ds = test_ds.batch(10000)

In [None]:
class MyDense(tf.keras.layers.Layer):
  """Fully connected layer whose kernel and bias start from given values.

  Args:
    w: initial value of the weight matrix; inputs are right-multiplied by it.
    b: initial value of the bias added after the matrix product.
    activation: activation identifier handed to ``Activation``.
  """

  def __init__(self, w, b, activation='relu'):
    super().__init__()
    # Both parameters are trainable variables seeded with the supplied values.
    self.w = tf.Variable(w, trainable=True)
    self.b = tf.Variable(b, trainable=True)
    # Keep the raw identifier as well, so a sibling layer can later be built
    # with the same activation.
    self.activation_f = activation
    self.activation = Activation(activation)

  def call(self, x):
    """Return ``activation(x @ w + b)``."""
    pre_activation = x @ self.w + self.b
    return self.activation(pre_activation)

class AutoNN:
  """Stack of ``MyDense`` layers that can be widened and deepened in place.

  ``self.layers`` is stored in reverse order of application: index 0 is the
  softmax output layer and the last element is the layer applied first to the
  input. ``proliferate`` replaces that first layer with two layers.

  Args:
    inp: number of input features.
    out: number of output units (softmax).
    activation: activation identifier used for the hidden layers.
    initializer: initializer class (not instance); it is instantiated with no
      arguments and used for every newly created weight matrix.
    hidden: width of the initial hidden layer.
  """

  def __init__(self, inp, out, activation='relu', initializer=tf.keras.initializers.GlorotUniform, hidden=128):
    self.activation = activation
    self.initializer = initializer()
    self.identity = tf.keras.initializers.Identity()
    self.layers = list()
    # Output layer first, because the list is traversed back to front.
    self.layers.append(MyDense(self.initializer([hidden, out]), tf.zeros([out]), activation='softmax'))
    # The hidden layer honours the requested activation instead of a fixed 'relu'.
    self.layers.append(MyDense(self.initializer([inp, hidden]), tf.zeros([hidden]), activation=self.activation))

  def __call__(self, x):
    """Apply the layers to ``x``, from the last list element to the first."""
    for layer in reversed(self.layers):
      x = layer(x)
    return x

  def proliferate(self, n):
    """Widen the first-applied layer by ``n`` units and insert a layer after it.

    The first-applied layer keeps its weights and gains ``n`` freshly
    initialised output units with zero bias. A new layer is inserted right
    after it, mapping the widened output back to the old width: identity
    weights for the old units, zero weights for the new ones, zero bias.
    The inserted layer's pre-activation therefore equals the old layer's
    output. The network output is unchanged immediately after the call only
    when the activation is idempotent (e.g. relu, where relu(relu(z)) == relu(z)).

    Args:
      n: number of units to add; must be >= 0 (0 only inserts the identity layer).

    Raises:
      ValueError: if ``n`` is negative.
    """
    # Validate before touching self.layers so a bad call cannot leave the
    # network without its first layer.
    if n < 0:
      raise ValueError(f'n must be a non-negative integer, got {n}')

    old_layer = self.layers[-1]
    old_layer_w, old_layer_b = old_layer.w, old_layer.b
    old_layer_n = old_layer_b.shape[0]

    # Widened copy of the old layer: output width becomes old_layer_n + n.
    new_low_layer_w = tf.concat([old_layer_w, self.initializer([old_layer_w.shape[0], n])], axis=1)
    new_low_layer_b = tf.concat([old_layer_b, tf.zeros([n])], axis=0)

    # Inserted layer: (old_layer_n + n) -> old_layer_n, identity on the old units.
    new_high_layer_w = tf.concat([self.identity([old_layer_n, old_layer_n]), tf.zeros([n, old_layer_n])], axis=0)
    new_high_layer_b = tf.zeros([old_layer_n])

    high_layer = MyDense(new_high_layer_w, new_high_layer_b, old_layer.activation_f)
    low_layer = MyDense(new_low_layer_w, new_low_layer_b, self.activation)

    # Swap the old layer for the pair in one step; the low layer goes last
    # because the last element is applied first.
    self.layers[-1:] = [high_layer, low_layer]

In [None]:
# Growth experiment: train the current network for 10 epochs, then call
# proliferate() and rebuild the Keras model around the modified layer stack.
auto_nn = AutoNN(784, 10)

growth = 2

for i in range(5):

  # Wrap the current layer stack in a functional model over 28x28 inputs.
  inp = Input((28, 28))
  x = Flatten()(inp)
  out = auto_nn(x)

  model = Model(inputs=inp, outputs=out)
  model.summary()

  # A fresh optimizer per round: the set of trainable variables changes
  # whenever the network is proliferated.
  optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
  loss_obj = tf.keras.losses.CategoricalCrossentropy()

  loss_train = tf.keras.metrics.Mean()
  loss_test = tf.keras.metrics.Mean()
  # Keras metrics accumulate over every update until they are reset, so the
  # accuracy metrics are updated per batch, read once per epoch, and reset at
  # the start of each epoch together with the loss metrics.
  acc_train = tf.keras.metrics.CategoricalAccuracy()
  acc_test = tf.keras.metrics.CategoricalAccuracy()

  # The step functions are redefined every round so that each tf.function
  # traces the model and optimizer created in this round.
  @tf.function
  def train_step(x_batch, y_batch):
    """Run one optimisation step and record the batch loss and accuracy."""
    with tf.GradientTape() as tape:
      pred = model(x_batch)
      loss = loss_obj(y_batch, pred)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    loss_train.update_state(loss)
    acc_train.update_state(y_batch, pred)

  @tf.function
  def test_step(x_batch, y_batch):
    """Evaluate one batch and record its loss and accuracy."""
    pred = model(x_batch)
    loss_test.update_state(loss_obj(y_batch, pred))
    acc_test.update_state(y_batch, pred)

  for epoch in range(10):

    # Start every epoch from clean metric state.
    loss_train.reset_states()
    loss_test.reset_states()
    acc_train.reset_states()
    acc_test.reset_states()

    for _x_train, _y_train in train_ds:
      train_step(_x_train, _y_train)

    for _x_test, _y_test in test_ds:
      test_step(_x_test, _y_test)

    print(f'It: {i+1}, Epoch {epoch+1}, Train Loss: {loss_train.result()}, Train Accuracy: {acc_train.result()}, Test Loss: {loss_test.result()}, Test Accuracy: {acc_test.result()}')

  # Alternative (disabled): widen by a factor of `growth` each round.
  # auto_nn.proliferate((growth - 1)  * model.layers[2].weights[0].shape[-1])
  # Current setting: add no units, only insert an identity-initialised layer.
  auto_nn.proliferate(0)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28)]          0         
_________________________________________________________________
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
my_dense_1 (MyDense)         (None, 128)               100480    
_________________________________________________________________
my_dense (MyDense)           (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________
It: 1, Epoch 1, Train Loss: 0.2682447135448456, Train Accuracy: 0.8742448687553406, Test Loss: 0.14672844111919403, Test Accuracy: 0.9546999931335449
It: 1, Epoch 2, Train Loss: 0.11925798654556274, Train Accuracy: 0.9350552558898926, Test Loss: 0.10502