In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras import layers
from tensorflow.keras import backend as K
import numpy as np
from tqdm import tqdm


def evaluate(model, test_x, test_y, batch_size=256):
    """Return the model's top-1 accuracy on a labelled test set, in percent.

    Samples are scored in mini-batches instead of one forward pass per
    sample; the accumulated accuracy is the same, but far fewer calls to
    ``predict_on_batch`` are issued.

    Args:
        model: object exposing ``predict_on_batch``; it must return one row
            of class scores per input sample.
        test_x: sliceable collection of input samples (sample axis first).
        test_y: integer class labels aligned with ``test_x``.
        batch_size: number of samples scored per forward pass.

    Returns:
        Accuracy as a percentage, rounded to two decimals.

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer, got {}'.format(batch_size))
    acc = tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')
    # Like zip(), only score samples that have both an input and a label.
    n_samples = min(len(test_x), len(test_y))
    for start in range(0, n_samples, batch_size):
        stop = min(start + batch_size, n_samples)
        preds = model.predict_on_batch(np.asarray(test_x[start:stop]))
        acc.update_state(np.asarray(test_y[start:stop]), preds)
    return round(100*acc.result().numpy(), 2)


def permute_task(train, test):
    """Create a permuted task by shuffling feature positions consistently.

    Every sample is flattened, one random permutation of the feature
    positions is drawn from NumPy's global RNG, and that same permutation is
    applied to all samples of both arrays. The arrays are then restored to
    their original shapes.

    The number of features per sample is derived from ``train.shape[1:]``,
    so inputs other than 32x32x3 images are supported as well.

    Args:
        train: array whose first axis indexes samples.
        test: array with the same per-sample feature count as ``train``.

    Returns:
        Tuple ``(train_permuted, test_permuted)`` with the input shapes.

    Raises:
        ValueError: if ``train`` and ``test`` differ in features per sample.
    """
    train_shape, test_shape = train.shape, test.shape
    n_features = int(np.prod(train_shape[1:]))
    if int(np.prod(test_shape[1:])) != n_features:
        raise ValueError(
            'train and test must have the same number of features per sample, '
            'got shapes {} and {}'.format(train_shape, test_shape)
        )
    train_flat = train.reshape((-1, n_features))
    test_flat = test.reshape((-1, n_features))
    idx = np.arange(n_features)
    np.random.shuffle(idx)
    train_permuted, test_permuted = train_flat[:, idx], test_flat[:, idx]
    return (train_permuted.reshape(train_shape), test_permuted.reshape(test_shape))


class Train:
    """Custom gradient-tape training loop with an optional EWC penalty.

    Attributes are set once in ``__init__``: the optimizer and loss function
    used for every step, the reference weights the penalty pulls towards
    (``prior_weights``) and the penalty strength (``lambda_``).
    """

    def __init__(self, optimizer, loss_fn, prior_weights=None, lambda_=0.1):
        """Store the training configuration.

        Args:
            optimizer: object exposing ``apply_gradients``.
            loss_fn: callable ``loss_fn(y_true, y_pred)`` returning the loss.
            prior_weights: reference weights for the EWC penalty, in the same
                order as ``model.weights``; only needed when ``train`` is
                called with a Fisher matrix.
            lambda_: multiplier applied to the EWC penalty.
        """
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.prior_weights = prior_weights
        self.lambda_ = lambda_

    def train(self, model, epochs, train_task, fisher_matrix=None, test_tasks=None):
        """Train ``model`` on ``train_task`` and track per-epoch test accuracy.

        Args:
            model: callable model exposing ``trainable_variables`` and
                ``weights``.
            epochs: number of passes over ``train_task``.
            train_task: iterable yielding ``(X, y)`` batches.
            fisher_matrix: per-weight importance terms; when given, the EWC
                penalty is added to the loss of every batch.
            test_tasks: optional sequence of ``(test_x, test_y)`` pairs that
                are evaluated after each epoch.

        Returns:
            A list with one accuracy history (one entry per epoch) for each
            test task, or ``None`` when no test tasks were supplied.

        Raises:
            ValueError: if ``fisher_matrix`` is given but this trainer has no
                ``prior_weights``.
        """
        # Fail before the first step instead of with an obscure error inside
        # the gradient tape.
        if fisher_matrix is not None and self.prior_weights is None:
            raise ValueError('prior_weights must be set to train with a fisher_matrix')
        # One accuracy history per test task, appended to after every epoch.
        test_acc = [[] for _ in test_tasks] if test_tasks else None
        for epoch in tqdm(range(epochs)):
            for X, y in train_task:
                with tf.GradientTape() as tape:
                    # training=True so layers that behave differently during
                    # training (dropout, batch norm) run in training mode.
                    pred = model(X, training=True)
                    loss = self.loss_fn(y, pred)
                    # Add the EWC penalty when a Fisher matrix is supplied.
                    if fisher_matrix is not None:
                        loss += self.compute_penalty_loss(model, fisher_matrix)
                grads = tape.gradient(loss, model.trainable_variables)
                self.optimizer.apply_gradients(zip(grads, model.trainable_variables))
            # Evaluate every test task at the end of the epoch.
            if test_acc is not None:
                for history, task in zip(test_acc, test_tasks):
                    history.append(evaluate(model, task[0], task[1]))
        print(test_acc)
        return test_acc

    def compute_penalty_loss(self, model, fisher_matrix):
        """Return ``0.5 * lambda_ * sum(F * (w - w_prior)**2)`` over all weights.

        Args:
            model: object whose ``weights`` are compared to ``prior_weights``.
            fisher_matrix: per-weight importance terms aligned with
                ``model.weights``.

        Raises:
            ValueError: if ``prior_weights`` is unset, or if the Fisher terms,
                the model weights and the prior weights differ in count.
        """
        if self.prior_weights is None:
            raise ValueError('prior_weights must be set to compute the EWC penalty')
        weights = model.weights
        # zip() would silently truncate to the shortest input and compute the
        # penalty over only part of the weights.
        if not (len(fisher_matrix) == len(weights) == len(self.prior_weights)):
            raise ValueError(
                'fisher_matrix, model.weights and prior_weights must have the same length, '
                'got {}, {} and {}'.format(len(fisher_matrix), len(weights), len(self.prior_weights))
            )
        penalty = 0.
        for u, v, w in zip(fisher_matrix, weights, self.prior_weights):
            penalty += tf.math.reduce_sum(u * tf.math.square(v - w))
        return 0.5 * self.lambda_ * penalty


class EWC:
    """Estimates per-weight importance terms (the "Fisher matrix") of a model.

    The estimate is computed once, in ``__init__``, from randomly drawn
    samples and is afterwards available through ``get_fisher``.
    """

    def __init__(self, prior_model, data_samples, num_sample=30):
        """Store the inputs and compute the Fisher estimate immediately.

        Args:
            prior_model: callable model exposing ``weights``.
            data_samples: array of input samples (sample axis first).
            num_sample: number of random draws (with replacement) averaged
                into the estimate.

        Raises:
            ValueError: if ``num_sample`` is not positive.
        """
        # Zero draws would divide the accumulators by zero and silently
        # return a useless matrix.
        if num_sample <= 0:
            raise ValueError('num_sample must be a positive integer, got {}'.format(num_sample))
        self.prior_model = prior_model
        # These are the model's live weight objects, not a frozen copy.
        self.prior_weights = prior_model.weights
        self.num_sample = num_sample
        self.data_samples = data_samples
        self.fisher_matrix = self.compute_fisher()

    def compute_fisher(self):
        """Average squared gradients of the log-softmax output over random samples.

        Returns:
            A 1-D object array with one entry per model weight; each entry is
            an array of that weight's shape.
        """
        weights = self.prior_weights
        # Fill a pre-sized object array element by element:
        # np.array([...], dtype=object) builds a multi-dimensional array (or
        # raises) when the weight shapes share leading dimensions.
        fisher_accum = np.empty(len(weights), dtype=object)
        for m, weight in enumerate(weights):
            fisher_accum[m] = np.zeros(weight.numpy().shape)
        for j in tqdm(range(self.num_sample)):
            idx = np.random.randint(self.data_samples.shape[0])
            with tf.GradientTape() as tape:
                # NOTE(review): log_softmax is applied to the raw model
                # output. If the model already ends in a softmax (as MLP3 in
                # this notebook does), this is log_softmax of probabilities,
                # not of logits -- confirm this is intended.
                logits = tf.nn.log_softmax(self.prior_model(np.array([self.data_samples[idx]])))
            # NOTE(review): the target is a vector, so the gradient is taken
            # of the sum over all class log-probabilities rather than of a
            # single (sampled or true) class -- confirm against the intended
            # Fisher definition.
            grads = tape.gradient(logits, weights)
            for m, grad in enumerate(grads):
                # Weights that do not influence the output have no gradient.
                if grad is None:
                    continue
                fisher_accum[m] += np.square(grad)
        fisher_accum /= self.num_sample
        return fisher_accum

    def get_fisher(self):
        """Return the Fisher estimate computed at construction time."""
        return self.fisher_matrix


from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D

class MLP3:
    """Builds and holds one Keras image classifier.

    Despite the class name, the network is convolutional: four
    Conv2D + MaxPooling2D stages followed by a stack of Dense layers and a
    softmax output. A single model instance is created in ``__init__`` and
    returned by both accessors, so every caller shares the same weights.
    """

    def __init__(self, input_shape=(32,32,3), hidden_layers_neuron_list=(200, 100, 50, 25, 12), num_classes=10):
        """Create the model.

        Args:
            input_shape: shape of one input sample.
            hidden_layers_neuron_list: units of each hidden Dense layer, in
                order; one layer is created per entry.
            num_classes: number of units in the softmax output layer.
        """
        self.input_shape = input_shape
        self.num_classes = num_classes
        # Store a private copy so instances never share (or mutate) the
        # default or the caller's sequence.
        self.hidden_layers_neuron_list = list(hidden_layers_neuron_list)
        self.model = self.create_cnn()

    def create_cnn(self):
        """Assemble and return the (uncompiled) Sequential model."""
        model = Sequential()

        # Convolutional stages: 3x3 convolution followed by 2x2 max pooling.
        model.add(Conv2D(32, (3, 3), activation='relu', input_shape=self.input_shape))
        model.add(MaxPooling2D((2, 2), padding='same'))
        for filters in (64, 128, 256):
            model.add(Conv2D(filters, (3, 3), activation='relu'))
            model.add(MaxPooling2D((2, 2), padding='same'))

        model.add(Flatten())

        # Hidden dense layers: one per configured width, however many there are.
        for units in self.hidden_layers_neuron_list:
            model.add(Dense(units, activation='relu'))

        # Output layer: class probabilities.
        model.add(Dense(self.num_classes, activation='softmax'))

        return model

    def get_uncompiled_model(self):
        """Return the shared model instance without compiling it."""
        return self.model

    def get_compiled_model(self, optimizer, loss_fn, metrics):
        """Compile the shared model instance in place and return it.

        No new model is created: repeated calls return the same object
        (recompiled), so all returned references share weights.
        """
        compiled_model = self.model
        # Pass by keyword: the positional order of compile()'s arguments
        # after `loss` is not the same across Keras releases.
        compiled_model.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)
        return compiled_model

In [2]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from tqdm import tqdm

In [3]:
# Experiment configuration.
# Seed NumPy (pixel permutation, Fisher sampling) and TensorFlow (weight
# initialisation, dataset shuffling) so that a fresh run is repeatable.
seed = 0
np.random.seed(seed)
tf.random.set_seed(seed)

epochs = 5          # passes over the training set, per task
lambda_ = 0.01      # strength of the EWC penalty
lr = 0.0001         # Adam learning rate
num_sample = 30     # samples drawn to estimate the Fisher matrix
# NOTE(review): this single optimizer instance is handed to every trainer and
# compile() call in the notebook, so its internal state carries over from one
# training run to the next -- confirm this is intended.
opt = tf.keras.optimizers.Adam(learning_rate=lr)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

In [4]:
# Task A: CIFAR-10 with images cast to float32 (pixel values are not rescaled).
(x_train_A, y_train_A), (x_test_A, y_test_A) = cifar10.load_data()
x_train_A, x_test_A = x_train_A.astype('float32'), x_test_A.astype('float32')

slices_A = tf.data.Dataset.from_tensor_slices((x_train_A, y_train_A))
train_A = slices_A.shuffle(1000).batch(32)
test_A = (x_test_A, y_test_A)

# Task B: the same images with one fixed pixel permutation; labels are reused.
x_train_B, x_test_B = permute_task(x_train_A, x_test_A)
y_train_B = y_train_A
y_test_B = y_test_A

# NOTE(review): task B is batched by 4 while task A uses 32 -- confirm intended.
slices_B = tf.data.Dataset.from_tensor_slices((x_train_B, y_train_B))
train_B = slices_B.shuffle(1000).batch(4)
test_B = (x_test_B, y_test_B)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [5]:
# Build the network, then fit it on task A with the plain (penalty-free) trainer.
mlp = MLP3()
model = mlp.get_compiled_model(optimizer=opt, loss_fn=loss_fn, metrics=['accuracy'])
trn_gd = Train(optimizer=opt, loss_fn=loss_fn)

# train() returns one accuracy history per test task; only task A is tracked here.
history_A = trn_gd.train(model, epochs, train_A, test_tasks=[test_A])
acc_prior_A = history_A[0]

# Checkpoint the task-A weights; later cells reload them before training on task B.
model.save('CIFAR10_A.h5')
print('[INFO] Task A Original (SGD): {}'.format(acc_prior_A[-1]))

100%|██████████| 5/5 [11:19<00:00, 135.91s/it]

[[40.95, 51.1, 55.83, 60.5, 62.28]]
[INFO] Task A Original (SGD): 62.28



  saving_api.save_model(


In [6]:
# Estimate the Fisher matrix of the task-A model from `num_sample` randomly
# drawn task-A training images (computed inside the EWC constructor).
ewc = EWC(prior_model=model, data_samples=x_train_A, num_sample=num_sample)
f_matrix = ewc.get_fisher()

100%|██████████| 30/30 [00:00<00:00, 33.00it/s]


In [7]:
# get_compiled_model() returns the network held by `mlp` (the same object as
# `model`), so restore the task-A checkpoint before continuing.
model_ewcB = mlp.get_compiled_model(optimizer=opt, loss_fn=loss_fn, metrics=['accuracy'])
model_ewcB.load_weights('CIFAR10_A.h5')

# Snapshot of the task-A weights that the EWC penalty pulls towards.
prior_weights = model_ewcB.get_weights()

In [None]:
# Train on task B with the EWC penalty, tracking accuracy on both tasks.
trn = Train(optimizer=opt, loss_fn=loss_fn, prior_weights=prior_weights, lambda_=lambda_)
ewc_histories = trn.train(
    model_ewcB,
    epochs,
    train_B,
    fisher_matrix=f_matrix,
    test_tasks=[test_A, test_B],
)
acc_ewcA, acc_ewcB = ewc_histories

# Report the accuracy reached after the final epoch.
final_ewcA = acc_ewcA[-1]
final_ewcB = acc_ewcB[-1]
print('[INFO] Task A ACC. after training B with EWC: {}'.format(final_ewcA))
print('[INFO] Task B ACC. after training B with EWC: {}'.format(final_ewcB))

 60%|██████    | 3/5 [53:33<35:38, 1069.13s/it]

In [None]:
# Baseline: reset the shared network to the task-A checkpoint and train on
# task B with the penalty-free trainer.
model_sgdB = mlp.get_compiled_model(optimizer=opt, loss_fn=loss_fn, metrics=['accuracy'])
model_sgdB.load_weights('CIFAR10_A.h5')

baseline_histories = trn_gd.train(model_sgdB, epochs, train_B, test_tasks=[test_A, test_B])
acc_sgdA, acc_sgdB = baseline_histories

# Report the accuracy reached after the final epoch.
final_sgdA = acc_sgdA[-1]
final_sgdB = acc_sgdB[-1]
print('[INFO] Task A ACC. after training B with GD: {}'.format(final_sgdA))
print('[INFO] Task B ACC. after training B with GD: {}'.format(final_sgdB))

In [None]:
# Grouped bar chart of final-epoch accuracy: task B on the left, task A on the right.
total_width, n = 0.1, 2
width = total_width / n
x = 0 - (total_width - width) / 2  # left edge of the task-B group

plt.style.use('ggplot')

# (offset in bar widths, bar height, legend label), drawn in this order.
bar_specs = [
    (0, acc_ewcB[-1], 'EWC B'),
    (1, acc_sgdB[-1], 'SGD B'),
    (3.5, acc_prior_A[-1], 'Prior A'),
    (4.5, acc_ewcA[-1], 'EWC A'),
    (5.5, acc_sgdA[-1], 'SGD A'),
]
for offset, height, label in bar_specs:
    plt.bar(x + offset * width, height, width=width, label=label, hatch='w/', ec='w')

plt.legend(facecolor='white', loc='lower left')
plt.xticks(np.array([0., 3.5 * width]), ('Task B', 'Task A'))
plt.title('Training task B with EWC Vs SGD after \n task A had been trained to criterion')
plt.xlim(-0.15, 0.35)
plt.ylim(0., 105.)
plt.show()

In [None]:
# Task-A test accuracy over time: the first `epochs` points are training on
# task A, the following `epochs` points are training on task B.
total_epochs = epochs * 2
# Aim for roughly ten x ticks whatever the epoch count is; a fixed step of 50
# leaves a single tick when only a handful of epochs are run.
tick_step = max(1, total_epochs // 10)

plt.plot(range(total_epochs), (acc_prior_A + acc_sgdA), color='green', linestyle='dashed', label="SGD")
plt.plot(range(total_epochs), (acc_prior_A + acc_ewcA), color='red', linestyle='dashed', label="EWC")
plt.plot(range(epochs), acc_prior_A, color='blue', label="Prior")
plt.xticks(range(0, total_epochs, tick_step))
plt.title('Training task B with EWC Vs SGD after \n task A had been trained to criterion')
plt.legend(facecolor='white')
plt.ylabel('Test accuracy A')
plt.xlabel('Epochs')
plt.show()