In [None]:
import itertools

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, optimizers, losses

# Load MNIST dataset
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()


def _flatten_and_scale(images):
    """Flatten every image to a 784-vector of float32 values divided by 255."""
    return images.reshape(-1, 784).astype('float32') / 255.0


# Normalize and reshape images
train_images = _flatten_and_scale(train_images)
test_images = _flatten_and_scale(test_images)

# Generate permuted tasks: each task draws one random ordering of the 784
# input columns and applies that same ordering to the train and test splits.
num_tasks = 10
tasks_train_images, tasks_test_images = [], []
for task in range(num_tasks):
    permutation = np.random.permutation(784)
    tasks_train_images.append(train_images[:, permutation])
    tasks_test_images.append(test_images[:, permutation])

# Define MLP model
class MLP(tf.keras.Model):
    """Fully connected classifier that carries its own optimizer.

    The network is ``depth - 1`` hidden ``Dense(256, relu)`` layers followed
    by a 10-unit softmax output layer. A single shared ``Dropout`` layer is
    applied after every hidden layer when the model is called with
    ``training=True``.

    Args:
        depth: Total number of Dense layers, counting the output layer.
        dropout_prob: Rate handed to the ``Dropout`` layer.
        optimizer_type: One of ``"SGD"``, ``"Adam"`` or ``"RMSProp"``; the
            optimizer is constructed with no arguments.

    Raises:
        ValueError: If ``optimizer_type`` is not one of the supported names.
    """

    def __init__(self, depth, dropout_prob, optimizer_type):
        super().__init__()
        self.depth = depth
        self.dropout_prob = dropout_prob
        self.hidden_layers = [layers.Dense(256, activation='relu') for _ in range(depth - 1)]
        self.dropout = layers.Dropout(dropout_prob)
        self.output_layer = layers.Dense(10, activation='softmax')

        if optimizer_type == "SGD":
            self.optimizer = optimizers.SGD()
        elif optimizer_type == "Adam":
            self.optimizer = optimizers.Adam()
        elif optimizer_type == "RMSProp":
            self.optimizer = optimizers.RMSprop()
        else:
            raise ValueError("Optimizer not supported")

    def call(self, inputs, training=False):
        """Return the softmax class probabilities for ``inputs``.

        Dropout is only inserted when ``training`` is truthy.
        """
        x = inputs
        for layer in self.hidden_layers:
            x = layer(x)
            if training:
                # Pass the flag explicitly so dropout is active even when
                # this method is reached without Keras' implicit call context.
                x = self.dropout(x, training=training)
        return self.output_layer(x)

    def compute_loss(self, logits, labels, loss_type):
        """Return the scalar loss of the model outputs against ``labels``.

        NOTE(review): tf.keras.Model appears to define its own
        ``compute_loss`` hook with a different signature; confirm that
        ``fit()`` is never used with this class.

        Args:
            logits: Model outputs. Despite the name these are the softmax
                probabilities produced by ``call``, not raw logits.
            labels: Integer class ids.
            loss_type: ``"NLL"`` (sparse categorical cross-entropy),
                ``"L1"`` (mean absolute error against the one-hot target),
                ``"L2"`` (mean squared error against the one-hot target) or
                ``"L1+L2"`` (sum of the two).

        Raises:
            ValueError: If ``loss_type`` is not one of the names above.
        """
        if loss_type == "NLL":
            return losses.SparseCategoricalCrossentropy()(labels, logits)
        if loss_type not in ("L1", "L2", "L1+L2"):
            raise ValueError("Loss type not supported")

        # All regression-style losses share the same residual; build it once.
        residual = tf.one_hot(labels, depth=10) - logits
        if loss_type == "L1":
            return tf.reduce_mean(tf.abs(residual))
        if loss_type == "L2":
            return tf.reduce_mean(tf.square(residual))
        return tf.reduce_mean(tf.abs(residual)) + tf.reduce_mean(tf.square(residual))

# Training function
def train_model(model, train_images, train_labels, epochs, dropout_prob, loss_type):
    """Apply ``epochs`` gradient updates to ``model`` and return it.

    Each iteration pushes the whole ``train_images`` array through the model
    in a single forward pass, so one "epoch" here is exactly one optimizer
    step. ``dropout_prob`` is accepted but not read by this function.
    """
    for _ in range(epochs):
        with tf.GradientTape() as tape:
            outputs = model(train_images, training=True)
            loss_value = model.compute_loss(outputs, train_labels, loss_type)
        variables = model.trainable_variables
        grads = tape.gradient(loss_value, variables)
        model.optimizer.apply_gradients(zip(grads, variables))
    return model

# Testing function
def test_model(model, test_images, test_labels):
    """Return the fraction of ``test_images`` that ``model`` classifies correctly.

    Args:
        model: Callable returning per-class scores for a batch of images.
        test_images: Batch of inputs passed to the model with ``training=False``.
        test_labels: Integer class ids, one per image.

    Returns:
        The mean accuracy as a NumPy float32 scalar.
    """
    scores = model(test_images, training=False)
    predictions = tf.argmax(scores, axis=1)
    # tf.equal needs both operands in the same dtype; argmax yields int64
    # while the labels may arrive as another integer type (e.g. uint8 from
    # the Keras MNIST loader), so align them before comparing.
    labels = tf.cast(test_labels, predictions.dtype)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32))
    return accuracy.numpy()

def _labels_for_task(labels, task_index):
    """Pick the labels for one task from per-task or shared label input.

    A list/tuple whose items are themselves sequences, or an array with more
    than one dimension, is treated as per-task and indexed by ``task_index``.
    Anything else is treated as a single label array shared by every task.
    """
    if isinstance(labels, (list, tuple)):
        per_task = bool(labels) and hasattr(labels[0], "__len__")
    else:
        per_task = getattr(labels, "ndim", 1) > 1
    return labels[task_index] if per_task else labels


# Sequential training and evaluation
def sequential_training(model, tasks_train_images, tasks_train_labels, tasks_test_images, tasks_test_labels, epochs_per_task, dropout_prob, loss_type, first_task_epochs=50):
    """Train ``model`` on each task in order and record accuracy on seen tasks.

    Args:
        model: Model handed to ``train_model`` / ``test_model``.
        tasks_train_images: Sequence with one training image array per task.
        tasks_train_labels: Training labels, either one array per task or a
            single array shared by all tasks.
        tasks_test_images: Sequence with one test image array per task.
        tasks_test_labels: Test labels, either one array per task or a
            single array shared by all tasks.
        epochs_per_task: Number of epochs for every task after the first.
        dropout_prob: Forwarded unchanged to ``train_model``.
        loss_type: Loss name forwarded to ``train_model``.
        first_task_epochs: Number of epochs for the first task (default 50).

    Returns:
        A ``(num_tasks, num_tasks)`` array ``R`` where ``R[i, j]`` is the
        accuracy on task ``j`` after training through task ``i``; entries
        with ``j > i`` stay zero.
    """
    num_tasks = len(tasks_train_images)
    R = np.zeros((num_tasks, num_tasks))  # Result matrix

    for task_index in range(num_tasks):
        # The first task gets its own budget; later tasks use epochs_per_task.
        epochs = first_task_epochs if task_index == 0 else epochs_per_task
        model = train_model(
            model,
            tasks_train_images[task_index],
            _labels_for_task(tasks_train_labels, task_index),
            epochs,
            dropout_prob,
            loss_type,
        )

        # Test on all seen tasks
        for test_task in range(task_index + 1):
            R[task_index, test_task] = test_model(
                model,
                tasks_test_images[test_task],
                _labels_for_task(tasks_test_labels, test_task),
            )

    return R

# Calculate ACC and BWT
def calculate_ACC(R):
    """Return the mean of the last row of the result matrix ``R``.

    ``R[i, j]`` holds the accuracy on task ``j`` after training through task
    ``i``, so the last row is the accuracy on every task once all training
    has finished.
    """
    return np.mean(R[-1, :])

def calculate_BWT(R):
    """Return the backward transfer computed from the result matrix ``R``.

    For every task except the last, the accuracy right after that task was
    trained (``R[i, i]``) is subtracted from its accuracy after all training
    (``R[-1, i]``); the differences are averaged over those ``T - 1`` tasks.

    Returns 0.0 when ``R`` describes fewer than two tasks, because there is
    no earlier task to measure transfer on.
    """
    T = R.shape[0]
    if T < 2:
        # Avoid dividing by zero for a single-task run.
        return 0.0
    return sum(R[-1, i] - R[i, i] for i in range(T - 1)) / (T - 1)

# Optional: Calculate TBWT and CBWT
def calculate_TBWT(R, task_index):
    """Return the accuracy change on one task between learning it and the end.

    The value is ``R[-1, task_index]`` (accuracy after all training) minus
    ``R[task_index, task_index]`` (accuracy right after that task's training).
    """
    final_accuracy = R[-1, task_index]
    accuracy_when_learned = R[task_index, task_index]
    return final_accuracy - accuracy_when_learned

def calculate_CBWT(R):
    """Return the per-task backward transfer averaged over all ``T`` tasks.

    The last task is included in the average; its own term is
    ``R[-1, -1] - R[-1, -1]`` and therefore contributes zero.
    """
    task_count = R.shape[0]
    total = sum(calculate_TBWT(R, task) for task in range(task_count))
    return total / task_count

# Experiment with different configurations
loss_types = ["NLL", "L1", "L2", "L1+L2"]
optimizers_list = ["SGD", "Adam", "RMSProp"]
depths = [2, 3, 4]
dropout_rates = [0.2, 0.4]

# One (loss, optimizer, depth, dropout, ACC, BWT) tuple per configuration.
results = []

# itertools.product visits the grid in the same order as four nested loops
# (the right-most list varies fastest).
for loss_type, optimizer_type, depth, dropout_prob in itertools.product(
    loss_types, optimizers_list, depths, dropout_rates
):
    # Build a fresh model for every configuration so that no weights or
    # optimizer state carry over between runs.
    model = MLP(depth=depth, dropout_prob=dropout_prob, optimizer_type=optimizer_type)
    R = sequential_training(model, tasks_train_images, train_labels, tasks_test_images, test_labels, epochs_per_task=20, dropout_prob=dropout_prob, loss_type=loss_type)
    ACC = calculate_ACC(R)
    BWT = calculate_BWT(R)
    results.append((loss_type, optimizer_type, depth, dropout_prob, ACC, BWT))
    print(f"Loss: {loss_type}, Optimizer: {optimizer_type}, Depth: {depth}, Dropout: {dropout_prob}, ACC: {ACC}, BWT: {BWT}")

# Plot validation results
def plot_validation_results(all_accuracy_results):
    """Draw one accuracy curve per task on a single figure and show it.

    Args:
        all_accuracy_results: Iterable of accuracy sequences. The sequence at
            position ``i`` is plotted against its index positions and
            labelled ``Task i+1`` in the legend.
    """
    plt.figure(figsize=(10, 6))
    for i, task_curve in enumerate(all_accuracy_results):
        plt.plot(task_curve, label=f'Task {i+1}')
    plt.title('Validation Accuracy Over Tasks')
    plt.xlabel('Epochs')
    plt.ylabel('Validation Accuracy')
    plt.legend()
    plt.show()

# `all_accuracy_results` (a list of per-task accuracy lists) is not produced
# by the experiment loop in this file. Only plot when it has been defined
# elsewhere, e.g. in an earlier notebook cell, instead of raising NameError.
if "all_accuracy_results" in globals():
    plot_validation_results(all_accuracy_results)
else:
    print("Skipping validation plot: 'all_accuracy_results' is not defined.")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
