# Permuted TensorFlow

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras import layers
from tensorflow.keras import backend as K
import numpy as np
from tqdm import tqdm


def evaluate(model, test_x, test_y):
    """Return the model's accuracy on a test set as a percentage.

    Every sample is wrapped into a batch of one, scored with
    ``model.predict_on_batch`` and fed into a running
    ``SparseCategoricalAccuracy`` metric.

    Args:
        model: Keras model exposing ``predict_on_batch``.
        test_x: iterable of input samples.
        test_y: iterable of labels, aligned with ``test_x``.

    Returns:
        Accuracy in percent, rounded to two decimals.
    """
    metric = tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')
    for sample, label in zip(test_x, test_y):
        single_batch = np.array([sample])
        metric.update_state(label, model.predict_on_batch(single_batch))
    percent = 100 * metric.result().numpy()
    return round(percent, 2)


def permute_task(train, test):
    """Build a permuted task by shuffling features identically in both splits.

    One random permutation of the flattened per-sample features is drawn
    (from NumPy's global RNG) and applied to every sample of ``train`` and
    ``test``. The per-sample feature count is derived from the input shape
    instead of being fixed to 3072, so any image size works.

    Args:
        train: array of shape ``(n_train, ...)``.
        test: array of shape ``(n_test, ...)`` with the same per-sample size.

    Returns:
        Tuple ``(train_permuted, test_permuted)`` with the original shapes.

    Raises:
        ValueError: if the two splits have different per-sample sizes.
    """
    train_shape, test_shape = train.shape, test.shape
    n_features = int(np.prod(train_shape[1:]))
    if int(np.prod(test_shape[1:])) != n_features:
        raise ValueError(
            "train and test must have the same number of features per sample"
        )
    train_flat = train.reshape((-1, n_features))
    test_flat = test.reshape((-1, n_features))
    # A single shared index array guarantees both splits see the same shuffle.
    idx = np.arange(n_features)
    np.random.shuffle(idx)
    train_permuted, test_permuted = train_flat[:, idx], test_flat[:, idx]
    return (train_permuted.reshape(train_shape), test_permuted.reshape(test_shape))


class Train:
    """Custom training loop with an optional EWC penalty term.

    Holds the optimizer, the task loss, the weights learned on the previous
    task (``prior_weights``) and the penalty strength ``lambda_``.
    """

    def __init__(self, optimizer, loss_fn, prior_weights=None, lambda_=0.1):
        self.lambda_ = lambda_
        self.prior_weights = prior_weights
        self.loss_fn = loss_fn
        self.optimizer = optimizer

    def train(self, model, epochs, train_task, fisher_matrix=None, test_tasks=None):
        """Train ``model`` for ``epochs`` passes over ``train_task``.

        Args:
            model: model to optimise in place.
            epochs: number of passes over ``train_task``.
            train_task: iterable yielding ``(X, y)`` batches.
            fisher_matrix: when not None, the EWC penalty is added to the loss.
            test_tasks: optional sequence of ``(test_x, test_y)`` pairs that
                are evaluated after every epoch.

        Returns:
            One list of per-epoch accuracies per test task, or None when no
            test tasks were given.
        """
        # One accuracy history per test task (None when nothing to evaluate).
        test_acc = [[] for _ in test_tasks] if test_tasks else None
        use_ewc = fisher_matrix is not None

        for _ in tqdm(range(epochs)):
            for X, y in train_task:
                with tf.GradientTape() as tape:
                    loss = self.loss_fn(y, model(X))
                    if use_ewc:
                        loss += self.compute_penalty_loss(model, fisher_matrix)
                variables = model.trainable_variables
                gradients = tape.gradient(loss, variables)
                self.optimizer.apply_gradients(zip(gradients, variables))

            # Score every test task once the epoch is finished.
            if test_acc:
                for history, task in zip(test_acc, test_tasks):
                    history.append(evaluate(model, task[0], task[1]))

        print(test_acc)
        return test_acc

    def compute_penalty_loss(self, model, fisher_matrix):
        """Return ``0.5 * lambda_ * sum(F * (w - w_prior)**2)`` over all weights."""
        penalty = 0.
        triples = zip(fisher_matrix, model.weights, self.prior_weights)
        for fisher, current, previous in triples:
            squared_shift = tf.math.square(current - previous)
            penalty += tf.math.reduce_sum(fisher * squared_shift)
        return 0.5 * self.lambda_ * penalty


class EWC:
    """Estimate a diagonal Fisher matrix for a model trained on a prior task.

    The estimate is computed once in the constructor and exposed through
    ``get_fisher``.

    Args:
        prior_model: model whose ``weights`` the Fisher values refer to.
        data_samples: array of input samples; one row is drawn at random per
            Fisher sample.
        num_sample: number of random samples averaged into the estimate.
    """

    def __init__(self, prior_model, data_samples, num_sample=30):
        self.prior_model = prior_model
        self.prior_weights = prior_model.weights
        self.num_sample = num_sample
        self.data_samples = data_samples
        self.fisher_matrix = self.compute_fisher()

    def compute_fisher(self):
        """Return an object array holding one squared-gradient average per weight.

        NOTE(review): ``log_softmax`` is applied to the model output and the
        gradient is taken of the whole output vector. If the model already
        ends in a softmax layer, or if only the sampled class should
        contribute, this is not the textbook Fisher estimate - confirm the
        intended formula before relying on the values.
        """
        weights = self.prior_weights
        # Fill a pre-sized object array element by element. Building it with
        # np.array([...], dtype=object) lets NumPy try to stack the per-weight
        # arrays, which fails or produces an N-D array for some shape mixes.
        fisher_accum = np.empty(len(weights), dtype=object)
        for m, weight in enumerate(weights):
            fisher_accum[m] = np.zeros(weight.numpy().shape)

        for _ in tqdm(range(self.num_sample)):
            idx = np.random.randint(self.data_samples.shape[0])
            with tf.GradientTape() as tape:
                logits = tf.nn.log_softmax(self.prior_model(np.array([self.data_samples[idx]])))
            grads = tape.gradient(logits, weights)
            for m, grad in enumerate(grads):
                # Weights that do not influence the output yield a None
                # gradient; they contribute nothing to the estimate.
                if grad is not None:
                    fisher_accum[m] += np.square(grad)
        fisher_accum /= self.num_sample
        return fisher_accum

    def get_fisher(self):
        """Return the Fisher estimate computed at construction time."""
        return self.fisher_matrix


from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D

class Convmodel:
    """Small Keras CNN: four conv/pool stages followed by dense layers.

    Args:
        input_shape: shape of one input image.
        hidden_layers_neuron_list: sizes of the hidden dense layers, one
            layer per entry. Defaults to ``[200, 100, 50, 25, 12]``.
        num_classes: number of units in the softmax output layer.
    """

    def __init__(self, input_shape=(32,32,3), hidden_layers_neuron_list=None, num_classes=10):
        # A None sentinel avoids sharing one mutable default list between
        # instances.
        if hidden_layers_neuron_list is None:
            hidden_layers_neuron_list = [200, 100, 50, 25, 12]
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.hidden_layers_neuron_list = list(hidden_layers_neuron_list)
        self.model = self.create_cnn()

    def create_cnn(self):
        """Build and return the (uncompiled) Sequential model."""
        model = Sequential()

        # Convolutional layers: each stage is a 3x3 conv followed by 2x2 pooling.
        for stage, filters in enumerate((32, 64, 128, 256)):
            if stage == 0:
                model.add(Conv2D(filters, (3, 3), activation='relu', input_shape=self.input_shape))
            else:
                model.add(Conv2D(filters, (3, 3), activation='relu'))
            model.add(MaxPooling2D((2, 2), padding='same'))

        # Flatten layer
        model.add(Flatten())

        # Dense layers: one per configured size (no longer fixed to five).
        for units in self.hidden_layers_neuron_list:
            model.add(Dense(units, activation='relu'))

        # Output layer
        model.add(Dense(self.num_classes, activation='softmax'))

        return model

    def get_uncompiled_model(self):
        """Return the underlying model without compiling it."""
        return self.model

    def get_compiled_model(self, optimizer, loss_fn, metrics):
        """Compile the underlying model in place and return it.

        The same model object is returned on every call. Arguments are passed
        to ``compile`` by keyword because the position of ``metrics`` in the
        ``compile`` signature differs between Keras versions.
        """
        compiled_model = self.model
        compiled_model.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)
        return compiled_model

In [None]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from tqdm import tqdm

In [None]:
# Hyperparameters for the TensorFlow EWC experiment.
epochs = 5  # passes over the training data per task
lambda_ = 0.01  # strength of the EWC penalty (handed to Train as lambda_)
lr = 0.0001  # Adam learning rate
num_sample = 30  # number of samples used to estimate the Fisher matrix
opt = tf.keras.optimizers.Adam(learning_rate=lr)
# from_logits=False: the loss is told the model output is already probabilities.
loss_fn=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

In [None]:
# Task A: the original CIFAR-10 images, cast to float32.
(x_train_A, y_train_A), (x_test_A, y_test_A) = cifar10.load_data()
x_train_A = x_train_A.astype('float32')
x_test_A = x_test_A.astype('float32')

# Training data is served as shuffled mini-batches; test data stays a plain
# (inputs, labels) tuple because evaluate() iterates it sample by sample.
train_A = tf.data.Dataset.from_tensor_slices((x_train_A, y_train_A)).shuffle(1000).batch(32)
test_A = (x_test_A, y_test_A)

# Task B: the same images with one fixed pixel permutation; labels are shared.
x_train_B, x_test_B = permute_task(x_train_A, x_test_A)
y_train_B, y_test_B = y_train_A, y_test_A

# NOTE(review): task B uses batch size 4 while task A uses 32 - confirm this
# difference is intentional.
train_B = tf.data.Dataset.from_tensor_slices((x_train_B, y_train_B)).shuffle(1000).batch(4)
test_B = (x_test_B, y_test_B)

In [None]:
# Build the CNN wrapper (the variable is called `mlp` but holds a Convmodel).
mlp = Convmodel()

# Plain trainer without an EWC penalty (no prior weights / Fisher matrix).
trn_gd = Train(opt, loss_fn)
model = mlp.get_compiled_model(opt, loss_fn, ['accuracy'])

# Train on task A; train() returns one accuracy history per test task, so [0]
# selects the history for test_A.
acc_prior_A = trn_gd.train(model, epochs, train_A, test_tasks=[test_A])[0]
# Saved so later cells can restart from the task-A weights.
model.save('CIFAR10_A.h5')
# NOTE(review): the message says "SGD" but `opt` is an Adam optimizer.
print('[INFO] Task A Original (SGD): {}'.format(acc_prior_A[-1]))

NameError: ignored

In [None]:
# construct the fisher matrix using samples from task A
# (EWC computes the matrix in its constructor; get_fisher() just returns it)
ewc = EWC(model, x_train_A, num_sample=num_sample)
f_matrix = ewc.get_fisher()

In [None]:
# get_compiled_model() returns the Convmodel's single underlying model, so
# model_ewcB refers to the same object as `model`, recompiled.
model_ewcB = mlp.get_compiled_model(opt, loss_fn, ['accuracy'])
# Restart from the weights saved after training on task A.
model_ewcB.load_weights('CIFAR10_A.h5')
# Snapshot of the task-A weights; the EWC penalty measures drift from these.
prior_weights = model_ewcB.get_weights()

In [None]:
# Train on task B with the EWC penalty enabled (fisher_matrix is passed in).
trn = Train(opt, loss_fn, prior_weights=prior_weights, lambda_=lambda_)
# Two test tasks are given, so train() returns two accuracy histories.
acc_ewcA, acc_ewcB = trn.train(model_ewcB,
                     epochs,
                     train_B,
                     fisher_matrix=f_matrix,
                     test_tasks=[test_A, test_B]
                    )

print('[INFO] Task A ACC. after training B with EWC: {}'.format(acc_ewcA[-1]))
print('[INFO] Task B ACC. after training B with EWC: {}'.format(acc_ewcB[-1]))

In [None]:
# Baseline: train on task B without the EWC penalty, starting again from the
# saved task-A weights. get_compiled_model() hands back the same underlying
# model object that the previous cells used.
model_sgdB = mlp.get_compiled_model(opt, loss_fn, ['accuracy'])
model_sgdB.load_weights('CIFAR10_A.h5')
acc_sgdA, acc_sgdB = trn_gd.train(model_sgdB, epochs, train_B, test_tasks = [test_A, test_B])

print('[INFO] Task A ACC. after training B with GD: {}'.format(acc_sgdA[-1]))
print('[INFO] Task B ACC. after training B with GD: {}'.format(acc_sgdB[-1]))

In [None]:
# Bar chart of the final-epoch accuracies: two bars for task B (EWC, plain)
# and three for task A (before task B, after EWC, after plain training).
x = 0
total_width, n = 0.1, 2
width = total_width / n
# Shift the first bar left so the task-B pair is centred on x = 0.
x = x - (total_width - width) / 2
plt.style.use('ggplot')
plt.bar(x, acc_ewcB[-1], width=width, label='EWC B', hatch='w/', ec='w')
plt.bar(x + width, acc_sgdB[-1], width=width, label='SGD B', hatch='w/', ec='w')
plt.bar(x + 3.5 * width, acc_prior_A[-1], width=width, label='Prior A', hatch='w/', ec='w')
plt.bar(x + 4.5 * width, acc_ewcA[-1], width=width, label='EWC A', hatch='w/', ec='w')
plt.bar(x + 5.5 * width, acc_sgdA[-1], width=width, label='SGD A', hatch='w/', ec='w')
plt.legend(facecolor='white', loc='lower left')
plt.xticks(np.array([0., 3.5 * width]), ('Task B', 'Task A'))
plt.title('Training task B with EWC Vs SGD after \n task A had been trained to criterion')
plt.xlim(-0.15, 0.35)
plt.ylim(0., 105.)
plt.show()

In [None]:
# Task-A test accuracy over time: the first `epochs` points come from training
# on task A, the next `epochs` points from training on task B (plain vs EWC).
plt.plot(range(0, epochs*2, 1), (acc_prior_A + acc_sgdA), color='green', linestyle='dashed', label = "SGD")
plt.plot(range(0, epochs*2, 1), (acc_prior_A + acc_ewcA), color='red', linestyle='dashed', label = "EWC")
plt.plot(range(0, epochs, 1), (acc_prior_A), color='blue', label = "Prior")
# Scale the tick spacing with the number of plotted epochs; a fixed step of 50
# left only a single tick at 0 for short runs.
plt.xticks(range(0, epochs*2, max(1, (epochs*2) // 10)))
plt.title('Training task B with EWC Vs SGD after \n task A had been trained to criterion')
plt.legend(facecolor='white')
plt.ylabel('Test accuracy A')
plt.xlabel('Epochs')

# Permuted ONE ARCHITECTURE

In [None]:
import torch
torch.cuda.is_available()
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import keras

In [None]:
# Load CIFAR-10 through Keras: x_* are the images, t_* the labels.
[(x_train, t_train), (x_test, t_test)] = keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
# Move the last axis to position 1, giving the (N, 3, 32, 32) layout printed
# below and expected by the nn.Conv2d layers.
x_train = np.transpose(x_train, (0, 3, 1, 2))
x_test = np.transpose(x_test, (0, 3, 1, 2))

In [None]:
# Cast the image arrays to float32. A failed conversion now raises right here
# instead of being printed and ignored, which previously let the wrong dtype
# flow into later cells. The former second ".astype" pass was redundant: it
# could only run when this conversion had already failed.
x_train = np.asarray(x_train, dtype=np.float32)
x_test = np.asarray(x_test, dtype=np.float32)

# Drop size-1 axes from the label arrays so each label is a scalar entry.
t_train=t_train.squeeze()
t_test=t_test.squeeze()

In [None]:
# Convert everything (labels included) to float32 tensors ...
x_train = torch.tensor(x_train, dtype=torch.float32)
t_train = torch.tensor(t_train, dtype=torch.float32)
x_test = torch.tensor(x_test, dtype=torch.float32)
t_test = torch.tensor(t_test, dtype=torch.float32)

# ... and back to NumPy arrays: the batching helpers below slice NumPy arrays
# and call torch.from_numpy on each slice.
# NOTE(review): the round trip looks redundant apart from the float32 cast of
# the labels - confirm before simplifying.
x_train = np.asarray(x_train)
t_train = np.asarray(t_train)
x_test = np.asarray(x_test)
t_test = np.asarray(t_test)

In [None]:
# Sanity check: print shape and dtype of every array.
print("x_train dim and type: ", x_train.shape, x_train.dtype)
print("t_train dim and type: ", t_train.shape, t_train.dtype)
print("x_test dim and type: ", x_test.shape, x_test.dtype)
print("t_test dim and type: ", t_test.shape, t_test.dtype)

x_train dim and type:  (50000, 3, 32, 32) float32
t_train dim and type:  (50000,) float32
x_test dim and type:  (10000, 3, 32, 32) float32
t_test dim and type:  (10000,) float32


In [None]:
# switch to False to use CPU
use_cuda = True

# Fall back to the CPU automatically when no CUDA device is available.
use_cuda = use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu");
torch.manual_seed(1);
# Module-level settings read as globals by the training/evaluation helpers.
batch_size=256
m = nn.LogSoftmax(dim=1)  # applied to the model output in on_task_update()
Loss = nn.MSELoss()  # used by on_task_update() and train()

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

import torch.nn.functional as F

class ONEArchitecture(nn.Module):
    """Three 3x3 convolutions followed by three fully connected layers.

    The convolutions use ``padding=1`` and no pooling, so the spatial size is
    unchanged and the flattened feature count is
    ``128 * image_size * image_size`` (131072 for the default 32x32 input).

    Args:
        in_channels: number of channels of the input images.
        image_size: height and width of the (square) input images.
        num_classes: number of output units.
    """

    def __init__(self, in_channels=3, image_size=32, num_classes=10):
        super(ONEArchitecture, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        # Fully connected layers
        self.fc1 = nn.Linear(128 * image_size * image_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch ``x`` of shape (N, C, H, W)."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 131072), this can never fold extra pixels into the batch
        # axis: a wrongly sized input fails in fc1 instead.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def permute_cifar10(cifar10, seed):
    """Permute the pixels of every image in every data set the same way.

    NumPy's global RNG is seeded with ``seed`` and one permutation of the
    ``h * w`` pixel positions is drawn; it is applied to every channel of
    every image. The image size is taken from the first data set.

    Args:
        cifar10: iterable of arrays shaped ``(num_img, num_channels, h, w)``.
        seed: seed for ``np.random.seed``; fixes the permutation.

    Returns:
        List with one permuted array (same shape and dtype) per input array.
    """
    np.random.seed(seed)
    print("starting permutation...")
    data_sets = list(cifar10)

    perm_cifar10 = []
    if data_sets:
        h, w = data_sets[0].shape[2:4]
        # Shuffling a plain list keeps the permutation identical to what
        # earlier runs produced for the same seed.
        perm_inds = list(range(h * w))
        np.random.shuffle(perm_inds)

        for data_set in data_sets:
            num_img, num_channels = data_set.shape[:2]
            flat = data_set.reshape((num_img, num_channels, h * w))
            # One vectorised index over the pixel axis replaces the
            # per-sample Python loop.
            permuted = flat[:, :, perm_inds].reshape((num_img, num_channels, h, w))
            perm_cifar10.append(np.ascontiguousarray(permuted))

    print("done.")
    return perm_cifar10

def on_task_update(task_id, x_mem, t_mem):
    """Store the parameters and squared gradients of a finished task.

    Gradients of the loss are accumulated over all mini-batches of
    ``(x_mem, t_mem)``; their squares are saved in ``fisher_dict[task_id]``
    and a copy of the current parameters in ``optpar_dict[task_id]``.
    Uses the module-level ``model``, ``optimizer``, ``device``,
    ``batch_size``, ``Loss`` and ``m``.

    NOTE(review): the loss is the MSE between log-softmax outputs and one-hot
    labels, whereas train_ewc() optimises cross-entropy - confirm which loss
    the importance estimate should be based on.

    Args:
        task_id: key under which the statistics are stored.
        x_mem: NumPy array of inputs for the task.
        t_mem: NumPy array of labels for the task.
    """
    model.train()
    optimizer.zero_grad()

    # accumulating gradients
    # (range stops at len(t_mem), so a trailing single sample is not dropped)
    for start in range(0, len(t_mem), batch_size):
        end = start + batch_size
        x = torch.from_numpy(x_mem[start:end]).to(device)
        y = torch.from_numpy(t_mem[start:end]).long().reshape(-1).to(device)
        output = model(x).float()
        # An explicit num_classes keeps the one-hot width equal to the model
        # output even when the batch does not contain the highest class id.
        y = nn.functional.one_hot(y, num_classes=output.shape[1]).float()
        loss = Loss(m(output), y)
        loss.backward()

    fisher_dict[task_id] = {}
    optpar_dict[task_id] = {}

    # gradients accumulated can be used to calculate fisher
    for name, param in model.named_parameters():
        optpar_dict[task_id][name] = param.data.clone()
        if param.grad is None:
            # No gradient reached this parameter: treat its importance as zero.
            fisher_dict[task_id][name] = torch.zeros_like(param.data)
        else:
            fisher_dict[task_id][name] = param.grad.data.clone().pow(2)

def train_ewc(model, device, task_id, x_train, t_train, optimizer, epoch):
    """Run one training epoch with the EWC penalty of all earlier tasks.

    For every mini-batch the loss is the cross-entropy plus, for each task
    index below ``task_id``, ``ewc_lambda * sum(fisher * (optpar - param)**2)``
    taken from the module-level ``fisher_dict`` / ``optpar_dict``.

    Args:
        model: network to train in place.
        device: torch device the batches are moved to.
        task_id: index of the current task; tasks ``0..task_id-1`` are penalised.
        x_train: NumPy array of inputs.
        t_train: NumPy array of labels.
        optimizer: optimizer stepping the model parameters.
        epoch: epoch number (not used in the computation).

    Returns:
        The dict produced by ``evaluate_metrics`` on the training data
        (previously computed and discarded).
    """
    model.train()

    # range stops at len(t_train), so a trailing single sample is not dropped
    for start in range(0, len(t_train), batch_size):
        end = start + batch_size
        x = torch.from_numpy(x_train[start:end]).to(device)
        y = torch.from_numpy(t_train[start:end]).long().reshape(-1).to(device)

        optimizer.zero_grad()

        output = model(x).float()
        # Class-index targets give the same loss as one-hot float targets and
        # cannot end up with a one-hot width that differs from the output.
        loss = F.cross_entropy(output, y)

        # EWC penalty: pull parameters towards the values stored for every
        # previously learned task, weighted by their stored importance.
        for task in range(task_id):
            for name, param in model.named_parameters():
                fisher = fisher_dict[task][name]
                optpar = optpar_dict[task][name]
                loss += (fisher * (optpar - param).pow(2)).sum() * ewc_lambda

        loss.backward()
        optimizer.step()

    # After each epoch, evaluate on the training data.
    return evaluate_metrics(model, device, x_train, t_train)

def evaluate_metrics(model, device, x_data, t_data):
    """Return weighted precision, recall and F1 of ``model`` on a data set.

    Predictions are the arg-max over the model output, computed batch by
    batch (module-level ``batch_size``) without gradient tracking.

    Args:
        model: network to evaluate (switched to eval mode).
        device: torch device the batches are moved to.
        x_data: NumPy array of inputs.
        t_data: NumPy array of labels.

    Returns:
        Dict with the keys ``'precision'``, ``'recall'`` and ``'f1'``.
    """
    model.eval()
    all_preds = []
    all_labels = []

    # range stops at len(t_data), so a trailing single sample is not dropped
    for start in range(0, len(t_data), batch_size):
        end = start + batch_size
        with torch.no_grad():
            x = torch.from_numpy(x_data[start:end]).to(device)
            y = torch.from_numpy(t_data[start:end]).long().to(device)
            output = model(x)

            # Convert predictions to numpy arrays
            all_preds.extend(output.argmax(dim=1).cpu().numpy())
            all_labels.extend(y.cpu().numpy())

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return {'precision': precision, 'recall': recall, 'f1': f1}

def train(model, device, x_train, t_train, optimizer, epoch):
    """Run one plain training epoch (no EWC penalty).

    The loss is the module-level ``Loss`` between the raw model output and
    the one-hot encoded labels. The loss of the last batch is printed.

    Args:
        model: network to train in place.
        device: torch device the batches are moved to.
        x_train: NumPy array of inputs.
        t_train: NumPy array of labels.
        optimizer: optimizer stepping the model parameters.
        epoch: epoch number, used only in the printed message.
    """
    model.train()

    loss = None
    # range stops at len(t_train), so a trailing single sample is not dropped
    for start in range(0, len(t_train), batch_size):
        end = start + batch_size
        x = torch.from_numpy(x_train[start:end]).to(device)
        y = torch.from_numpy(t_train[start:end]).long().reshape(-1).to(device)

        optimizer.zero_grad()

        output = model(x).float()
        # An explicit num_classes keeps the one-hot width equal to the model
        # output even when the batch does not contain the highest class id.
        y = nn.functional.one_hot(y, num_classes=output.shape[1]).float()
        loss = Loss(output, y)
        loss.backward()
        optimizer.step()

    # Nothing to report when the data set was empty.
    if loss is not None:
        print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))

def test(model, device, x_test, t_test):
    """Evaluate ``model`` on a test set, print a report, return the accuracy.

    Prints the average cross-entropy per sample, the accuracy and the
    weighted precision / recall / F1.

    Args:
        model: network to evaluate (switched to eval mode).
        device: torch device the batches are moved to.
        x_test: NumPy array of inputs.
        t_test: NumPy array of labels.

    Returns:
        Accuracy in percent.
    """
    model.eval()
    test_loss = 0
    correct = 0
    all_preds = []
    all_labels = []

    # range stops at len(t_test), so a trailing single sample is not dropped
    for start in range(0, len(t_test), batch_size):
        end = start + batch_size
        with torch.no_grad():
            x = torch.from_numpy(x_test[start:end]).to(device)
            y = torch.from_numpy(t_test[start:end]).long().reshape(-1).to(device)
            output = model(x)
            # Sum the per-sample losses so that dividing by the number of
            # samples below yields a true average (summing batch means and
            # dividing by the sample count under-reported the loss).
            test_loss += F.cross_entropy(output, y, reduction='sum').item()
            pred = output.argmax(dim=1)  # index of the max logit
            correct += pred.eq(y).sum().item()

            # Convert predictions to numpy arrays
            all_preds.extend(pred.cpu().numpy())
            all_labels.extend(y.cpu().numpy())

    test_loss /= len(t_test)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(t_test), 100. * correct / len(t_test)))
    print('Precision: {:.4f}, Recall: {:.4f}, F1 Score: {:.4f}\n'.format(precision, recall, f1))

    return 100. * correct / len(t_test)

In [None]:
# Each task is [(train inputs, train labels), (test inputs, test labels)].
# task 1: the unpermuted images
task_1 = [(x_train, t_train), (x_test, t_test)]

# task 2: pixel permutation drawn with seed 1 (labels are shared)
x_train2, x_test2 = permute_cifar10([x_train, x_test], 1)
task_2 = [(x_train2, t_train), (x_test2, t_test)]

# task 3: pixel permutation drawn with seed 2
x_train3, x_test3 = permute_cifar10([x_train, x_test], 2)
task_3 = [(x_train3, t_train), (x_test3, t_test)]

# task list
tasks = [task_1, task_2, task_3]

starting permutation...
done.
starting permutation...
done.


In [None]:
# Model, optimizer and EWC state; the helper functions read these as globals.
model = ONEArchitecture().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
fisher_dict = {}  # task id -> {parameter name: squared accumulated gradient}
optpar_dict = {}  # task id -> {parameter name: parameter copy after the task}
ewc_lambda = 0.5  # multiplier of the EWC penalty in train_ewc()

In [None]:
import warnings
warnings.filterwarnings('ignore')
from tqdm.auto import tqdm

# Train on the tasks one after another; after each task, evaluate on all tasks
# and record the average accuracy.
ewc_accs = []
for task_id, (train_split, _) in enumerate(tasks):
    avg_acc = 0
    print("Training on task: ", task_id)

    # Dedicated names: the global x_train / t_train used to build the task
    # list are no longer overwritten, and the builtin `id` is not shadowed.
    task_x_train, task_t_train = train_split

    for epoch in tqdm(range(1, 16)):
        train_ewc(model, device, task_id, task_x_train, task_t_train, optimizer, epoch)
    on_task_update(task_id, task_x_train, task_t_train)

    for id_test, (_, (task_x_test, task_t_test)) in enumerate(tasks):
        print("Testing on task: ", id_test)
        avg_acc = avg_acc + test(model, device, task_x_test, task_t_test)

    # Average over however many tasks there are instead of a fixed 3.
    print("Avg acc: ", avg_acc / len(tasks))
    ewc_accs.append(avg_acc / len(tasks))

Training on task:  0


  0%|          | 0/15 [00:00<?, ?it/s]

Testing on task:  0
Test set: Average loss: 0.0079, Accuracy: 6469/10000 (65%)
Precision: 0.6464, Recall: 0.6469, F1 Score: 0.6445

Testing on task:  1
Test set: Average loss: 0.0946, Accuracy: 1163/10000 (12%)
Precision: 0.1688, Recall: 0.1163, F1 Score: 0.0517

Testing on task:  2
Test set: Average loss: 0.0926, Accuracy: 1246/10000 (12%)
Precision: 0.1347, Recall: 0.1246, F1 Score: 0.0599

Avg acc:  29.593333333333334
Training on task:  1


  0%|          | 0/15 [00:00<?, ?it/s]

Testing on task:  0
Test set: Average loss: 0.0088, Accuracy: 2767/10000 (28%)
Precision: 0.3324, Recall: 0.2767, F1 Score: 0.2642

Testing on task:  1
Test set: Average loss: 0.0058, Accuracy: 4915/10000 (49%)
Precision: 0.5013, Recall: 0.4915, F1 Score: 0.4856

Testing on task:  2
Test set: Average loss: 0.0100, Accuracy: 1985/10000 (20%)
Precision: 0.1788, Recall: 0.1985, F1 Score: 0.1653

Avg acc:  32.22333333333333
Training on task:  2


  0%|          | 0/15 [00:00<?, ?it/s]

Testing on task:  0
Test set: Average loss: 0.0088, Accuracy: 2730/10000 (27%)
Precision: 0.3449, Recall: 0.2730, F1 Score: 0.2590

Testing on task:  1
Test set: Average loss: 0.0061, Accuracy: 4637/10000 (46%)
Precision: 0.4924, Recall: 0.4637, F1 Score: 0.4596

Testing on task:  2
Test set: Average loss: 0.0072, Accuracy: 3605/10000 (36%)
Precision: 0.3793, Recall: 0.3605, F1 Score: 0.3549

Avg acc:  36.57333333333333


# Permuted MobileNET

In [None]:
import torch
torch.cuda.is_available()
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import keras

In [None]:
# Load CIFAR-10 through Keras: x_* are the images, t_* the labels.
[(x_train, t_train), (x_test, t_test)] = keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
# Move the last axis to position 1, giving the (N, 3, 32, 32) layout printed
# below and expected by the nn.Conv2d layers.
x_train = np.transpose(x_train, (0, 3, 1, 2))
x_test = np.transpose(x_test, (0, 3, 1, 2))

In [None]:
# Cast the image arrays to float32. A failed conversion now raises right here
# instead of being printed and ignored, which previously let the wrong dtype
# flow into later cells. The former second ".astype" pass was redundant: it
# could only run when this conversion had already failed.
x_train = np.asarray(x_train, dtype=np.float32)
x_test = np.asarray(x_test, dtype=np.float32)

# Drop size-1 axes from the label arrays so each label is a scalar entry.
t_train=t_train.squeeze()
t_test=t_test.squeeze()

In [None]:
# Convert everything (labels included) to float32 tensors ...
x_train = torch.tensor(x_train, dtype=torch.float32)
t_train = torch.tensor(t_train, dtype=torch.float32)
x_test = torch.tensor(x_test, dtype=torch.float32)
t_test = torch.tensor(t_test, dtype=torch.float32)

# ... and back to NumPy arrays: the batching helpers below slice NumPy arrays
# and call torch.from_numpy on each slice.
# NOTE(review): the round trip looks redundant apart from the float32 cast of
# the labels - confirm before simplifying.
x_train = np.asarray(x_train)
t_train = np.asarray(t_train)
x_test = np.asarray(x_test)
t_test = np.asarray(t_test)

In [None]:
# Sanity check: print shape and dtype of every array.
print("x_train dim and type: ", x_train.shape, x_train.dtype)
print("t_train dim and type: ", t_train.shape, t_train.dtype)
print("x_test dim and type: ", x_test.shape, x_test.dtype)
print("t_test dim and type: ", t_test.shape, t_test.dtype)

x_train dim and type:  (50000, 3, 32, 32) float32
t_train dim and type:  (50000,) float32
x_test dim and type:  (10000, 3, 32, 32) float32
t_test dim and type:  (10000,) float32


In [None]:
# switch to False to use CPU
use_cuda = True

# Fall back to the CPU automatically when no CUDA device is available.
use_cuda = use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu");
torch.manual_seed(1);
# Module-level settings read as globals by the training/evaluation helpers.
batch_size=256
m = nn.LogSoftmax(dim=1)  # applied to the model output in on_task_update()
Loss = nn.MSELoss()  # used by on_task_update() and train()

In [None]:
class DepthwiseSeparableConv(nn.Module):
    """Depthwise 3x3 convolution followed by a pointwise 1x1 convolution.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the pointwise convolution.
        stride: stride of the depthwise convolution (spatial down-sampling).
    """

    def __init__(self, in_channels, out_channels, stride):
        super(DepthwiseSeparableConv, self).__init__()
        # groups=in_channels gives every input channel its own 3x3 filter.
        # The stride argument is now applied here; it used to be ignored, so
        # no block ever reduced the spatial resolution.
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=stride, padding=1, groups=in_channels)
        # The 1x1 convolution mixes channels and sets the output width.
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Apply the depthwise then the pointwise convolution to ``x``."""
        x = self.depthwise(x)
        x = self.pointwise(x)
        return x


class MobileNetV1(nn.Module):
    """MobileNet-style stack of depthwise-separable convolutions.

    A strided 3x3 convolution with ReLU is followed by thirteen
    ``DepthwiseSeparableConv`` blocks, global average pooling and one linear
    classifier.

    Args:
        num_classes: number of output units of the classifier.
    """

    def __init__(self, num_classes=10):
        super(MobileNetV1, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            DepthwiseSeparableConv(32, 64, 1),
            DepthwiseSeparableConv(64, 128, 2),
            DepthwiseSeparableConv(128, 128, 1),
            DepthwiseSeparableConv(128, 256, 2),
            DepthwiseSeparableConv(256, 256, 1),
            DepthwiseSeparableConv(256, 512, 2),
            # Repeat the following block 5 times for depth
            DepthwiseSeparableConv(512, 512, 1),
            DepthwiseSeparableConv(512, 512, 1),
            DepthwiseSeparableConv(512, 512, 1),
            DepthwiseSeparableConv(512, 512, 1),
            DepthwiseSeparableConv(512, 512, 1),
            # End of repeating block
            DepthwiseSeparableConv(512, 1024, 2),
            DepthwiseSeparableConv(1024, 1024, 1),
            # Pool every channel down to a single value.
            nn.AdaptiveAvgPool2d(1)
        )
        # num_classes is now honoured; the output size used to be fixed at 10.
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch ``x`` of shape (N, 3, H, W)."""
        x = self.model(x)
        # (N, 1024, 1, 1) -> (N, 1024)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# Move the network to the configured device: the training and test helpers
# move every batch to `device`, so the parameters must live there as well.
model = MobileNetV1().to(device)

In [None]:
def permute_cifar10(cifar10, seed):
    """Permute the pixels of every image in every data set the same way.

    NumPy's global RNG is seeded with ``seed`` and one permutation of the
    ``h * w`` pixel positions is drawn; it is applied to every channel of
    every image. The image size is taken from the first data set.

    Args:
        cifar10: iterable of arrays shaped ``(num_img, num_channels, h, w)``.
        seed: seed for ``np.random.seed``; fixes the permutation.

    Returns:
        List with one permuted array (same shape and dtype) per input array.
    """
    np.random.seed(seed)
    print("starting permutation...")
    data_sets = list(cifar10)

    perm_cifar10 = []
    if data_sets:
        h, w = data_sets[0].shape[2:4]
        # Shuffling a plain list keeps the permutation identical to what
        # earlier runs produced for the same seed.
        perm_inds = list(range(h * w))
        np.random.shuffle(perm_inds)

        for data_set in data_sets:
            num_img, num_channels = data_set.shape[:2]
            flat = data_set.reshape((num_img, num_channels, h * w))
            # One vectorised index over the pixel axis replaces the
            # per-sample Python loop.
            permuted = flat[:, :, perm_inds].reshape((num_img, num_channels, h, w))
            perm_cifar10.append(np.ascontiguousarray(permuted))

    print("done.")
    return perm_cifar10

def on_task_update(task_id, x_mem, t_mem):
    """Store the parameters and squared gradients of a finished task.

    Gradients of the loss are accumulated over all mini-batches of
    ``(x_mem, t_mem)``; their squares are saved in ``fisher_dict[task_id]``
    and a copy of the current parameters in ``optpar_dict[task_id]``.
    Uses the module-level ``model``, ``optimizer``, ``device``,
    ``batch_size``, ``Loss`` and ``m``.

    NOTE(review): the loss is the MSE between log-softmax outputs and one-hot
    labels, whereas train_ewc() optimises cross-entropy - confirm which loss
    the importance estimate should be based on.

    Args:
        task_id: key under which the statistics are stored.
        x_mem: NumPy array of inputs for the task.
        t_mem: NumPy array of labels for the task.
    """
    model.train()
    optimizer.zero_grad()

    # accumulating gradients
    # (range stops at len(t_mem), so a trailing single sample is not dropped)
    for start in range(0, len(t_mem), batch_size):
        end = start + batch_size
        x = torch.from_numpy(x_mem[start:end]).to(device)
        y = torch.from_numpy(t_mem[start:end]).long().reshape(-1).to(device)
        output = model(x).float()
        # An explicit num_classes keeps the one-hot width equal to the model
        # output even when the batch does not contain the highest class id.
        y = nn.functional.one_hot(y, num_classes=output.shape[1]).float()
        loss = Loss(m(output), y)
        loss.backward()

    fisher_dict[task_id] = {}
    optpar_dict[task_id] = {}

    # gradients accumulated can be used to calculate fisher
    for name, param in model.named_parameters():
        optpar_dict[task_id][name] = param.data.clone()
        if param.grad is None:
            # No gradient reached this parameter: treat its importance as zero.
            fisher_dict[task_id][name] = torch.zeros_like(param.data)
        else:
            fisher_dict[task_id][name] = param.grad.data.clone().pow(2)

def train_ewc(model, device, task_id, x_train, t_train, optimizer, epoch):
    """Run one training epoch with the EWC penalty of all earlier tasks.

    For every mini-batch the loss is the cross-entropy plus, for each task
    index below ``task_id``, ``ewc_lambda * sum(fisher * (optpar - param)**2)``
    taken from the module-level ``fisher_dict`` / ``optpar_dict``.

    Args:
        model: network to train in place.
        device: torch device the batches are moved to.
        task_id: index of the current task; tasks ``0..task_id-1`` are penalised.
        x_train: NumPy array of inputs.
        t_train: NumPy array of labels.
        optimizer: optimizer stepping the model parameters.
        epoch: epoch number (not used in the computation).

    Returns:
        The dict produced by ``evaluate_metrics`` on the training data
        (previously computed and discarded).
    """
    model.train()

    # range stops at len(t_train), so a trailing single sample is not dropped
    for start in range(0, len(t_train), batch_size):
        end = start + batch_size
        x = torch.from_numpy(x_train[start:end]).to(device)
        y = torch.from_numpy(t_train[start:end]).long().reshape(-1).to(device)

        optimizer.zero_grad()

        output = model(x).float()
        # Class-index targets give the same loss as one-hot float targets and
        # cannot end up with a one-hot width that differs from the output.
        loss = F.cross_entropy(output, y)

        # EWC penalty: pull parameters towards the values stored for every
        # previously learned task, weighted by their stored importance.
        for task in range(task_id):
            for name, param in model.named_parameters():
                fisher = fisher_dict[task][name]
                optpar = optpar_dict[task][name]
                loss += (fisher * (optpar - param).pow(2)).sum() * ewc_lambda

        loss.backward()
        optimizer.step()

    # After each epoch, evaluate on the training data.
    return evaluate_metrics(model, device, x_train, t_train)

def evaluate_metrics(model, device, x_data, t_data):
    """Return weighted precision, recall and F1 of ``model`` on a data set.

    Predictions are the arg-max over the model output, computed batch by
    batch (module-level ``batch_size``) without gradient tracking.

    Args:
        model: network to evaluate (switched to eval mode).
        device: torch device the batches are moved to.
        x_data: NumPy array of inputs.
        t_data: NumPy array of labels.

    Returns:
        Dict with the keys ``'precision'``, ``'recall'`` and ``'f1'``.
    """
    model.eval()
    all_preds = []
    all_labels = []

    # range stops at len(t_data), so a trailing single sample is not dropped
    for start in range(0, len(t_data), batch_size):
        end = start + batch_size
        with torch.no_grad():
            x = torch.from_numpy(x_data[start:end]).to(device)
            y = torch.from_numpy(t_data[start:end]).long().to(device)
            output = model(x)

            # Convert predictions to numpy arrays
            all_preds.extend(output.argmax(dim=1).cpu().numpy())
            all_labels.extend(y.cpu().numpy())

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return {'precision': precision, 'recall': recall, 'f1': f1}

def train(model, device, x_train, t_train, optimizer, epoch):
    """Run one plain training epoch (no EWC penalty).

    The loss is the module-level ``Loss`` between the raw model output and
    the one-hot encoded labels. The loss of the last batch is printed.

    Args:
        model: network to train in place.
        device: torch device the batches are moved to.
        x_train: NumPy array of inputs.
        t_train: NumPy array of labels.
        optimizer: optimizer stepping the model parameters.
        epoch: epoch number, used only in the printed message.
    """
    model.train()

    loss = None
    # range stops at len(t_train), so a trailing single sample is not dropped
    for start in range(0, len(t_train), batch_size):
        end = start + batch_size
        x = torch.from_numpy(x_train[start:end]).to(device)
        y = torch.from_numpy(t_train[start:end]).long().reshape(-1).to(device)

        optimizer.zero_grad()

        output = model(x).float()
        # An explicit num_classes keeps the one-hot width equal to the model
        # output even when the batch does not contain the highest class id.
        y = nn.functional.one_hot(y, num_classes=output.shape[1]).float()
        loss = Loss(output, y)
        loss.backward()
        optimizer.step()

    # Nothing to report when the data set was empty.
    if loss is not None:
        print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))

def test(model, device, x_test, t_test):
    """Evaluate ``model`` on a test set, print a report, return the accuracy.

    Prints the average cross-entropy per sample, the accuracy and the
    weighted precision / recall / F1.

    Args:
        model: network to evaluate (switched to eval mode).
        device: torch device the batches are moved to.
        x_test: NumPy array of inputs.
        t_test: NumPy array of labels.

    Returns:
        Accuracy in percent.
    """
    model.eval()
    test_loss = 0
    correct = 0
    all_preds = []
    all_labels = []

    # range stops at len(t_test), so a trailing single sample is not dropped
    for start in range(0, len(t_test), batch_size):
        end = start + batch_size
        with torch.no_grad():
            x = torch.from_numpy(x_test[start:end]).to(device)
            y = torch.from_numpy(t_test[start:end]).long().reshape(-1).to(device)
            output = model(x)
            # Sum the per-sample losses so that dividing by the number of
            # samples below yields a true average (summing batch means and
            # dividing by the sample count under-reported the loss).
            test_loss += F.cross_entropy(output, y, reduction='sum').item()
            pred = output.argmax(dim=1)  # index of the max logit
            correct += pred.eq(y).sum().item()

            # Convert predictions to numpy arrays
            all_preds.extend(pred.cpu().numpy())
            all_labels.extend(y.cpu().numpy())

    test_loss /= len(t_test)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(t_test), 100. * correct / len(t_test)))
    print('Precision: {:.4f}, Recall: {:.4f}, F1 Score: {:.4f}\n'.format(precision, recall, f1))

    return 100. * correct / len(t_test)

In [None]:
# Each task is [(train inputs, train labels), (test inputs, test labels)].
# task 1: the unpermuted images
task_1 = [(x_train, t_train), (x_test, t_test)]

# task 2: pixel permutation drawn with seed 1 (labels are shared)
x_train2, x_test2 = permute_cifar10([x_train, x_test], 1)
task_2 = [(x_train2, t_train), (x_test2, t_test)]

# task 3: pixel permutation drawn with seed 2
x_train3, x_test3 = permute_cifar10([x_train, x_test], 2)
task_3 = [(x_train3, t_train), (x_test3, t_test)]

# task list
tasks = [task_1, task_2, task_3]

starting permutation...
done.
starting permutation...
done.


In [None]:
# Optimizer and EWC state; the helper functions read these as globals.
optimizer = optim.Adam(model.parameters(), lr=0.0001)
fisher_dict = {}  # task id -> {parameter name: squared accumulated gradient}
optpar_dict = {}  # task id -> {parameter name: parameter copy after the task}
ewc_lambda = 0.5  # multiplier of the EWC penalty in train_ewc()

In [None]:
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import precision_score, recall_score, f1_score
import torch.nn.functional as F

# Train on the tasks one after another; after each task, evaluate on all tasks
# and record the average accuracy.
ewc_accs = []
for task_id, (train_split, _) in enumerate(tasks):
    avg_acc = 0
    print("Training on task: ", task_id)

    # Dedicated names: the global x_train / t_train used to build the task
    # list are no longer overwritten, and the builtin `id` is not shadowed.
    task_x_train, task_t_train = train_split

    for epoch in range(1, 16):
        train_ewc(model, device, task_id, task_x_train, task_t_train, optimizer, epoch)
    on_task_update(task_id, task_x_train, task_t_train)

    for id_test, (_, (task_x_test, task_t_test)) in enumerate(tasks):
        print("Testing on task: ", id_test)
        avg_acc = avg_acc + test(model, device, task_x_test, task_t_test)

    # Average over however many tasks there are instead of a fixed 3.
    print("Avg acc: ", avg_acc / len(tasks))
    ewc_accs.append(avg_acc / len(tasks))

Training on task:  0
