# ONE Architecture Permuted

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
torch.cuda.is_available()
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
from tqdm.auto import tqdm
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import keras

In [None]:
from torchvision import datasets
import os
import torchvision.transforms as transforms
import torch

batch_size = 8
transform = transforms.Compose([transforms.Resize(256),
                                transforms.ToTensor(),
                                transforms.Normalize([0.485,0.456,0.405], [0.229, 0.224, 0.225])])

In [None]:
source_folder = '/content/drive/MyDrive/IIIT/Various_datasets/Satelite Image Classification/data'
train_data = datasets.ImageFolder(os.path.join(source_folder, 'Train'), transform)
val_data = datasets.ImageFolder(os.path.join(source_folder, 'Test'), transform)

trainloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)
validloader = torch.utils.data.DataLoader(val_data, batch_size=batch_size)

train_data_size = len(train_data)
val_data_size = len(val_data)
print(train_data_size)
print(val_data_size)

class_names = train_data.classes
print(class_names)

1851
464
['cloudy', 'desert', 'green_area', 'water']


In [None]:
# Iterate through the DataLoader to get all images and labels
train_images = []
train_labels = []

for batch_images, batch_labels in tqdm(trainloader):
    train_images.append(batch_images.numpy())
    train_labels.append(batch_labels.numpy())

# Concatenate the lists into NumPy arrays
train_images = np.concatenate(train_images)
train_labels = np.concatenate(train_labels)

print("Shape of the image array:", train_images.shape)
print("Shape of the label array:", train_labels.shape)

  0%|          | 0/232 [00:00<?, ?it/s]

Shape of the image array: (1851, 3, 256, 256)
Shape of the label array: (1851,)


In [None]:
# Iterate through the DataLoader to get all images and labels
test_images = []
test_labels = []

for batch_images, batch_labels in tqdm(validloader):
    test_images.append(batch_images.numpy())
    test_labels.append(batch_labels.numpy())

# Concatenate the lists into NumPy arrays
test_images = np.concatenate(test_images)
test_labels = np.concatenate(test_labels)

print("Shape of the image array:", test_images.shape)
print("Shape of the label array:", test_labels.shape)

  0%|          | 0/58 [00:00<?, ?it/s]

Shape of the image array: (464, 3, 256, 256)
Shape of the label array: (464,)


In [None]:
x_train=train_images
t_train=train_labels
x_test=test_images
t_test=test_labels

In [None]:
x_train = torch.tensor(x_train, dtype=torch.float32)
t_train = torch.tensor(t_train, dtype=torch.float32)
x_test = torch.tensor(x_test, dtype=torch.float32)
t_test = torch.tensor(t_test, dtype=torch.float32)

x_train = np.asarray(x_train)
t_train = np.asarray(t_train)
x_test = np.asarray(x_test)
t_test = np.asarray(t_test)

**One hot encoding**

In [None]:
y = t_train.astype("int")
temp = np.zeros((y.size, y.max() + 1))
temp[np.arange(y.size), y] = 1
t_train = temp

In [None]:
y = t_test.astype("int")
temp = np.zeros((y.size, y.max() + 1))
temp[np.arange(y.size), y] = 1
t_test = temp

In [None]:
print("x_train dim and type: ", x_train.shape, x_train.dtype)
print("t_train dim and type: ", t_train.shape, t_train.dtype)
print("x_test dim and type: ", x_test.shape, x_test.dtype)
print("t_test dim and type: ", t_test.shape, t_test.dtype)

x_train dim and type:  (1851, 3, 256, 256) float32
t_train dim and type:  (1851, 4) float64
x_test dim and type:  (464, 3, 256, 256) float32
t_test dim and type:  (464, 4) float64


In [None]:
# switch to False to use CPU
use_cuda = True

use_cuda = use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu");
torch.manual_seed(1);
batch_size=16
m = nn.LogSoftmax(dim=1)
Loss = nn.CrossEntropyLoss()

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

import torch.nn.functional as F

class ImprovedNet(nn.Module):
    def __init__(self):
        super(ImprovedNet, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=0)
        self.mp1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=0)
        self.mp2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=0)
        self.mp3 = nn.MaxPool2d(2, 2)
        self.conv4 = nn.Conv2d(128, 128, kernel_size=3, padding=0)
        self.mp4 = nn.MaxPool2d(2, 2)
        # Fully connected layers
        self.fc1 = nn.Linear(25088, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 4)

        # activation
        self.act = nn.Softmax()

    def forward(self, x):
        x = self.mp1(F.relu(self.conv1(x)))
        # print("Conv1", x.shape)
        x = self.mp2(F.relu(self.conv2(x)))
        # print("Conv2", x.shape)
        x = self.mp3(F.relu(self.conv3(x)))
        x = self.mp4(F.relu(self.conv4(x)))
        # print("Conv3", x.shape)
        # Flatten the tensor before passing it to fully connected layers
        x = x.view(-1, 25088)
        # print("Flatten", x.shape)
        x = F.relu(self.fc1(x))
        # print("FC1", x.shape)
        x = F.relu(self.fc2(x))
        # print("FC2", x.shape)
        x = self.act(self.fc3(x))
        return x


def permute_cifar10(cifar10, seed):
    """ Given the training set, permute pixels of each img the same way. """
    np.random.seed(seed)
    print("starting permutation...")
    h = w = 256
    perm_inds = list(range(h * w))
    np.random.shuffle(perm_inds)

    perm_cifar10 = []
    for data_set in cifar10:
        num_img = data_set.shape[0]
        num_channels = data_set.shape[1]

        # Reshape and permute pixels for each sample in the dataset
        reshaped_set = data_set.reshape((num_img, num_channels, -1))
        permuted_set = np.array([sample[:, perm_inds].reshape((num_channels, h, w)) for sample in reshaped_set])
        perm_cifar10.append(permuted_set)

    print("done.")
    return perm_cifar10

def on_task_update(task_id, x_mem, t_mem):

  model.train()
  optimizer.zero_grad()

  # accumulating gradients
  for start in range(0, len(t_mem)-1, batch_size):
      end = start + batch_size
      end = start + batch_size
      x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end])
      x, y = x.to(device), y.to(device)
      optimizer.zero_grad()
      output = model(x)
      # print('Label',y.shape)
      # print('Output',output.shape)
      output = output.float()
      y = y.float()
      loss = Loss(output, y)
      loss.backward()

  fisher_dict[task_id] = {}
  optpar_dict[task_id] = {}

  # gradients accumulated can be used to calculate fisher
  for name, param in model.named_parameters():

    optpar_dict[task_id][name] = param.data.clone()
    fisher_dict[task_id][name] = param.grad.data.clone().pow(2)

def train_ewc(model, device, task_id, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
        end = start + batch_size
        x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end])
        x, y = x.to(device), y.to(device)
        # print(x.shape)
        # print(y.shape)
        optimizer.zero_grad()
        output = model(x)
        # y = nn.functional.one_hot(y)
        # y = y.squeeze()
        output = output.float()
        y = y.float()
        # print('Label',y.shape)
        # print(y)
        # print('Output',output.shape)
        # print(output)
        loss = Loss(output, y)

        ### magic here! :-)
        for task in range(task_id):
            for name, param in model.named_parameters():
                fisher = fisher_dict[task][name]
                optpar = optpar_dict[task][name]
                loss += (fisher * (optpar - param).pow(2)).sum() * ewc_lambda

        loss.backward()
        optimizer.step()

    # After each epoch, evaluate and print metrics
    train_metrics = evaluate_metrics(model, device, x_train, t_train)
    print(f'Train Epoch: {epoch}\tLoss: {loss.item():.6f}\tPrecision: {train_metrics["precision"]:.4f}\tRecall: {train_metrics["recall"]:.4f}\tF1 Score: {train_metrics["f1"]:.4f}')

def evaluate_metrics(model, device, x_data, t_data):
    model.eval()
    all_preds = []
    all_labels = []

    for start in range(0, len(t_data)-1, batch_size):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_data[start:end]), torch.from_numpy(t_data[start:end])
            x, y = x.to(device), y.to(device)
            output = model(x)
            output = output.float()
            y = y.float()

            # Convert predictions to numpy arrays
            preds = output.argmax(dim=1).cpu().numpy()
            labels = y.argmax(dim=1).cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return {'precision': precision, 'recall': recall, 'f1': f1}

def train(model, device, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
      end = start + batch_size
      x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end])
      y = y.type(torch.LongTensor)
      x, y = x.to(device), y.to(device)
      optimizer.zero_grad()
      output = model(x)
      y = nn.functional.one_hot(y)
      y = y.squeeze()
      output = output.float()
      y = y.float()
      loss = Loss(output, y)
      loss.backward()
      optimizer.step()
      #print(loss.item())
    print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))

def test(model, device, x_test, t_test):
    model.eval()
    test_loss = 0
    correct = 0
    all_preds = []
    all_labels = []

    for start in range(0, len(t_test)-1, batch_size):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_test[start:end]), torch.from_numpy(t_test[start:end])
            x, y = x.to(device), y.to(device)
            output = model(x)
            output = output.float()
            y = y.float()
            test_loss += F.cross_entropy(output, y).item()  # sum up batch loss
            pred = output.max(1)[1]  # get the index of the max logit
            correct += pred.eq(y.max(1)[1]).sum().item()

            # Convert predictions to numpy arrays
            preds = pred.cpu().numpy()
            labels = y.argmax(dim=1).cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    test_loss /= len(t_test)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(t_test), 100. * correct / len(t_test)))
    print('Precision: {:.4f}, Recall: {:.4f}, F1 Score: {:.4f}\n'.format(precision, recall, f1))

    return 100. * correct / len(t_test)

In [None]:
model = ImprovedNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
fisher_dict = {}
optpar_dict = {}
ewc_lambda = 10**8

In [None]:
# task 1
task_1 = [(x_train, t_train), (x_test, t_test)]

# task 2
x_train2, x_test2 = permute_cifar10([x_train, x_test], 101)
task_2 = [(x_train2, t_train), (x_test2, t_test)]

# task list
tasks = [task_1, task_2]

starting permutation...
done.


In [None]:
import warnings
warnings.filterwarnings('ignore')

ewc_accs = []
for id, task in enumerate(tasks):
  avg_acc = 0
  print("Training on task: ", id)

  (x_train, t_train), _ = task

  for epoch in tqdm(range(1, 16)):
    train_ewc(model, device, id, x_train, t_train, optimizer, epoch)
  on_task_update(id, x_train, t_train)

  for id_test, task in enumerate(tasks):
    print("Testing on task: ", id_test)
    _, (x_test, t_test) = task
    acc = test(model, device, x_test, t_test)
    avg_acc = avg_acc + acc

  print("Avg acc: ", avg_acc / 3)
  ewc_accs.append(avg_acc / 3)

Training on task:  0


  0%|          | 0/15 [00:00<?, ?it/s]

Train Epoch: 1	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 2	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 3	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 4	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 5	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 6	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 7	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 8	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 9	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 10	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 11	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 12	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 13	Loss: 1.743669	Precis

  0%|          | 0/15 [00:00<?, ?it/s]

Train Epoch: 1	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 2	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 3	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 4	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 5	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 6	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 7	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 8	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 9	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 10	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 11	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 12	Loss: 1.743669	Precision: 0.0667	Recall: 0.2582	F1 Score: 0.1060
Train Epoch: 13	Loss: 1.743669	Precis

# One Architecture Task Division

In [None]:
import torch
torch.cuda.is_available()
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import keras
import os
batch_size=16

In [None]:
transform = transforms.Compose([transforms.Resize((32,32)),
                                transforms.ToTensor()])

train_task1_data_dir = '/content/drive/MyDrive/IIIT/Various_datasets/Satelite Image Classification/Task Division/Train/Task-1'
test_task1_data_dir = '/content/drive/MyDrive/IIIT/Various_datasets/Satelite Image Classification/Task Division/Test/Task-1'
train_data_task1 = datasets.ImageFolder(os.path.join(train_task1_data_dir), transform)
val_data_task1 = datasets.ImageFolder(os.path.join(test_task1_data_dir), transform)

trainloader_task1 = torch.utils.data.DataLoader(train_data_task1, batch_size=batch_size)
validloader_task1 = torch.utils.data.DataLoader(val_data_task1, batch_size=batch_size)

train_data_size_task1 = len(train_data_task1)
val_data_size_task1 = len(val_data_task1)
print(train_data_size_task1)
print(val_data_size_task1)

class_names_task1 = train_data_task1.classes
print(class_names_task1)

936
235
['cloudy', 'desert']


In [None]:
transform = transforms.Compose([transforms.Resize((32,32)),
                                transforms.ToTensor()])

train_task2_data_dir = '/content/drive/MyDrive/IIIT/Various_datasets/Satelite Image Classification/Task Division/Train/Task-2'
test_task2_data_dir = '/content/drive/MyDrive/IIIT/Various_datasets/Satelite Image Classification/Task Division/Test/Task-2'
train_data_task2 = datasets.ImageFolder(os.path.join(train_task2_data_dir), transform)
val_data_task2 = datasets.ImageFolder(os.path.join(test_task2_data_dir), transform)


trainloader_task2 = torch.utils.data.DataLoader(train_data_task2, batch_size=batch_size)
validloader_task2 = torch.utils.data.DataLoader(val_data_task2, batch_size=batch_size)

train_data_size_task2 = len(train_data_task2)
val_data_size_task2 = len(val_data_task2)
print(train_data_size_task2)
print(val_data_size_task2)

class_names_task2 = train_data_task2.classes
print(class_names_task2)

915
229
['green_area', 'water']


In [None]:
# Iterate through the DataLoader to get all images and labels
train_images_task1 = []
train_labels_task1 = []

for batch_images, batch_labels in tqdm(trainloader_task1):
    train_images_task1.append(batch_images.numpy())
    train_labels_task1.append(batch_labels.numpy())

# Concatenate the lists into NumPy arrays
train_images_task1 = np.concatenate(train_images_task1)
train_labels_task1 = np.concatenate(train_labels_task1)

print("Shape of the image array:", train_images_task1.shape)
print("Shape of the label array:", train_labels_task1.shape)

# Iterate through the DataLoader to get all images and labels
train_images_task2 = []
train_labels_task2 = []

for batch_images, batch_labels in tqdm(trainloader_task2):
    train_images_task2.append(batch_images.numpy())
    train_labels_task2.append(batch_labels.numpy())

# Concatenate the lists into NumPy arrays
train_images_task2 = np.concatenate(train_images_task2)
train_labels_task2 = np.concatenate(train_labels_task2)

print("Shape of the image array:", train_images_task2.shape)
print("Shape of the label array:", train_labels_task2.shape)

  0%|          | 0/59 [00:00<?, ?it/s]

Shape of the image array: (936, 3, 32, 32)
Shape of the label array: (936,)


  0%|          | 0/58 [00:00<?, ?it/s]

Shape of the image array: (915, 3, 32, 32)
Shape of the label array: (915,)


In [None]:
# Iterate through the DataLoader to get all images and labels
test_images_task1 = []
test_labels_task1 = []

for batch_images, batch_labels in tqdm(validloader_task1):
    test_images_task1.append(batch_images.numpy())
    test_labels_task1.append(batch_labels.numpy())

# Concatenate the lists into NumPy arrays
test_images_task1 = np.concatenate(test_images_task1)
test_labels_task1 = np.concatenate(test_labels_task1)

print("Shape of the image array:", test_images_task1.shape)
print("Shape of the label array:", test_labels_task1.shape)

# Iterate through the DataLoader to get all images and labels
test_images_task2 = []
test_labels_task2 = []

for batch_images, batch_labels in tqdm(validloader_task2):
    test_images_task2.append(batch_images.numpy())
    test_labels_task2.append(batch_labels.numpy())

# Concatenate the lists into NumPy arrays
test_images_task2 = np.concatenate(test_images_task2)
test_labels_task2 = np.concatenate(test_labels_task2)

print("Shape of the image array:", test_images_task2.shape)
print("Shape of the label array:", test_labels_task2.shape)

  0%|          | 0/15 [00:00<?, ?it/s]

Shape of the image array: (235, 3, 32, 32)
Shape of the label array: (235,)


  0%|          | 0/15 [00:00<?, ?it/s]

Shape of the image array: (229, 3, 32, 32)
Shape of the label array: (229,)


In [None]:
x_train_task1=train_images_task1
t_train_task1=train_labels_task1
x_test_task1=test_images_task1
t_test_task1=test_labels_task1

x_train_task1 = torch.tensor(x_train_task1, dtype=torch.float32)
t_train_task1 = torch.tensor(t_train_task1, dtype=torch.float32)
x_test_task1 = torch.tensor(x_test_task1, dtype=torch.float32)
t_test_task1 = torch.tensor(t_test_task1, dtype=torch.float32)

x_train_task1 = np.asarray(x_train_task1)
t_train_task1 = np.asarray(t_train_task1)
x_test_task1 = np.asarray(x_test_task1)
t_test_task1 = np.asarray(t_test_task1)

x_train_task2=train_images_task2
t_train_task2=train_labels_task2
x_test_task2=test_images_task2
t_test_task2=test_labels_task2

x_train_task2 = torch.tensor(x_train_task2, dtype=torch.float32)
t_train_task2 = torch.tensor(t_train_task2, dtype=torch.float32)
x_test_task2 = torch.tensor(x_test_task2, dtype=torch.float32)
t_test_task2 = torch.tensor(t_test_task2, dtype=torch.float32)

x_train_task2 = np.asarray(x_train_task2)
t_train_task2 = np.asarray(t_train_task2)
x_test_task2 = np.asarray(x_test_task2)
t_test_task2 = np.asarray(t_test_task2)

In [None]:
y_task11 = t_train_task1.astype("int")
temp11 = np.zeros((y_task11.size, y_task11.max() + 1))
temp11[np.arange(y_task11.size), y_task11] = 1
t_train_task1 = temp11

y_task12 = t_test_task1.astype("int")
temp12 = np.zeros((y_task12.size, y_task12.max() + 1))
temp12[np.arange(y_task12.size), y_task12] = 1
t_test_task1 = temp12

y_task21 = t_train_task2.astype("int")
temp21 = np.zeros((y_task21.size, y_task21.max() + 1))
temp21[np.arange(y_task21.size), y_task21] = 1
t_train_task2 = temp21

y_task22 = t_test_task2.astype("int")
temp22 = np.zeros((y_task22.size, y_task22.max() + 1))
temp22[np.arange(y_task22.size), y_task22] = 1
t_test_task2 = temp22

In [None]:
print("x_train task-1 dim and type: ", x_train_task1.shape, x_train_task1.dtype)
print("t_train task-1 dim and type: ", t_train_task1.shape, t_train_task1.dtype)
print("x_test task-1 dim and type: ", x_test_task1.shape, x_test_task1.dtype)
print("t_test task-1 dim and type: ", t_test_task1.shape, t_test_task1.dtype)

print("x_train task-2 dim and type: ", x_train_task2.shape, x_train_task2.dtype)
print("t_train task-2 dim and type: ", t_train_task2.shape, t_train_task2.dtype)
print("x_test task-2 dim and type: ", x_test_task2.shape, x_test_task2.dtype)
print("t_test task-2 dim and type: ", t_test_task2.shape, t_test_task2.dtype)

x_train task-1 dim and type:  (936, 3, 32, 32) float32
t_train task-1 dim and type:  (936, 2) float64
x_test task-1 dim and type:  (235, 3, 32, 32) float32
t_test task-1 dim and type:  (235, 2) float64
x_train task-2 dim and type:  (915, 3, 32, 32) float32
t_train task-2 dim and type:  (915, 2) float64
x_test task-2 dim and type:  (229, 3, 32, 32) float32
t_test task-2 dim and type:  (229, 2) float64


In [None]:
# switch to False to use CPU
use_cuda = True
use_cuda = use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu");
torch.manual_seed(123123);
m = nn.LogSoftmax(dim=1)
Loss = nn.MSELoss()

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

import torch.nn as nn
import torch.nn.functional as F

class ImprovedNet(nn.Module):
    def __init__(self):
        super(ImprovedNet, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=0)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=0)

        # Fully connected layers
        self.fc1 = nn.Linear(self.calculate_flattening_size(), 2048)
        self.fc2 = nn.Linear(2048, 256)
        self.fc3 = nn.Linear(256, 2)

        # Activation
        self.act = nn.Softmax()

    def calculate_flattening_size(self):
        # Dummy input to get the size after passing through convolutional layers
        x = torch.randn(4, 3, 32, 32)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        return x.size(1) * x.size(2) * x.size(3)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))

        # Dynamically compute the flattening size based on the shape of the output tensor
        x_size = x.size(1) * x.size(2) * x.size(3)
        x = x.view(-1, x_size)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.act(self.fc3(x))
        return x


def permute_cifar10(cifar10, seed):
    """ Given the training set, permute pixels of each img the same way. """
    np.random.seed(seed)
    print("starting permutation...")
    h = w = 16
    perm_inds = list(range(h * w))
    np.random.shuffle(perm_inds)

    perm_cifar10 = []
    for data_set in cifar10:
        num_img = data_set.shape[0]
        num_channels = data_set.shape[1]

        # Reshape and permute pixels for each sample in the dataset
        reshaped_set = data_set.reshape((num_img, num_channels, -1))
        permuted_set = np.array([sample[:, perm_inds].reshape((num_channels, h, w)) for sample in reshaped_set])
        perm_cifar10.append(permuted_set)

    print("done.")
    return perm_cifar10

def on_task_update(task_id, x_mem, t_mem):

  model.train()
  optimizer.zero_grad()

  # accumulating gradients
  for start in range(0, len(t_mem)-1, batch_size):
      end = start + batch_size
      end = start + batch_size
      x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end])
      x, y = x.to(device), y.to(device)
      optimizer.zero_grad()
      output = model(x)
      output = output.float()
      y = y.float()
      loss = Loss(output, y)
      loss.backward()

  fisher_dict[task_id] = {}
  optpar_dict[task_id] = {}

  # gradients accumulated can be used to calculate fisher
  for name, param in model.named_parameters():

    optpar_dict[task_id][name] = param.data.clone()
    fisher_dict[task_id][name] = param.grad.data.clone().pow(2)

def train_ewc(model, device, task_id, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
        end = start + batch_size
        x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end])
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        output = model(x)
        output = output.float()
        y = y.float()
        loss = Loss(output, y)

        ### magic here! :-)
        for task in range(task_id):
            for name, param in model.named_parameters():
                fisher = fisher_dict[task][name]
                optpar = optpar_dict[task][name]
                loss += (fisher * (optpar - param).pow(2)).sum() * ewc_lambda

        loss.backward()
        optimizer.step()

    # After each epoch, evaluate and print metrics
    train_metrics = evaluate_metrics(model, device, x_train, t_train)
    print(f'Train Epoch: {epoch}\tLoss: {loss.item():.6f}\tPrecision: {train_metrics["precision"]:.4f}\tRecall: {train_metrics["recall"]:.4f}\tF1 Score: {train_metrics["f1"]:.4f}')

def evaluate_metrics(model, device, x_data, t_data):
    model.eval()
    all_preds = []
    all_labels = []

    for start in range(0, len(t_data)-1, batch_size):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_data[start:end]), torch.from_numpy(t_data[start:end])
            x, y = x.to(device), y.to(device)
            output = model(x)
            output = output.float()
            y = y.float()

            # Convert predictions to numpy arrays
            preds = output.argmax(dim=1).cpu().numpy()
            labels = y.argmax(dim=1).cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return {'precision': precision, 'recall': recall, 'f1': f1}

def train(model, device, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
      end = start + batch_size
      x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end])
      y = y.type(torch.LongTensor)
      x, y = x.to(device), y.to(device)
      optimizer.zero_grad()
      output = model(x)
      y = nn.functional.one_hot(y)
      y = y.squeeze()
      output = output.float()
      y = y.float()
      loss = Loss(output, y)
      loss.backward()
      optimizer.step()
      #print(loss.item())
    print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))

def test(model, device, x_test, t_test):
    model.eval()
    test_loss = 0
    correct = 0
    all_preds = []
    all_labels = []
    batch_size = 1

    for start in range(0, len(t_test)-1, batch_size):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_test[start:end]), torch.from_numpy(t_test[start:end])
            x, y = x.to(device), y.to(device)
            output = model(x)
            output = output.float()
            y = y.float()
            test_loss += F.cross_entropy(output, y).item()  # sum up batch loss
            pred = output.max(1)[1]  # get the index of the max logit
            correct += pred.eq(y.max(1)[1]).sum().item()

            # Convert predictions to numpy arrays
            preds = pred.cpu().numpy()
            labels = y.argmax(dim=1).cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    test_loss /= len(t_test)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(t_test), 100. * correct / len(t_test)))
    print('Precision: {:.4f}, Recall: {:.4f}, F1 Score: {:.4f}\n'.format(precision, recall, f1))

    return 100. * correct / len(t_test)

In [None]:
model = ImprovedNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=10**-5)
fisher_dict = {}
optpar_dict = {}
ewc_lambda = 10**5
batch_size=4

In [None]:
# task 1
task_1 = [(x_train_task1, t_train_task1), (x_test_task1, t_test_task1)]
task_2 = [(x_train_task2, t_train_task2), (x_test_task2, t_test_task2)]

# task list
tasks = [task_1, task_2]

In [None]:
import warnings
warnings.filterwarnings('ignore')
from tqdm.auto import tqdm
ewc_accs = []
for id, task in enumerate(tasks):
  avg_acc = 0
  print("Training on task: ", id)

  (x_train, t_train), _ = task

  for epoch in tqdm(range(1, 31)):
    train_ewc(model, device, id, x_train, t_train, optimizer, epoch)
  on_task_update(id, x_train, t_train)

  for id_test, task in enumerate(tasks):
    print("Testing on task: ", id_test)
    _, (x_test, t_test) = task
    acc = test(model, device, x_test, t_test)
    avg_acc = avg_acc + acc

  print("Avg acc: ", avg_acc / 3)
  ewc_accs.append(avg_acc / 3)

Training on task:  0


  0%|          | 0/30 [00:00<?, ?it/s]

Train Epoch: 1	Loss: 0.000766	Precision: 0.2394	Recall: 0.4893	F1 Score: 0.3215
Train Epoch: 2	Loss: 0.001017	Precision: 0.2394	Recall: 0.4893	F1 Score: 0.3215
Train Epoch: 3	Loss: 0.000512	Precision: 0.7522	Recall: 0.4979	F1 Score: 0.3402
Train Epoch: 4	Loss: 0.000508	Precision: 0.7682	Recall: 0.5801	F1 Score: 0.4976
Train Epoch: 5	Loss: 0.000481	Precision: 0.7899	Recall: 0.7190	F1 Score: 0.7029
Train Epoch: 6	Loss: 0.000339	Precision: 0.8325	Recall: 0.8098	F1 Score: 0.8072
Train Epoch: 7	Loss: 0.000157	Precision: 0.8445	Recall: 0.8226	F1 Score: 0.8204
Train Epoch: 8	Loss: 0.000113	Precision: 0.8514	Recall: 0.8344	F1 Score: 0.8328
Train Epoch: 9	Loss: 0.000082	Precision: 0.8713	Recall: 0.8611	F1 Score: 0.8604
Train Epoch: 10	Loss: 0.000067	Precision: 0.8801	Recall: 0.8729	F1 Score: 0.8725
Train Epoch: 11	Loss: 0.000055	Precision: 0.8844	Recall: 0.8782	F1 Score: 0.8779
Train Epoch: 12	Loss: 0.000049	Precision: 0.8906	Recall: 0.8857	F1 Score: 0.8855
Train Epoch: 13	Loss: 0.000038	Precis

  0%|          | 0/30 [00:00<?, ?it/s]

Train Epoch: 1	Loss: 0.000004	Precision: 0.2527	Recall: 0.5027	F1 Score: 0.3364
Train Epoch: 2	Loss: 0.000820	Precision: 0.2527	Recall: 0.5027	F1 Score: 0.3364
Train Epoch: 3	Loss: 0.001714	Precision: 0.2527	Recall: 0.5027	F1 Score: 0.3364
Train Epoch: 4	Loss: 0.012331	Precision: 0.2527	Recall: 0.5027	F1 Score: 0.3364
Train Epoch: 5	Loss: 0.003829	Precision: 0.2527	Recall: 0.5027	F1 Score: 0.3364
Train Epoch: 6	Loss: 0.012814	Precision: 0.2527	Recall: 0.5027	F1 Score: 0.3364
Train Epoch: 7	Loss: 0.006108	Precision: 0.2527	Recall: 0.5027	F1 Score: 0.3364
Train Epoch: 8	Loss: 0.009159	Precision: 0.2527	Recall: 0.5027	F1 Score: 0.3364
Train Epoch: 9	Loss: 0.009243	Precision: 0.2527	Recall: 0.5027	F1 Score: 0.3364
Train Epoch: 10	Loss: 0.006405	Precision: 0.2527	Recall: 0.5027	F1 Score: 0.3364
Train Epoch: 11	Loss: 0.006088	Precision: 0.2527	Recall: 0.5027	F1 Score: 0.3364
Train Epoch: 12	Loss: 0.004600	Precision: 0.2527	Recall: 0.5027	F1 Score: 0.3364
Train Epoch: 13	Loss: 0.003356	Precis