# Permute-MNIST [Simple Model] PyTorch


In [None]:
import torch
torch.cuda.is_available()
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt

In [None]:
!git clone https://github.com/ContinualAI/colab.git continualai/colab
from continualai.colab.scripts import mnist
mnist.init()

Cloning into 'continualai/colab'...
remote: Enumerating objects: 378, done.[K
remote: Counting objects: 100% (120/120), done.[K
remote: Compressing objects: 100% (58/58), done.[K
remote: Total 378 (delta 78), reused 64 (delta 62), pack-reused 258[K
Receiving objects: 100% (378/378), 26.97 MiB | 7.26 MiB/s, done.
Resolving deltas: 100% (198/198), done.
Downloading train-images-idx3-ubyte.gz...
Downloading t10k-images-idx3-ubyte.gz...
Downloading train-labels-idx1-ubyte.gz...
Downloading t10k-labels-idx1-ubyte.gz...
Download complete.
Save complete.
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:01<00:00, 5016005.25it/s]


Extracting data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to data/mnist/MNIST/raw

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 131985.28it/s]


Extracting data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to data/mnist/MNIST/raw

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1203774.89it/s]


Extracting data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to data/mnist/MNIST/raw

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4222191.66it/s]

Extracting data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/mnist/MNIST/raw






In [None]:
x_train, t_train, x_test, t_test = mnist.load()

print("x_train dim and type: ", x_train.shape, x_train.dtype)
print("t_train dim and type: ", t_train.shape, t_train.dtype)
print("x_test dim and type: ", x_test.shape, x_test.dtype)
print("t_test dim and type: ", t_test.shape, t_test.dtype)
batch_size=128

x_train dim and type:  (60000, 1, 28, 28) float32
t_train dim and type:  (60000,) uint8
x_test dim and type:  (10000, 1, 28, 28) float32
t_test dim and type:  (10000,) uint8


In [None]:
# switch to False to use CPU
use_cuda = True

use_cuda = use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu");
torch.manual_seed(1);

In [None]:
import torch.nn.functional as F

class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # print(x.shape)
        return x

def permute_mnist(mnist, seed):
    """ Given the training set, permute pixels of each img the same way. """

    np.random.seed(seed)
    print("starting permutation...")
    h = w = 28
    perm_inds = list(range(h*w))
    np.random.shuffle(perm_inds)
    # print(perm_inds)
    perm_mnist = []
    for set in mnist:
        num_img = set.shape[0]
        flat_set = set.reshape(num_img, w * h)
        perm_mnist.append(flat_set[:, perm_inds].reshape(num_img, 1, w, h))
    print("done.")
    return perm_mnist

def on_task_update(task_id, x_mem, t_mem):

  model.train()
  fisher_dict[task_id] = {}
  optpar_dict[task_id] = {}
  count=0
  for name, param in model.named_parameters():
    fisher_dict[task_id][name] = param.data*0.0
  for start in range(0, len(t_mem)-1, batch_size):
      optimizer.zero_grad()
      count+=1
      # print(len(t_mem),len(x_mem))
      end = start + batch_size
      x, y = torch.from_numpy(x_mem[start:end]), torch.from_numpy(t_mem[start:end]).long()
      x, y = x.to(device), y.to(device)
      output = model(x)
      loss = F.cross_entropy(output, y)
      loss.backward()
      # gradients accumulated can be used to calculate fisher
      for name, param in model.named_parameters():
        fisher_dict[task_id][name] += param.grad.data.clone().pow(2)
  for name, param in model.named_parameters():
        optpar_dict[task_id][name] = param.data.clone()
        fisher_dict[task_id][name] = fisher_dict[task_id][name]/count

def train_ewc(model, device, task_id, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
      end = start + batch_size
      x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end]).long()
      x, y = x.to(device), y.to(device)

      optimizer.zero_grad()

      output = model(x)
      loss = F.cross_entropy(output, y)

      ### magic here! :-)
      for task in range(task_id):
        for name, param in model.named_parameters():
          fisher = fisher_dict[task][name]
          optpar = optpar_dict[task][name]
          loss += (fisher * (optpar - param).pow(2)).sum() * ewc_lambda

      loss.backward()
      optimizer.step()
      #print(loss.item())
    print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))

def train(model, device, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
      end = start + batch_size
      x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end]).long()
      x, y = x.to(device), y.to(device)

      optimizer.zero_grad()

      output = model(x)
      loss = F.cross_entropy(output, y)
      loss.backward()
      optimizer.step()
      #print(loss.item())
    print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))

def test(model, device, x_test, t_test):
    model.eval()
    test_loss = 0
    correct = 0
    for start in range(0, len(t_test)-1, batch_size):
      end = start + batch_size
      with torch.no_grad():
        x, y = torch.from_numpy(x_test[start:end]), torch.from_numpy(t_test[start:end]).long()
        x, y = x.to(device), y.to(device)
        output = model(x)
        test_loss += F.cross_entropy(output, y).item() # sum up batch loss
        pred = output.max(1, keepdim=True)[1] # get the index of the max logit
        correct += pred.eq(y.view_as(pred)).sum().item()

    test_loss /= len(t_test)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(t_test),
        100. * correct / len(t_test)))
    return 100. * correct / len(t_test)

In [None]:
# ACCURACY PRECISION RECALL F1-SCORE

from sklearn.metrics import precision_score, recall_score, f1_score
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # print(x.shape)
        return x

def permute_mnist(mnist, seed):
    """ Given the training set, permute pixels of each img the same way. """

    np.random.seed(seed)
    print("starting permutation...")
    h = w = 28
    perm_inds = list(range(h*w))
    np.random.shuffle(perm_inds)
    # print(perm_inds)
    perm_mnist = []
    for set in mnist:
        num_img = set.shape[0]
        flat_set = set.reshape(num_img, w * h)
        perm_mnist.append(flat_set[:, perm_inds].reshape(num_img, 1, w, h))
    print("done.")
    return perm_mnist


def on_task_update(task_id, x_mem, t_mem):

  model.train()
  fisher_dict[task_id] = {}
  optpar_dict[task_id] = {}
  count=0
  for name, param in model.named_parameters():
    fisher_dict[task_id][name] = param.data*0.0
  for start in range(0, len(t_mem)-1, batch_size):
      optimizer.zero_grad()
      count+=1
      # print(len(t_mem),len(x_mem))
      end = start + batch_size
      x, y = torch.from_numpy(x_mem[start:end]), torch.from_numpy(t_mem[start:end]).long()
      x, y = x.to(device), y.to(device)
      output = model(x)
      loss = F.cross_entropy(output, y)
      loss.backward()
      # gradients accumulated can be used to calculate fisher
      for name, param in model.named_parameters():
        fisher_dict[task_id][name] += param.grad.data.clone().pow(2)
  for name, param in model.named_parameters():
        optpar_dict[task_id][name] = param.data.clone()
        fisher_dict[task_id][name] = fisher_dict[task_id][name]/count

def train_ewc(model, device, task_id, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
        end = start + batch_size
        x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end]).long()
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()

        output = model(x)
        loss = F.cross_entropy(output, y)

        ### magic here! :-)
        for task in range(task_id):
            for name, param in model.named_parameters():
                fisher = fisher_dict[task][name]
                optpar = optpar_dict[task][name]
                loss += (fisher * (optpar - param).pow(2)).sum() * ewc_lambda

        loss.backward()
        optimizer.step()

    # After each epoch, evaluate and print metrics
    train_metrics = evaluate_metrics(model, device, x_train, t_train)
    # print(f'Train Epoch: {epoch}\tLoss: {loss.item():.6f}\tPrecision: {train_metrics["precision"]:.4f}\tRecall: {train_metrics["recall"]:.4f}\tF1 Score: {train_metrics["f1"]:.4f}')

def evaluate_metrics(model, device, x_data, t_data):
    model.eval()
    all_preds = []
    all_labels = []

    for start in range(0, len(t_data)-1, batch_size):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_data[start:end]), torch.from_numpy(t_data[start:end]).long()
            x, y = x.to(device), y.to(device)
            output = model(x)

            # Convert predictions to numpy arrays
            preds = output.argmax(dim=1).cpu().numpy()
            labels = y.cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return {'precision': precision, 'recall': recall, 'f1': f1}

def train(model, device, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
      end = start + batch_size
      x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end]).long()
      x, y = x.to(device), y.to(device)

      optimizer.zero_grad()

      output = model(x)
      loss = F.cross_entropy(output, y)
      loss.backward()
      optimizer.step()
      #print(loss.item())
    # print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))

def test(model, device, x_test, t_test):
    model.eval()
    test_loss = 0
    correct = 0
    all_preds = []
    all_labels = []

    for start in range(0, len(t_test)-1, batch_size):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_test[start:end]), torch.from_numpy(t_test[start:end]).long()
            x, y = x.to(device), y.to(device)
            output = model(x)
            test_loss += F.cross_entropy(output, y).item()  # sum up batch loss
            pred = output.max(1, keepdim=True)[1]  # get the index of the max logit
            correct += pred.eq(y.view_as(pred)).sum().item()

            # Convert predictions to numpy arrays
            preds = pred.cpu().numpy()
            labels = y.cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    test_loss /= len(t_test)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(t_test), 100. * correct / len(t_test)))
    print('Precision: {:.4f}, Recall: {:.4f}, F1 Score: {:.4f}\n'.format(precision, recall, f1))

    return 100. * correct / len(t_test)

In [None]:
# task 1
task_1 = [(x_train, t_train), (x_test, t_test)]

# task 2
x_train2, x_test2 = permute_mnist([x_train, x_test], 1)
task_2 = [(x_train2, t_train), (x_test2, t_test)]

# task 3
x_train3, x_test3 = permute_mnist([x_train, x_test], 2)
task_3 = [(x_train3, t_train), (x_test3, t_test)]

# task list
tasks = [task_1, task_2, task_3]

starting permutation...
done.
starting permutation...
done.


In [None]:
model = SimpleNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
fisher_dict = {}
optpar_dict = {}
ewc_lambda = 5000

In [None]:
import warnings
warnings.filterwarnings('ignore')
ewc_accs = []
for id, task in enumerate(tasks):
  avg_acc = 0
  print("Training on task: ", id)

  (x_train, t_train), _ = task

  for epoch in range(1, 11):
    train_ewc(model, device, id, x_train, t_train, optimizer, epoch)
  on_task_update(id, x_train, t_train)

  for id_test, task in enumerate(tasks):
    print("Testing on task: ", id_test)
    _, (x_test, t_test) = task
    acc = test(model, device, x_test, t_test)
    avg_acc = avg_acc + acc

  print("Avg acc: ", avg_acc / 3)
  ewc_accs.append(avg_acc / 3)

Training on task:  0
Testing on task:  0
Test set: Average loss: 0.0012, Accuracy: 9531/10000 (95%)
Precision: 0.9532, Recall: 0.9531, F1 Score: 0.9530

Testing on task:  1
Test set: Average loss: 0.0226, Accuracy: 691/10000 (7%)
Precision: 0.0356, Recall: 0.0691, F1 Score: 0.0428

Testing on task:  2
Test set: Average loss: 0.0198, Accuracy: 1200/10000 (12%)
Precision: 0.0964, Recall: 0.1200, F1 Score: 0.0782

Avg acc:  38.07333333333333
Training on task:  1
Testing on task:  0
Test set: Average loss: 0.0018, Accuracy: 9430/10000 (94%)
Precision: 0.9443, Recall: 0.9430, F1 Score: 0.9431

Testing on task:  1
Test set: Average loss: 0.0172, Accuracy: 2292/10000 (23%)
Precision: 0.2094, Recall: 0.2292, F1 Score: 0.1913

Testing on task:  2
Test set: Average loss: 0.0181, Accuracy: 1454/10000 (15%)
Precision: 0.1185, Recall: 0.1454, F1 Score: 0.1037

Avg acc:  43.919999999999995
Training on task:  2
Testing on task:  0
Test set: Average loss: 0.0015, Accuracy: 9480/10000 (95%)
Precision: 

# Permuted-MNIST ONE ARCHITECTURE


In [10]:
import torch
# torch.cuda.is_available()
import torch
import torch.nn
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

In [2]:
!git clone https://github.com/ContinualAI/colab.git continualai/colab
from continualai.colab.scripts import mnist
mnist.init()

Cloning into 'continualai/colab'...
remote: Enumerating objects: 378, done.[K
remote: Counting objects: 100% (120/120), done.[K
remote: Compressing objects: 100% (58/58), done.[K
remote: Total 378 (delta 78), reused 64 (delta 62), pack-reused 258[K
Receiving objects: 100% (378/378), 26.97 MiB | 11.41 MiB/s, done.
Resolving deltas: 100% (198/198), done.
Downloading train-images-idx3-ubyte.gz...
Downloading t10k-images-idx3-ubyte.gz...
Downloading train-labels-idx1-ubyte.gz...
Downloading t10k-labels-idx1-ubyte.gz...
Download complete.
Save complete.
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:02<00:00, 4411339.14it/s]


Extracting data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to data/mnist/MNIST/raw

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 134616.31it/s]


Extracting data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to data/mnist/MNIST/raw

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1275467.83it/s]


Extracting data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to data/mnist/MNIST/raw

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 9855420.99it/s]

Extracting data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/mnist/MNIST/raw






In [4]:
x_train, t_train, x_test, t_test = mnist.load()

print("x_train dim and type: ", x_train.shape, x_train.dtype)
print("t_train dim and type: ", t_train.shape, t_train.dtype)
print("x_test dim and type: ", x_test.shape, x_test.dtype)
print("t_test dim and type: ", t_test.shape, t_test.dtype)

x_train dim and type:  (60000, 1, 28, 28) float32
t_train dim and type:  (60000,) uint8
x_test dim and type:  (10000, 1, 28, 28) float32
t_test dim and type:  (10000,) uint8


In [5]:
# switch to False to use CPU
use_cuda = True

use_cuda = use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu");
torch.manual_seed(1);

In [6]:
import torch.nn.functional as F

class ONEArchitecture(nn.Module):
    def __init__(self):
        super(ONEArchitecture, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        # Fully connected layers
        self.fc1 = nn.Linear(100352, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))

        x = F.relu(self.conv2(x))

        x = F.relu(self.conv3(x))

        # Flatten the tensor before passing it to fully connected layers
        x = x.view(-1, 100352)

        x = F.relu(self.fc1(x))

        x = F.relu(self.fc2(x))

        x = self.fc3(x)
        return x

def permute_mnist(mnist, seed):
    """ Given the training set, permute pixels of each img the same way. """

    np.random.seed(seed)
    print("starting permutation...")
    h = w = 28
    perm_inds = list(range(h*w))
    np.random.shuffle(perm_inds)
    # print(perm_inds)
    perm_mnist = []
    for set in mnist:
        print(set.shape)
        num_img = set.shape[0]
        flat_set = set.reshape(num_img, w * h)
        perm_mnist.append(flat_set[:, perm_inds].reshape(num_img, 1, w, h))
    print("done.")
    return perm_mnist

from sklearn.metrics import precision_score, recall_score, f1_score

def on_task_update(task_id, x_mem, t_mem):

  model.train()
  fisher_dict[task_id] = {}
  optpar_dict[task_id] = {}
  count=0
  for name, param in model.named_parameters():
    fisher_dict[task_id][name] = param.data*0.0
  for start in range(0, len(t_mem)-1, batch_size):
      optimizer.zero_grad()
      count+=1
      # print(len(t_mem),len(x_mem))
      end = start + batch_size
      x, y = torch.from_numpy(x_mem[start:end]), torch.from_numpy(t_mem[start:end]).long()
      x, y = x.to(device), y.to(device)
      output = model(x)
      loss = F.cross_entropy(output, y)
      loss.backward()
      # gradients accumulated can be used to calculate fisher
      for name, param in model.named_parameters():
        fisher_dict[task_id][name] += param.grad.data.clone().pow(2)
  for name, param in model.named_parameters():
        optpar_dict[task_id][name] = param.data.clone()
        fisher_dict[task_id][name] = fisher_dict[task_id][name]/count

def train_ewc(model, device, task_id, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
        end = start + batch_size
        x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end]).long()
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()

        output = model(x)
        loss = F.cross_entropy(output, y)

        ### magic here! :-)
        for task in range(task_id):
            for name, param in model.named_parameters():
                fisher = fisher_dict[task][name]
                optpar = optpar_dict[task][name]
                loss += (fisher * (optpar - param).pow(2)).sum() * ewc_lambda

        loss.backward()
        optimizer.step()

    # After each epoch, evaluate and print metrics
    train_metrics = evaluate_metrics(model, device, x_train, t_train)
    # print(f'Train Epoch: {epoch}\tLoss: {loss.item():.6f}\tPrecision: {train_metrics["precision"]:.4f}\tRecall: {train_metrics["recall"]:.4f}\tF1 Score: {train_metrics["f1"]:.4f}')

def evaluate_metrics(model, device, x_data, t_data):
    model.eval()
    all_preds = []
    all_labels = []

    for start in range(0, len(t_data)-1, batch_size):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_data[start:end]), torch.from_numpy(t_data[start:end]).long()
            x, y = x.to(device), y.to(device)
            output = model(x)

            # Convert predictions to numpy arrays
            preds = output.argmax(dim=1).cpu().numpy()
            labels = y.cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return {'precision': precision, 'recall': recall, 'f1': f1}

def train(model, device, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
      end = start + batch_size
      x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end]).long()
      x, y = x.to(device), y.to(device)

      optimizer.zero_grad()

      output = model(x)
      loss = F.cross_entropy(output, y)
      loss.backward()
      optimizer.step()
      #print(loss.item())
    # print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))

def test(model, device, x_test, t_test):
    model.eval()
    test_loss = 0
    correct = 0
    all_preds = []
    all_labels = []

    for start in range(0, len(t_test)-1, batch_size):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_test[start:end]), torch.from_numpy(t_test[start:end]).long()
            x, y = x.to(device), y.to(device)
            output = model(x)
            test_loss += F.cross_entropy(output, y).item()  # sum up batch loss
            pred = output.max(1, keepdim=True)[1]  # get the index of the max logit
            correct += pred.eq(y.view_as(pred)).sum().item()

            # Convert predictions to numpy arrays
            preds = pred.cpu().numpy()
            labels = y.cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    test_loss /= len(t_test)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(t_test), 100. * correct / len(t_test)))
    print('Precision: {:.4f}, Recall: {:.4f}, F1 Score: {:.4f}\n'.format(precision, recall, f1))

    return 100. * correct / len(t_test)

In [7]:
# task 1
task_1 = [(x_train, t_train), (x_test, t_test)]

# task 2
x_train2, x_test2 = permute_mnist([x_train, x_test], 1)
task_2 = [(x_train2, t_train), (x_test2, t_test)]

# task 3
x_train3, x_test3 = permute_mnist([x_train, x_test], 2)
task_3 = [(x_train3, t_train), (x_test3, t_test)]

# task list
tasks = [task_1, task_2, task_3]

starting permutation...
(60000, 1, 28, 28)
(10000, 1, 28, 28)
done.
starting permutation...
(60000, 1, 28, 28)
(10000, 1, 28, 28)
done.


In [8]:
model = ONEArchitecture().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
fisher_dict = {}
optpar_dict = {}
ewc_lambda = 0.4
batch_size=256

In [11]:
import warnings
warnings.filterwarnings('ignore')
ewc_accs = []
for id, task in enumerate(tasks):
  avg_acc = 0
  print("Training on task: ", id)

  (x_train, t_train), _ = task
  print("Training...")
  for epoch in tqdm(range(1, 16)):
    train_ewc(model, device, id, x_train, t_train, optimizer, epoch)
  on_task_update(id, x_train, t_train)

  print("Testing...")
  for id_test, task in tqdm(enumerate(tasks)):
    # print("Testing on task: ", id_test)
    try:
      _, (x_test, t_test) = task
      acc = test(model, device, x_test, t_test)
      avg_acc = avg_acc + acc
    except Exception as e:
      print("Exception on task: {}".format(id_test))
      print("Exception: {}".format(e))

  print("Avg acc: ", avg_acc / 3)
  ewc_accs.append(avg_acc / 3)

Training on task:  0
Training...


  0%|          | 0/15 [00:00<?, ?it/s]

Testing...


0it [00:00, ?it/s]

Test set: Average loss: 0.0002, Accuracy: 9895/10000 (99%)
Precision: 0.9895, Recall: 0.9895, F1 Score: 0.9895



  _warn_prf(average, modifier, msg_start, len(result))


Test set: Average loss: 0.0382, Accuracy: 787/10000 (8%)
Precision: 0.0667, Recall: 0.0787, F1 Score: 0.0418

Test set: Average loss: 0.0277, Accuracy: 1258/10000 (13%)
Precision: 0.1578, Recall: 0.1258, F1 Score: 0.0843

Avg acc:  39.800000000000004
Training on task:  1
Training...


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/15 [00:00<?, ?it/s]

Testing...


0it [00:00, ?it/s]

Test set: Average loss: 0.0113, Accuracy: 6858/10000 (69%)
Precision: 0.7652, Recall: 0.6858, F1 Score: 0.6859

Test set: Average loss: 0.0005, Accuracy: 9759/10000 (98%)
Precision: 0.9760, Recall: 0.9759, F1 Score: 0.9759

Test set: Average loss: 0.0454, Accuracy: 980/10000 (10%)
Precision: 0.1671, Recall: 0.0980, F1 Score: 0.0506

Avg acc:  58.65666666666667
Training on task:  2
Training...


  0%|          | 0/15 [00:00<?, ?it/s]

Testing...


0it [00:00, ?it/s]

Test set: Average loss: 0.0262, Accuracy: 4579/10000 (46%)
Precision: 0.4992, Recall: 0.4579, F1 Score: 0.4331

Test set: Average loss: 0.0087, Accuracy: 6691/10000 (67%)
Precision: 0.7403, Recall: 0.6691, F1 Score: 0.6475

Test set: Average loss: 0.0005, Accuracy: 9747/10000 (97%)
Precision: 0.9750, Recall: 0.9747, F1 Score: 0.9747

Avg acc:  70.05666666666666


# MNIST PyTorch OWN Dataset Task Division

In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
complete_training_data=pd.read_csv('/content/drive/MyDrive/IIIT/Thesis/Continual_Learning/digit-recognizer/train.csv')
complete_testing_data=pd.read_csv('/content/drive/MyDrive/IIIT/Thesis/Continual_Learning/digit-recognizer/test.csv')

In [None]:
number_of_samples=complete_training_data.shape[0]
number_of_pixels=complete_training_data.shape[1]-1
print('Number of Samples',number_of_samples)
print('Number of Pixels',number_of_pixels)

Number of Samples 42000
Number of Pixels 784


In [None]:
task1_data=pd.read_csv('/content/drive/MyDrive/IIIT/Thesis/Continual_Learning/digit-recognizer/Task1_data.csv')
print('Shape of the Task 1 dataset',task1_data.shape)

Shape of the Task 1 dataset (21416, 786)


In [None]:
task2_data=pd.read_csv('/content/drive/MyDrive/IIIT/Thesis/Continual_Learning/digit-recognizer/Task2_data.csv')
print('Shape of the Task 2 dataset',task2_data.shape)

Shape of the Task 2 dataset (12333, 786)


In [None]:
task3_data=pd.read_csv('/content/drive/MyDrive/IIIT/Thesis/Continual_Learning/digit-recognizer/Task3_data.csv')
print('Shape of the Task 3 dataset',task3_data.shape)

Shape of the Task 3 dataset (8251, 786)


In [None]:
#Splitting the training and test dataset shape
#Dropping the label
task1_label=task1_data['label']
print(task1_label.unique())
task1_data=task1_data.drop('label',axis=1)
task1_data=task1_data.drop('Unnamed: 0',axis=1)
X_train_task1,X_test_task1,Y_train_task1,Y_test_task1 = train_test_split(task1_data,task1_label,random_state=421212, test_size=0.1,shuffle=True)
print(X_train_task1.shape)
print(X_test_task1.shape)
X_train_task1=np.asarray(X_train_task1)
Y_train_task1=np.asarray(Y_train_task1)
X_test_task1=np.asarray(X_test_task1)
Y_test_task1=np.asarray(Y_test_task1)
X_train_task1 = np.asarray(torch.tensor(X_train_task1, dtype=torch.float32))
Y_train_task1 = np.asarray(torch.tensor(Y_train_task1, dtype=torch.float32))
X_test_task1 = np.asarray(torch.tensor(X_test_task1, dtype=torch.float32))
Y_test_task1 = np.asarray(torch.tensor(Y_test_task1, dtype=torch.float32))

[0 1 2 3 4]
(19274, 784)
(2142, 784)


In [None]:
#Splitting the training and test dataset shape
#Dropping the label
task2_label=task2_data['label']
print(task2_label.unique())
task2_data=task2_data.drop('label',axis=1)
task2_data=task2_data.drop('Unnamed: 0',axis=1)
X_train_task2,X_test_task2,Y_train_task2,Y_test_task2 = train_test_split(task2_data,task2_label,random_state=421212, test_size=0.1,shuffle=True)
print(X_train_task2.shape)
print(X_test_task2.shape)

X_train_task2=np.asarray(X_train_task2)
Y_train_task2=np.asarray(Y_train_task2)
X_test_task2=np.asarray(X_test_task2)
Y_test_task2=np.asarray(Y_test_task2)
X_train_task2 = np.asarray(torch.tensor(X_train_task2, dtype=torch.float32))
Y_train_task2 = np.asarray(torch.tensor(Y_train_task2, dtype=torch.float32))
X_test_task2 = np.asarray(torch.tensor(X_test_task2, dtype=torch.float32))
Y_test_task2 = np.asarray(torch.tensor(Y_test_task2, dtype=torch.float32))

[7 6 5]
(11099, 784)
(1234, 784)


In [None]:
#Splitting the training and test dataset shape
#Dropping the label
task3_label=task3_data['label']
print(task3_label.unique())
task3_data=task3_data.drop('label',axis=1)
task3_data=task3_data.drop('Unnamed: 0',axis=1)
X_train_task3,X_test_task3,Y_train_task3,Y_test_task3 = train_test_split(task3_data,task3_label,random_state=421212, test_size=0.1,shuffle=True)
print(X_train_task3.shape)
print(X_test_task3.shape)

X_train_task3=np.asarray(X_train_task3)
Y_train_task3=np.asarray(Y_train_task3)
X_test_task3=np.asarray(X_test_task3)
Y_test_task3=np.asarray(Y_test_task3)
X_train_task3 = np.asarray(torch.tensor(X_train_task3, dtype=torch.float32))
Y_train_task3 = np.asarray(torch.tensor(Y_train_task3, dtype=torch.float32))
X_test_task3 = np.asarray(torch.tensor(X_test_task3, dtype=torch.float32))
Y_test_task3 = np.asarray(torch.tensor(Y_test_task3, dtype=torch.float32))

[8 9]
(7425, 784)
(826, 784)


In [None]:
# task 1
task_1 = [(X_train_task1, Y_train_task1), (X_test_task1, Y_test_task1)]

# task 2
task_2 = [(X_train_task2, Y_train_task2), (X_test_task2, Y_test_task2)]

# task 3
task_3 = [(X_train_task3, Y_train_task3), (X_test_task3, Y_test_task3)]

# task list
tasks = [task_1, task_2, task_3]

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class ClassificationModel(nn.Module):
    def __init__(self, number_of_classes):
        super(ClassificationModel, self).__init__()
        self.linear1 = nn.Linear(784, 390)
        self.linear2 = nn.Linear(390, 190)
        self.linear3 = nn.Linear(190, 95)
        self.linear4 = nn.Linear(95, 45)
        self.linear5 = nn.Linear(45, 20)
        self.linear6 = nn.Linear(20, number_of_classes)
        self.dropout = nn.Dropout(0.5)  # Adding Dropout

    def forward(self, x):
        x = F.relu(self.linear1(x))  # Applying Batch Normalization before activation
        x = F.relu(self.dropout(x))
        x = F.relu(self.linear2(x))
        x = F.relu(self.dropout(x))
        x = F.relu(self.linear3(x))
        x = F.relu(self.dropout(x))
        x = F.relu(self.linear4(x))
        x = F.relu(self.linear5(x))
        x = self.linear6(x)
        return x

In [None]:
# switch to False to use CPU
use_cuda = True

use_cuda = use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu");
torch.manual_seed(1);

In [None]:
model = ClassificationModel(10).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.00001)
fisher_dict = {}
optpar_dict = {}
ewc_lambda = 10**6
batch_size=4

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

def on_task_update(task_id, x_mem, t_mem):

  model.train()
  fisher_dict[task_id] = {}
  optpar_dict[task_id] = {}
  count=0
  for name, param in model.named_parameters():
    fisher_dict[task_id][name] = param.data*0.0
  for start in range(0, len(t_mem)-1, batch_size):
      optimizer.zero_grad()
      count+=1
      # print(len(t_mem),len(x_mem))
      end = start + batch_size
      x, y = torch.from_numpy(x_mem[start:end]), torch.from_numpy(t_mem[start:end]).long()
      x, y = x.to(device), y.to(device)
      output = model(x)
      loss = F.cross_entropy(output, y)
      loss.backward()
      # gradients accumulated can be used to calculate fisher
      for name, param in model.named_parameters():
        fisher_dict[task_id][name] += param.grad.data.clone().pow(2)
  for name, param in model.named_parameters():
        optpar_dict[task_id][name] = param.data.clone()
        fisher_dict[task_id][name] = fisher_dict[task_id][name]/count

def train_ewc(model, device, task_id, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
        end = start + batch_size
        x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end]).long()
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()

        output = model(x)
        loss = F.cross_entropy(output, y)

        ### magic here! :-)
        for task in range(task_id):
            for name, param in model.named_parameters():
                fisher = fisher_dict[task][name]
                optpar = optpar_dict[task][name]
                loss += (fisher * (optpar - param).pow(2)).sum() * ewc_lambda

        loss.backward()
        optimizer.step()

    # After each epoch, evaluate and print metrics
    train_metrics = evaluate_metrics(model, device, x_train, t_train)
    # print(f'Train Epoch: {epoch}\tLoss: {loss.item():.6f}\tPrecision: {train_metrics["precision"]:.4f}\tRecall: {train_metrics["recall"]:.4f}\tF1 Score: {train_metrics["f1"]:.4f}')

def evaluate_metrics(model, device, x_data, t_data):
    model.eval()
    all_preds = []
    all_labels = []

    for start in range(0, len(t_data)-1, batch_size):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_data[start:end]), torch.from_numpy(t_data[start:end]).long()
            x, y = x.to(device), y.to(device)
            output = model(x)

            # Convert predictions to numpy arrays
            preds = output.argmax(dim=1).cpu().numpy()
            labels = y.cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return {'precision': precision, 'recall': recall, 'f1': f1}

def train(model, device, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
      end = start + batch_size
      x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end]).long()
      x, y = x.to(device), y.to(device)

      optimizer.zero_grad()

      output = model(x)
      loss = F.cross_entropy(output, y)
      loss.backward()
      optimizer.step()
      #print(loss.item())
    # print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))

def test(model, device, x_test, t_test):
    model.eval()
    test_loss = 0
    correct = 0
    all_preds = []
    all_labels = []

    for start in range(0, len(t_test)-1, batch_size):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_test[start:end]), torch.from_numpy(t_test[start:end]).long()
            x, y = x.to(device), y.to(device)
            output = model(x)
            test_loss += F.cross_entropy(output, y).item()  # sum up batch loss
            pred = output.max(1, keepdim=True)[1]  # get the index of the max logit
            correct += pred.eq(y.view_as(pred)).sum().item()

            # Convert predictions to numpy arrays
            preds = pred.cpu().numpy()
            labels = y.cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    test_loss /= len(t_test)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(t_test), 100. * correct / len(t_test)))
    print('Precision: {:.4f}, Recall: {:.4f}, F1 Score: {:.4f}\n'.format(precision, recall, f1))

    return 100. * correct / len(t_test)

In [None]:
import warnings
warnings.filterwarnings('ignore')
ewc_accs = []
for id, task in enumerate(tasks):
  avg_acc = 0
  print("Training on task: ", id)

  (x_train, t_train), _ = task

  for epoch in range(1, 11):
    train_ewc(model, device, id, x_train, t_train, optimizer, epoch)
  on_task_update(id, x_train, t_train)

  for id_test, task in enumerate(tasks):
    print("Testing on task: ", id_test)
    _, (x_test, t_test) = task
    acc = test(model, device, x_test, t_test)
    avg_acc = avg_acc + acc

  print("Avg acc: ", avg_acc / 3)
  ewc_accs.append(avg_acc / 3)

Training on task:  0
Testing on task:  0
Test set: Average loss: 0.0267, Accuracy: 2084/2142 (97%)
Precision: 0.9730, Recall: 0.9729, F1 Score: 0.9729

Testing on task:  1
Test set: Average loss: 3.1087, Accuracy: 0/1234 (0%)
Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000

Testing on task:  2
Test set: Average loss: 3.5799, Accuracy: 0/826 (0%)
Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000

Avg acc:  32.4307500778089
Training on task:  1
Testing on task:  0
Test set: Average loss: 0.2510, Accuracy: 1573/2142 (73%)
Precision: 0.9947, Recall: 0.7344, F1 Score: 0.8314

Testing on task:  1
Test set: Average loss: 0.4080, Accuracy: 716/1234 (58%)
Precision: 0.6645, Recall: 0.5802, F1 Score: 0.5756

Testing on task:  2
Test set: Average loss: 1.1783, Accuracy: 0/826 (0%)
Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000

Avg acc:  43.81957717356707
Training on task:  2
Testing on task:  0
Test set: Average loss: 0.2550, Accuracy: 1575/2142 (74%)
Precision: 0.9948, Recall: 0.7353

# UNET Architecture

In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
complete_training_data=pd.read_csv('/content/drive/MyDrive/IIIT/Thesis/Continual_Learning/digit-recognizer/train.csv')
complete_testing_data=pd.read_csv('/content/drive/MyDrive/IIIT/Thesis/Continual_Learning/digit-recognizer/test.csv')

In [None]:
number_of_samples=complete_training_data.shape[0]
number_of_pixels=complete_training_data.shape[1]-1
print('Number of Samples',number_of_samples)
print('Number of Pixels',number_of_pixels)

Number of Samples 42000
Number of Pixels 784


In [None]:
task1_data=pd.read_csv('/content/drive/MyDrive/IIIT/Thesis/Continual_Learning/digit-recognizer/Task1_data.csv')
print('Shape of the Task 1 dataset',task1_data.shape)

Shape of the Task 1 dataset (21416, 786)


In [None]:
task2_data=pd.read_csv('/content/drive/MyDrive/IIIT/Thesis/Continual_Learning/digit-recognizer/Task2_data.csv')
print('Shape of the Task 2 dataset',task2_data.shape)

Shape of the Task 2 dataset (12333, 786)


In [None]:
task3_data=pd.read_csv('/content/drive/MyDrive/IIIT/Thesis/Continual_Learning/digit-recognizer/Task3_data.csv')
print('Shape of the Task 3 dataset',task3_data.shape)

Shape of the Task 3 dataset (8251, 786)


In [None]:
#Splitting the training and test dataset shape
#Dropping the label
task1_label=task1_data['label']
print(task1_label.unique())
task1_data=task1_data.drop('label',axis=1)
task1_data=task1_data.drop('Unnamed: 0',axis=1)
X_train_task1,X_test_task1,Y_train_task1,Y_test_task1 = train_test_split(task1_data,task1_label,random_state=421212, test_size=0.1,shuffle=True)
print(X_train_task1.shape)
print(X_test_task1.shape)
X_train_task1=np.asarray(X_train_task1)
Y_train_task1=np.asarray(Y_train_task1)
X_test_task1=np.asarray(X_test_task1)
Y_test_task1=np.asarray(Y_test_task1)
X_train_task1 = np.asarray(torch.tensor(X_train_task1, dtype=torch.float32))
Y_train_task1 = np.asarray(torch.tensor(Y_train_task1, dtype=torch.float32))
X_test_task1 = np.asarray(torch.tensor(X_test_task1, dtype=torch.float32))
Y_test_task1 = np.asarray(torch.tensor(Y_test_task1, dtype=torch.float32))

[0 1 2 3 4]
(19274, 784)
(2142, 784)


In [None]:
#Splitting the training and test dataset shape
#Dropping the label
task2_label=task2_data['label']
print(task2_label.unique())
task2_data=task2_data.drop('label',axis=1)
task2_data=task2_data.drop('Unnamed: 0',axis=1)
X_train_task2,X_test_task2,Y_train_task2,Y_test_task2 = train_test_split(task2_data,task2_label,random_state=421212, test_size=0.1,shuffle=True)
print(X_train_task2.shape)
print(X_test_task2.shape)

X_train_task2=np.asarray(X_train_task2)
Y_train_task2=np.asarray(Y_train_task2)
X_test_task2=np.asarray(X_test_task2)
Y_test_task2=np.asarray(Y_test_task2)
X_train_task2 = np.asarray(torch.tensor(X_train_task2, dtype=torch.float32))
Y_train_task2 = np.asarray(torch.tensor(Y_train_task2, dtype=torch.float32))
X_test_task2 = np.asarray(torch.tensor(X_test_task2, dtype=torch.float32))
Y_test_task2 = np.asarray(torch.tensor(Y_test_task2, dtype=torch.float32))

[7 6 5]
(11099, 784)
(1234, 784)


In [None]:
#Splitting the training and test dataset shape
#Dropping the label
task3_label=task3_data['label']
print(task3_label.unique())
task3_data=task3_data.drop('label',axis=1)
task3_data=task3_data.drop('Unnamed: 0',axis=1)
X_train_task3,X_test_task3,Y_train_task3,Y_test_task3 = train_test_split(task3_data,task3_label,random_state=421212, test_size=0.1,shuffle=True)
print(X_train_task3.shape)
print(X_test_task3.shape)

X_train_task3=np.asarray(X_train_task3)
Y_train_task3=np.asarray(Y_train_task3)
X_test_task3=np.asarray(X_test_task3)
Y_test_task3=np.asarray(Y_test_task3)
X_train_task3 = np.asarray(torch.tensor(X_train_task3, dtype=torch.float32))
Y_train_task3 = np.asarray(torch.tensor(Y_train_task3, dtype=torch.float32))
X_test_task3 = np.asarray(torch.tensor(X_test_task3, dtype=torch.float32))
Y_test_task3 = np.asarray(torch.tensor(Y_test_task3, dtype=torch.float32))

[8 9]
(7425, 784)
(826, 784)


In [None]:
# task 1
task_1 = [(X_train_task1, Y_train_task1), (X_test_task1, Y_test_task1)]

# task 2
task_2 = [(X_train_task2, Y_train_task2), (X_test_task2, Y_test_task2)]

# task 3
task_3 = [(X_train_task3, Y_train_task3), (X_test_task3, Y_test_task3)]

# task list
tasks = [task_1, task_2, task_3]

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class UNet(nn.Module):
    def __init__(self, input_size, output_size):
        super(UNet, self).__init__()

        # Encoder layers
        self.enc1 = nn.Linear(input_size, 256)
        self.enc2 = nn.Linear(256, 128)
        self.enc3 = nn.Linear(128, 64)
        self.enc4 = nn.Linear(64, 32)
        self.enc5 = nn.Linear(32, 16)

        # Decoder layers
        self.dec5 = nn.Linear(16, 32)
        self.dec4 = nn.Linear(32, 64)
        self.dec3 = nn.Linear(64, 128)
        self.dec2 = nn.Linear(128, 256)
        self.dec1 = nn.Linear(256, output_size)

    def forward(self, x):
        # Encoding
        x1 = F.relu(self.enc1(x))
        x2 = F.relu(self.enc2(x1))
        x3 = F.relu(self.enc3(x2))
        x4 = F.relu(self.enc4(x3))
        x5 = F.relu(self.enc5(x4))

        # Decoding with skip connections
        x_dec5 = F.relu(self.dec5(x5))
        x_dec4 = F.relu(self.dec4(x_dec5))
        x_dec3 = F.relu(self.dec3(x_dec4))
        x_dec2 = F.relu(self.dec2(x_dec3))
        x_out = self.dec1(x_dec2)

        return x_out

# Example usage
input_size = 784
output_size = 10

model = UNet(input_size, output_size)

In [None]:
# switch to False to use CPU
use_cuda = True

use_cuda = use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu");
torch.manual_seed(1);

In [None]:
optimizer = optim.Adam(model.parameters(), lr=0.00001)
fisher_dict = {}
optpar_dict = {}
ewc_lambda = 10**6
batch_size=256

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

def on_task_update(task_id, x_mem, t_mem):

  model.train()
  fisher_dict[task_id] = {}
  optpar_dict[task_id] = {}
  count=0
  for name, param in model.named_parameters():
    fisher_dict[task_id][name] = param.data*0.0
  for start in range(0, len(t_mem)-1, batch_size):
      optimizer.zero_grad()
      count+=1
      # print(len(t_mem),len(x_mem))
      end = start + batch_size
      x, y = torch.from_numpy(x_mem[start:end]), torch.from_numpy(t_mem[start:end]).long()
      x, y = x.to(device), y.to(device)
      output = model(x)
      loss = F.cross_entropy(output, y)
      loss.backward()
      # gradients accumulated can be used to calculate fisher
      for name, param in model.named_parameters():
        fisher_dict[task_id][name] += param.grad.data.clone().pow(2)
  for name, param in model.named_parameters():
        optpar_dict[task_id][name] = param.data.clone()
        fisher_dict[task_id][name] = fisher_dict[task_id][name]/count

def train_ewc(model, device, task_id, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
        end = start + batch_size
        x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end]).long()
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()

        output = model(x)
        loss = F.cross_entropy(output, y)

        ### magic here! :-)
        for task in range(task_id):
            for name, param in model.named_parameters():
                fisher = fisher_dict[task][name]
                optpar = optpar_dict[task][name]
                loss += (fisher * (optpar - param).pow(2)).sum() * ewc_lambda

        loss.backward()
        optimizer.step()

    # After each epoch, evaluate and print metrics
    train_metrics = evaluate_metrics(model, device, x_train, t_train)
    # print(f'Train Epoch: {epoch}\tLoss: {loss.item():.6f}\tPrecision: {train_metrics["precision"]:.4f}\tRecall: {train_metrics["recall"]:.4f}\tF1 Score: {train_metrics["f1"]:.4f}')

def evaluate_metrics(model, device, x_data, t_data):
    model.eval()
    all_preds = []
    all_labels = []

    for start in range(0, len(t_data)-1, batch_size):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_data[start:end]), torch.from_numpy(t_data[start:end]).long()
            x, y = x.to(device), y.to(device)
            output = model(x)

            # Convert predictions to numpy arrays
            preds = output.argmax(dim=1).cpu().numpy()
            labels = y.cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return {'precision': precision, 'recall': recall, 'f1': f1}

def train(model, device, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
      end = start + batch_size
      x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end]).long()
      x, y = x.to(device), y.to(device)

      optimizer.zero_grad()

      output = model(x)
      loss = F.cross_entropy(output, y)
      loss.backward()
      optimizer.step()
      #print(loss.item())
    # print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))

def test(model, device, x_test, t_test):
    model.eval()
    test_loss = 0
    correct = 0
    all_preds = []
    all_labels = []

    for start in range(0, len(t_test)-1, batch_size):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_test[start:end]), torch.from_numpy(t_test[start:end]).long()
            x, y = x.to(device), y.to(device)
            output = model(x)
            test_loss += F.cross_entropy(output, y).item()  # sum up batch loss
            pred = output.max(1, keepdim=True)[1]  # get the index of the max logit
            correct += pred.eq(y.view_as(pred)).sum().item()

            # Convert predictions to numpy arrays
            preds = pred.cpu().numpy()
            labels = y.cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    test_loss /= len(t_test)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(t_test), 100. * correct / len(t_test)))
    print('Precision: {:.4f}, Recall: {:.4f}, F1 Score: {:.4f}\n'.format(precision, recall, f1))

    return 100. * correct / len(t_test)

In [None]:
import warnings
warnings.filterwarnings('ignore')
ewc_accs = []
for id, task in enumerate(tasks):
  avg_acc = 0
  print("Training on task: ", id)

  (x_train, t_train), _ = task

  for epoch in range(1, 11):
    train_ewc(model, device, id, x_train, t_train, optimizer, epoch)
  on_task_update(id, x_train, t_train)

  for id_test, task in enumerate(tasks):
    print("Testing on task: ", id_test)
    _, (x_test, t_test) = task
    acc = test(model, device, x_test, t_test)
    avg_acc = avg_acc + acc

  print("Avg acc: ", avg_acc / 3)
  ewc_accs.append(avg_acc / 3)

Training on task:  0
Testing on task:  0
Test set: Average loss: 0.0030, Accuracy: 1978/2142 (92%)
Precision: 0.9263, Recall: 0.9234, F1 Score: 0.9247

Testing on task:  1
Test set: Average loss: 0.1833, Accuracy: 21/1234 (2%)
Precision: 0.2839, Recall: 0.0170, F1 Score: 0.0321

Testing on task:  2
Test set: Average loss: 0.2763, Accuracy: 2/826 (0%)
Precision: 0.1418, Recall: 0.0024, F1 Score: 0.0048

Avg acc:  31.429172559670857
Training on task:  1
Testing on task:  0
Test set: Average loss: 0.0194, Accuracy: 1289/2142 (60%)
Precision: 0.9606, Recall: 0.6018, F1 Score: 0.7316

Testing on task:  1
Test set: Average loss: 0.0148, Accuracy: 788/1234 (64%)
Precision: 0.7755, Recall: 0.6386, F1 Score: 0.6986

Testing on task:  2
Test set: Average loss: 0.2870, Accuracy: 1/826 (0%)
Precision: 0.1241, Recall: 0.0012, F1 Score: 0.0024

Avg acc:  41.38528135419148
Training on task:  2
Testing on task:  0
Test set: Average loss: 0.0185, Accuracy: 1304/2142 (61%)
Precision: 0.9620, Recall: 0.6