In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
from tensorflow.keras.datasets import cifar10
(x_train, t_train), (x_test, t_test) = cifar10.load_data()

In [3]:
x_train=torch.as_tensor(x_train)
x_train = x_train.permute(0, 3 , 1, 2)
print("x_train dim and type: ", x_train.shape, x_train.dtype)
print("t_train dim and type: ", t_train.shape, t_train.dtype)
x_test=torch.as_tensor(x_test)
x_test = x_test.permute(0, 3 , 1, 2)
print("x_test dim and type: ", x_test.shape, x_test.dtype)
print("t_test dim and type: ", t_test.shape, t_test.dtype)

x_train dim and type:  torch.Size([50000, 3, 32, 32]) torch.uint8
t_train dim and type:  (50000, 1) uint8
x_test dim and type:  torch.Size([10000, 3, 32, 32]) torch.uint8
t_test dim and type:  (10000, 1) uint8


In [4]:
def split_cifar10(train_x, train_y, test_x, test_y, n_splits=2):
    """ Given the training set, split the tensors by the class label. """
    n_classes = 10
    if n_classes % n_splits != 0:
        print("n_classes should be a multiple of the number of splits!")
        raise NotImplemented
    class_for_split = n_classes // n_splits
    cifar_train_test = [[],[]]  # train and test
    for id, data_set in enumerate([(train_x, train_y), (test_x, test_y)]):
        for i in range(n_splits):
            start = i * class_for_split
            end = (i + 1) * class_for_split
            split_idxs = np.where(np.logical_and(data_set[1] >= start, data_set[1] < end))[0]
            cifar_train_test[id].append((data_set[0][split_idxs], data_set[1][split_idxs]))
    return cifar_train_test

In [5]:
import numpy as np
splitcifar = split_cifar10(x_train, t_train, x_test, t_test, n_splits=2)
# converting list to array
splitcifar = np.asarray(splitcifar,dtype='object')

In [6]:
train_task1_data=splitcifar[0][0][0]
train_task1_label=splitcifar[0][0][1]
train_task2_data=splitcifar[0][1][0]
train_task2_label=splitcifar[0][1][1]
test_task1_data=splitcifar[1][0][0]
test_task1_label=splitcifar[1][0][1]
test_task2_data=splitcifar[1][1][0]
test_task2_label=splitcifar[1][1][1]

In [7]:
X_train_task1 = torch.as_tensor(train_task1_data, dtype=torch.float32)
Y_train_task1 = torch.as_tensor(train_task1_label, dtype=torch.float32)
Y_train_task1=Y_train_task1.long()
X_test_task1 = torch.as_tensor(test_task1_data, dtype=torch.float32)
Y_test_task1 = torch.as_tensor(test_task1_label, dtype=torch.float32)
Y_test_task1=Y_test_task1.long()

X_train_task2 = torch.as_tensor(train_task2_data, dtype=torch.float32)
Y_train_task2 = torch.as_tensor(train_task2_label, dtype=torch.float32)
Y_train_task2=Y_train_task2.long()
X_test_task2= torch.as_tensor(test_task2_data, dtype=torch.float32)
Y_test_task2 = torch.as_tensor(test_task2_label, dtype=torch.float32)
Y_test_task2=Y_test_task2.long()

In [8]:
Y_train_task1=torch.squeeze(Y_train_task1)
Y_test_task1=torch.squeeze(Y_test_task1)
Y_train_task2=torch.squeeze(Y_train_task2)
Y_test_task2=torch.squeeze(Y_test_task2)
Y_train_task1=torch.nn.functional.one_hot(Y_train_task1, num_classes=10)
Y_test_task1=torch.nn.functional.one_hot(Y_test_task1, num_classes=10)
Y_train_task2=torch.nn.functional.one_hot(Y_train_task2, num_classes=10)
Y_test_task2=torch.nn.functional.one_hot(Y_test_task2, num_classes=10)

In [9]:
print("x_train task-1 dim and type: ", X_train_task1.shape, X_train_task1.dtype)
print("t_train task-1 dim and type: ", Y_train_task1.shape, Y_train_task1.dtype)
print("x_test task-1 dim and type: ", X_test_task1.shape, X_test_task1.dtype)
print("t_test task-1 dim and type: ", Y_test_task1.shape, Y_test_task1.dtype)

print("x_train task-2 dim and type: ", X_train_task2.shape, X_train_task2.dtype)
print("t_train task-2 dim and type: ", Y_train_task2.shape, Y_train_task2.dtype)
print("x_test task-2 dim and type: ", X_test_task2.shape, X_test_task2.dtype)
print("t_test task-2 dim and type: ", Y_test_task2.shape, Y_test_task2.dtype)

x_train task-1 dim and type:  torch.Size([25000, 3, 32, 32]) torch.float32
t_train task-1 dim and type:  torch.Size([25000, 10]) torch.int64
x_test task-1 dim and type:  torch.Size([5000, 3, 32, 32]) torch.float32
t_test task-1 dim and type:  torch.Size([5000, 10]) torch.int64
x_train task-2 dim and type:  torch.Size([25000, 3, 32, 32]) torch.float32
t_train task-2 dim and type:  torch.Size([25000, 10]) torch.int64
x_test task-2 dim and type:  torch.Size([5000, 3, 32, 32]) torch.float32
t_test task-2 dim and type:  torch.Size([5000, 10]) torch.int64


In [10]:
# task 1
task_1 = [(X_train_task1, Y_train_task1), (X_test_task1, Y_test_task1)]

# task 2
task_2 = [(X_train_task2, Y_train_task2), (X_test_task2, Y_test_task2)]

# task list
tasks = [task_1, task_2]

In [11]:
# switch to False to use CPU
use_cuda = True
use_cuda = use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu");
torch.manual_seed(123123);
m = nn.LogSoftmax(dim=1)
Loss = nn.MSELoss()
batch_size=64

In [12]:
from sklearn.metrics import precision_score, recall_score, f1_score
import torch.nn as nn
import torch.nn.functional as F
number_of_classes=10

class ImprovedNet(nn.Module):
    def __init__(self):
        super(ImprovedNet, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=0)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=0)

        # Fully connected layers
        self.fc1 = nn.Linear(self.calculate_flattening_size(), 2048)
        self.fc2 = nn.Linear(2048, 256)
        self.fc3 = nn.Linear(256, number_of_classes)

        # Activation
        self.act = nn.Softmax()

    def calculate_flattening_size(self):
        # Dummy input to get the size after passing through convolutional layers
        x = torch.randn(512, 3, 32, 32)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        return x.size(1) * x.size(2) * x.size(3)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))

        # Dynamically compute the flattening size based on the shape of the output tensor
        x_size = x.size(1) * x.size(2) * x.size(3)
        x = x.view(-1, x_size)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.act(self.fc3(x))
        return x


def evaluate_metrics(model, device, x_data, t_data):
    model.eval()
    all_preds = []
    all_labels = []

    for start in tqdm(range(0, len(t_data)-1, batch_size)):
        end = start + batch_size
        with torch.no_grad():
            x, y = torch.from_numpy(x_data[start:end]), torch.from_numpy(t_data[start:end])
            x, y = x.to(device), y.to(device)
            output = model(x)
            output = output.float()
            y = y.float()

            # Convert predictions to numpy arrays
            preds = output.argmax(dim=1).cpu().numpy()
            labels = y.argmax(dim=1).cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return {'precision': precision, 'recall': recall, 'f1': f1}

def train(model, device, x_train, t_train, optimizer, epoch):
    model.train()

    for start in range(0, len(t_train)-1, batch_size):
      end = start + batch_size
      x, y = torch.from_numpy(x_train[start:end]), torch.from_numpy(t_train[start:end])
      y = y.type(torch.LongTensor)
      x, y = x.to(device), y.to(device)
      optimizer.zero_grad()
      output = model(x)
      y = y.squeeze()
      output = output.float()
      y = y.float()
      loss = Loss(output, y)
      loss.backward()
      optimizer.step()
      #print(loss.item())
    print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))

def test(model, device, x_test, t_test):
    model.eval()
    test_loss = 0
    correct = 0
    all_preds = []
    all_labels = []
    batch_size = 1

    for start in tqdm(range(0, len(t_test)-1, batch_size)):
        end = start + batch_size
        with torch.no_grad():
            x, y = (x_test[start:end]), (t_test[start:end])
            x, y = x.to(device), y.to(device)
            output = model(x)
            output = output.float()
            y = y.float()
            test_loss += Loss(output, y) # sum up batch loss
            pred = output.max(1)[1]  # get the index of the max logit
            correct += pred.eq(y.max(1)[1]).sum().item()

            # Convert predictions to numpy arrays
            preds = pred.cpu().numpy()
            labels = y.argmax(dim=1).cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    test_loss /= len(t_test)

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(t_test), 100. * correct / len(t_test)))
    print('Precision: {:.4f}, Recall: {:.4f}, F1 Score: {:.4f}\n'.format(precision, recall, f1))

    return 100. * correct / len(t_test)

In [13]:
model = ImprovedNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=10**-5)

In [14]:
def shuffle_in_unison(dataset, seed, in_place=False):
    """ Shuffle two (or more) list in unison. """

    np.random.seed(seed)
    rng_state = np.random.get_state()
    new_dataset = []
    for x in dataset:
        if in_place:
            np.random.shuffle(x)
        else:
            new_dataset.append(np.random.permutation(x))
        np.random.set_state(rng_state)

    if not in_place:
        return new_dataset

In [15]:
from sklearn.utils import shuffle
from sklearn.preprocessing import OneHotEncoder

def balanced_replay_samples(data_features, data_labels, num_samples_per_task):

  # Check if labels array has only two unique values
  unique_labels = np.unique(data_labels)

  # print(unique_labels)
  if len(unique_labels) != 2:
    raise ValueError("This function expects data with only two unique labels.")

  # Create empty NumPy arrays for each class
  class1_features = np.empty((0,) + data_features.shape[1:])
  class2_features = np.empty((0,) + data_features.shape[1:])
  class1_labels = np.empty((0,) + data_labels.shape[1:])
  class2_labels = np.empty((0,) + data_labels.shape[1:])

  # Zip features and labels for combined iteration
  combined_data = zip(data_features, data_labels)

  # Iterate through zipped data, sorting into class arrays
  for features, label in tqdm(combined_data):
    templabel = (np.argmax(label, axis=0)).reshape(-1, 1)
    if (templabel == unique_labels[0]):
      class1_features = np.concatenate((class1_features, [features]))
      class1_labels= np.concatenate((class1_labels, [label]))
    else:
      class2_features = np.concatenate((class2_features, [features]))
      class2_labels= np.concatenate((class2_labels, [label]))


  # Randomly select samples from each class array
  selected_class1_features = class1_features[:num_samples_per_task]
  selected_class1_labels = class1_labels[:num_samples_per_task]
  selected_class1_features, selected_class1_labels = shuffle(selected_class1_features, selected_class1_labels, random_state=45896)

  selected_class2_features = class2_features[:num_samples_per_task]
  selected_class2_labels = class2_labels[:num_samples_per_task]
  selected_class2_features, selected_class2_labels = shuffle(selected_class2_features, selected_class2_labels, random_state=45896)

  # Combine and return selected features and labels
  balanced_features = np.concatenate((selected_class1_features, selected_class2_features))
  balanced_labels = np.concatenate((selected_class1_labels,selected_class2_labels))
  balanced_features = torch.tensor(balanced_features, dtype=torch.float32)
  balanced_labels = torch.tensor(balanced_labels, dtype=torch.float32)
  balanced_features=np.asarray(balanced_features)
  balanced_labels=np.asarray(balanced_labels)

  return balanced_features, balanced_labels

In [16]:
import warnings
warnings.filterwarnings('ignore')
from tqdm.auto import tqdm
num_samples_per_task=300
rehe_accs = []
for id, task in enumerate(tasks):
  avg_acc = 0
  print("Training on task: ", id)

  (x_train, t_train), _ = task

  # for previous task
  for i in range(id):
    (past_x_train, past_t_train), _ = tasks[i]

    past_x_train, past_t_train = balanced_replay_samples(past_x_train, past_t_train, num_samples_per_task)

    x_train = np.concatenate((x_train, past_x_train))
    t_train = np.concatenate((t_train, past_t_train))

  x_train, t_train = shuffle_in_unison([x_train, t_train], 0)

  for epoch in range(1, 11):
    train(model, device, x_train, t_train, optimizer, epoch)

  for id_test, task in enumerate(tasks):
    print("Testing on task: ", id_test)
    _, (x_test, t_test) = task
    acc = test(model, device, x_test, t_test)
    avg_acc = avg_acc + acc

  print("Avg acc: ", avg_acc / 2)
  rehe_accs.append(avg_acc/2)

Training on task:  0


  0%|          | 0/391 [00:00<?, ?it/s]

Train Epoch: 1 	Loss: 0.048449


  0%|          | 0/391 [00:00<?, ?it/s]

Train Epoch: 2 	Loss: 0.034776


  0%|          | 0/391 [00:00<?, ?it/s]

Train Epoch: 3 	Loss: 0.024267


  0%|          | 0/391 [00:00<?, ?it/s]

Train Epoch: 4 	Loss: 0.018230


  0%|          | 0/391 [00:00<?, ?it/s]

Train Epoch: 5 	Loss: 0.013253


  0%|          | 0/391 [00:00<?, ?it/s]

Train Epoch: 6 	Loss: 0.013277


  0%|          | 0/391 [00:00<?, ?it/s]

Train Epoch: 7 	Loss: 0.007168


  0%|          | 0/391 [00:00<?, ?it/s]

Train Epoch: 8 	Loss: 0.003792


  0%|          | 0/391 [00:00<?, ?it/s]

Train Epoch: 9 	Loss: 0.008359


  0%|          | 0/391 [00:00<?, ?it/s]

Train Epoch: 10 	Loss: 0.011226
Testing on task:  0


  0%|          | 0/4999 [00:00<?, ?it/s]

Test set: Average loss: 0.0427, Accuracy: 3503/5000 (70%)
Precision: 0.7425, Recall: 0.7007, F1 Score: 0.6895

Testing on task:  1


  0%|          | 0/4999 [00:00<?, ?it/s]

Test set: Average loss: 0.1665, Accuracy: 0/5000 (0%)
Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000

Avg acc:  35.03
Training on task:  1


0it [00:00, ?it/s]

  0%|          | 0/400 [00:00<?, ?it/s]

Train Epoch: 1 	Loss: 0.025909


  0%|          | 0/400 [00:00<?, ?it/s]

Train Epoch: 2 	Loss: 0.013326


  0%|          | 0/400 [00:00<?, ?it/s]

Train Epoch: 3 	Loss: 0.007896


  0%|          | 0/400 [00:00<?, ?it/s]

Train Epoch: 4 	Loss: 0.004947


  0%|          | 0/400 [00:00<?, ?it/s]

Train Epoch: 5 	Loss: 0.005213


  0%|          | 0/400 [00:00<?, ?it/s]

Train Epoch: 6 	Loss: 0.007496


  0%|          | 0/400 [00:00<?, ?it/s]

Train Epoch: 7 	Loss: 0.004355


  0%|          | 0/400 [00:00<?, ?it/s]

Train Epoch: 8 	Loss: 0.002746


  0%|          | 0/400 [00:00<?, ?it/s]

Train Epoch: 9 	Loss: 0.002515


  0%|          | 0/400 [00:00<?, ?it/s]

Train Epoch: 10 	Loss: 0.002508
Testing on task:  0


  0%|          | 0/4999 [00:00<?, ?it/s]

Test set: Average loss: 0.1520, Accuracy: 334/5000 (7%)
Precision: 0.4688, Recall: 0.0668, F1 Score: 0.0974

Testing on task:  1


  0%|          | 0/4999 [00:00<?, ?it/s]

Test set: Average loss: 0.0228, Accuracy: 4223/5000 (84%)
Precision: 0.8486, Recall: 0.8448, F1 Score: 0.8460

Avg acc:  45.56999999999999
