## **CNN**

In [17]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import random

# define model structure
def SimpleCNN_Sam():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Conv2D(4, (5, 5), activation='relu', input_shape=(28, 28, 1)))
    model.add(tf.keras.layers.MaxPooling2D((2, 2)))
    model.add(tf.keras.layers.Conv2D(8, (3, 3), activation='relu'))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(10, activation='relu'))
    model.add(tf.keras.layers.Dense(10))
    return model

# function to train model on specified training set and test set
def train(model, train_data, train_label, test_data, test_label, train_acc, test_acc, epochs):
    # define optimizer and loss function to use
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    for i in range(epochs):
        history = model.fit(train_data, train_label)
        train_loss, train_accuracy = model.evaluate(test_data,  test_label, verbose=2)

        # append accuracy to lists
        train_acc += history.history['accuracy']
        test_acc.append(train_accuracy)

## **Example Forgetting and Simple Testing**

In [47]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import random

# define model structure
def SimpleCNN():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Conv2D(4, (5, 5), activation='relu', input_shape=(28, 28, 1)))
    model.add(tf.keras.layers.MaxPooling2D((2, 2)))
    model.add(tf.keras.layers.Conv2D(8, (3, 3), activation='relu'))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(10, activation='relu'))
    model.add(tf.keras.layers.Dense(10))
    return model

# function to train model on specified training set and test set
def train_f(model, train_data, train_label, epochs):
    # define optimizer and loss function to use
    opt = tf.keras.optimizers.SGD(learning_rate=0.01)
    
    stat = {}
    for i in range(epochs):
        random_arrange = random.sample(range(len(train_data)), len(train_data))
        train = []
        label = [] 
        for i in range(len(train_data)):
            train.append(train_data[random_arrange[i]])
            label.append(train_label[random_arrange[i]])

        model.compile(optimizer=opt,
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
        history = model.fit(tf.convert_to_tensor(train), tf.convert_to_tensor(label))

        probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])
        predictions = probability_model.predict(train_data)
        for i in range(len(train_data)):
            if i in stat:
              stat[i].append(np.argmax(predictions[i]) == train_label[i])
            else:
              stat[i] = [np.argmax(predictions[i]) == train_label[i]]
            
    forgetness = {}
    
    for index, lst in stat.items():
      acc_full = np.array(list(map(int, lst)))
      transition = acc_full[1:] - acc_full[:-1]
      if len(np.where(transition == -1)[0]) > 0:
        forgetness[index] = len(np.where(transition == -1)[0])
      elif len(np.where(acc_full == 1)[0]) == 0:
        forgetness[index] = epochs
      else:
        forgetness[index] = 0
  
    #print(dict(sorted(forgetness.items(), key = lambda item: item[1], reverse = True)))
    result = []
    for i,j in forgetness.items():
      if i <= len(train_data) * 0.1:
        result.append(i)
        
    return result

## **Test code for Example Forgetting**

In [4]:
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
model = SimpleCNN()
result = train_f(model, train_images, train_labels, 5)
len(result)



649

## **Continual Learning 1: Independent Tasks**

In [21]:
# split training data into 2 different data sets
choice = np.random.choice(range(train_images.shape[0]), size=(len(train_images)//2,), replace=False)
ind = np.zeros(train_images.shape[0], dtype=bool)
ind[choice] = True

first_data = train_images[ind]
first_label = train_labels[ind]
second_data = train_images[~ind]
second_label = train_labels[~ind]

# split first data into training (80%) and test data set (20%)
choice = np.random.choice(range(first_data.shape[0]), size=(int(len(first_data)*0.8),), replace=False)
ind = np.zeros(first_data.shape[0], dtype=bool)
ind[choice] = True

first_data_train = first_data[ind]
first_label_train = first_label[ind]
first_data_test= first_data[~ind]
first_label_test = first_label[~ind]

# train on the first dataset and check the test accuracy
model = SimpleCNN_Sam()
train_acc = []
test_acc = []
train(model, first_data_train, first_label_train, first_data_test, first_label_test, train_acc, test_acc, 5)
model.evaluate(first_data_test, first_label_test, verbose=2)

# use this model to predict second data
model.evaluate(second_data, second_label, verbose=2)

188/188 - 1s - loss: 2.0294 - accuracy: 0.2177 - 1s/epoch - 6ms/step
188/188 - 1s - loss: 1.9632 - accuracy: 0.2332 - 939ms/epoch - 5ms/step
188/188 - 1s - loss: 1.5290 - accuracy: 0.3933 - 937ms/epoch - 5ms/step
188/188 - 1s - loss: 0.5398 - accuracy: 0.8787 - 942ms/epoch - 5ms/step
188/188 - 1s - loss: 0.2439 - accuracy: 0.9303 - 942ms/epoch - 5ms/step
188/188 - 1s - loss: 0.2439 - accuracy: 0.9303 - 994ms/epoch - 5ms/step
938/938 - 5s - loss: 0.2353 - accuracy: 0.9335 - 5s/epoch - 5ms/step


[0.23532122373580933, 0.9334666728973389]

In [22]:
# split second data into training (80%) and test data set (20%)
choice = np.random.choice(range(second_data.shape[0]), size=(int(len(second_data)*0.8),), replace=False)
ind = np.zeros(second_data.shape[0], dtype=bool)
ind[choice] = True

second_data_train = second_data[ind]
second_label_train = second_label[ind]
second_data_test = second_data[~ind]
second_label_test = second_label[~ind]

# train on the second dataset and check the test accuracy
model = SimpleCNN_Sam()
train_acc = []
test_acc = []
train(model, second_data_train, second_label_train, second_data_test, second_label_test, train_acc, test_acc, 5)
model.evaluate(second_data_test, second_label_test, verbose=2)

188/188 - 1s - loss: 1.6373 - accuracy: 0.3683 - 1s/epoch - 6ms/step
188/188 - 1s - loss: 1.1750 - accuracy: 0.5700 - 929ms/epoch - 5ms/step
188/188 - 1s - loss: 1.0096 - accuracy: 0.6148 - 920ms/epoch - 5ms/step
188/188 - 1s - loss: 0.8338 - accuracy: 0.6705 - 912ms/epoch - 5ms/step
188/188 - 1s - loss: 0.6067 - accuracy: 0.8093 - 928ms/epoch - 5ms/step
188/188 - 1s - loss: 0.6067 - accuracy: 0.8093 - 928ms/epoch - 5ms/step


[0.6066831350326538, 0.809333324432373]

Using first model to predict the second data set, accuracy is 93%. However, using second model to predict the second data set, accuracy is 81%. There is some forgetting.

## **Continual Learning 2: Random Selection Replay Strategy**

In [37]:
import array
# randomly chosen 10% (hyperparameter?) of data from first data
choice = np.random.choice(range(train_images.shape[0]), size=(int(len(second_data)*0.1),), replace=False)
ind = np.zeros(train_images.shape[0], dtype=bool)
ind[choice] = True

data_to_add = train_images[ind]
label_to_add = train_labels[ind]

second_data_random = []
second_label_random = []
for element in second_data_train:
  second_data_random.append(element)
for element in data_to_add:
  second_data_random.append(element)
for element in second_label_train:
  second_label_random.append(element)
for element in label_to_add:
  second_label_random.append(element)

second_data_random = np.array(second_data_random)
second_label_random = np.array(second_label_random)

# train on the second dataset and check the test accuracy
model = SimpleCNN_Sam()
train_acc = []
test_acc = []
train(model, second_data_random, second_label_random, second_data_test, second_label_test, train_acc, test_acc, 5)
model.evaluate(second_data_test, second_label_test, verbose=2)

188/188 - 1s - loss: 2.0441 - accuracy: 0.2102 - 1s/epoch - 5ms/step
188/188 - 1s - loss: 1.7990 - accuracy: 0.3098 - 913ms/epoch - 5ms/step
188/188 - 1s - loss: 1.1949 - accuracy: 0.5903 - 909ms/epoch - 5ms/step
188/188 - 1s - loss: 0.6285 - accuracy: 0.7857 - 926ms/epoch - 5ms/step
188/188 - 1s - loss: 0.4101 - accuracy: 0.9007 - 913ms/epoch - 5ms/step
188/188 - 1s - loss: 0.4101 - accuracy: 0.9007 - 909ms/epoch - 5ms/step


[0.4100704789161682, 0.9006666541099548]

With random selection of data from the first data and put into the second test data, the test accuracy improved to 90%.

## **Continual Learning 3: Example Forgetting Replay Strategy**

In [48]:
# obtain data with highest forgetting statistics from the first data
model = SimpleCNN()
result = train_f(model, first_data, first_label, 5)
print(len(result)/len(first_data))

0.10003333333333334


10% of data is included into second data test set

In [None]:
result

In [49]:
second_data_forget = []
second_label_forget = []
for element in second_data_train:
  second_data_forget.append(element)
for element in result:
  second_data_forget.append(first_data[element])
for element in second_label_train:
  second_label_forget.append(element)
for element in result:
  second_label_forget.append(first_label[element])

second_data_forget = np.array(second_data_forget)
second_label_forget = np.array(second_label_forget)

# train on the second dataset and check the test accuracy
model = SimpleCNN_Sam()
train_acc = []
test_acc = []
train(model, second_data_forget, second_label_forget, second_data_test, second_label_test, train_acc, test_acc, 5)
model.evaluate(second_data_test, second_label_test, verbose=2)

188/188 - 1s - loss: 1.7917 - accuracy: 0.2975 - 1s/epoch - 5ms/step
188/188 - 1s - loss: 1.3750 - accuracy: 0.4572 - 938ms/epoch - 5ms/step
188/188 - 1s - loss: 1.0074 - accuracy: 0.6073 - 912ms/epoch - 5ms/step
188/188 - 1s - loss: 0.3727 - accuracy: 0.9013 - 913ms/epoch - 5ms/step
188/188 - 1s - loss: 0.2549 - accuracy: 0.9353 - 909ms/epoch - 5ms/step
188/188 - 1s - loss: 0.2549 - accuracy: 0.9353 - 919ms/epoch - 5ms/step


[0.2549079358577728, 0.9353333115577698]

This time, the accuracy improved to 93.5%!