In [1]:
#!/usr/bin/env python3
from argparse import ArgumentParser
import numpy as np
import torch
from data import get_dataset, DATASET_CONFIGS
from train import train
from model import MLP
import utils



import matplotlib.pyplot as plt





In [2]:
# ---------------------------------------------------------------------------
# Experiment configuration. Every later cell reads these notebook-level names.
# ---------------------------------------------------------------------------

# Model settings (passed positionally to MLP(...) in the model cells).
task_number = 10            # number of permuted-MNIST tasks generated below
hidden_size = 512
hidden_layer_num = 2
hidden_dropout_prob = 0.5
input_dropout_prob = 0.2
lamda = 1000                # last MLP argument; presumably the EWC penalty weight - confirm in model.py

# Training settings (passed positionally to train(...) in the training cells).
epochs_per_task = 100
batch_size = 128
test_size = 8192
fisher_estimation_sample_size = 2048
lr = 1.e-3
weight_decay = 0
eval_log_interval = 250
loss_log_interval = 250

# Use the GPU whenever one is available. (The former `cuda=False` assignment
# was dead code: it was unconditionally overwritten by this line.)
cuda = torch.cuda.is_available()

# Seed both random number generators. NumPy drives the pixel permutations
# below; torch is seeded as well so that torch-side randomness in later cells
# starts from a known state on a fresh "Restart & Run All".
np.random.seed(0)
torch.manual_seed(0)

# One fixed permutation of the size**2 input positions per task.
permutations = [
    np.random.permutation(DATASET_CONFIGS['mnist']['size']**2)
    for _ in range(task_number)
]

# Train / test MNIST datasets, one pair per permutation.
train_datasets = [
    get_dataset('mnist', permutation=p) for p in permutations
]
test_datasets = [
    get_dataset('mnist', train=False, permutation=p) for p in permutations
]

    

In [3]:
def loss_plot(x):
    """Draw one loss curve per task, each placed on that task's own epoch window.

    ``x`` maps a task number ``t`` to a sequence of values. The values for
    task ``t`` are plotted against the epoch indices
    ``(t-1)*epochs_per_task .. t*epochs_per_task - 1``, so the ``(t-1)``
    offset treats task numbers as starting at 1.

    Reads the notebook-level ``epochs_per_task`` and draws on the current
    matplotlib axes through ``plt``.
    """
    for task, losses in x.items():
        window_start = (task - 1) * epochs_per_task
        window_stop = task * epochs_per_task
        epoch_axis = list(range(window_start, window_stop))
        plt.plot(epoch_axis, losses)

def accuracy_plot(x):
    """Draw one accuracy curve per task, from the task's first epoch to the end of training.

    ``x`` maps a task number ``t`` to a sequence of values. The values for
    task ``t`` are plotted against the epoch indices
    ``(t-1)*epochs_per_task .. task_number*epochs_per_task - 1``. After all
    curves are drawn the y-axis is restricted to ``[0.8, 1]``.

    Reads the notebook-level ``task_number`` and ``epochs_per_task`` and
    draws on the current matplotlib axes through ``plt``.
    """
    for task, accuracies in x.items():
        first_epoch = (task - 1) * epochs_per_task
        end_of_training = task_number * epochs_per_task
        epoch_axis = list(range(first_epoch, end_of_training))
        plt.plot(epoch_axis, accuracies)
    plt.ylim(0.8, 1)

In [4]:
def avg_precision_plot(precisions, labels=None):
    """Plot, per run, the mean precision over all tasks seen so far at every epoch.

    Args:
        precisions: sequence of precision records, one per run, each in the
            form of the first return value of ``train``. A record is indexed
            as ``record[i][k]``, where ``i`` is the task number (starting at
            1) and ``k`` is the number of epochs elapsed since task ``i``
            started training.
        labels: optional legend labels, one per record. They are applied only
            when ``len(labels) == len(precisions)``; otherwise every curve is
            drawn without a label.

    For global epoch ``e`` the tasks seen so far are
    ``1 .. e // epochs_per_task + 1`` and the plotted value is the plain mean
    of their precisions at that epoch. The y-axis is fixed to
    ``[0.88, 0.94]``.

    Reads the notebook-level ``task_number`` and ``epochs_per_task`` and
    draws on the current matplotlib axes through ``plt``.
    """
    # Avoid a shared mutable default argument.
    if labels is None:
        labels = []
    use_labels = len(labels) == len(precisions)

    # Loop-invariant values: computed once instead of once per run.
    total_epochs = task_number * epochs_per_task
    epoch_axis = range(total_epochs)
    plt.ylim(0.88, 0.94)

    for num, precision in enumerate(precisions):
        avg_precisions = []
        for epoch in epoch_axis:
            # 1 during the first task, 2 during the second, ...
            tasks_considered = epoch // epochs_per_task + 1
            seen_total = sum(
                # Task i started at global epoch (i-1)*epochs_per_task, so
                # this offset is the position inside task i's own record.
                precision[i][epoch - (i - 1) * epochs_per_task]
                for i in range(1, tasks_considered + 1)
            )
            avg_precisions.append(seen_total / tasks_considered)

        if use_labels:
            plt.plot(epoch_axis, avg_precisions, label=labels[num])
        else:
            plt.plot(epoch_axis, avg_precisions)

    # Only build a legend when labelled curves exist; calling legend() with no
    # labelled artists just makes matplotlib emit a warning.
    if use_labels and len(precisions) > 0:
        plt.legend()
    

In [5]:
# Build the network for the runs without dropout. MLP takes its arguments
# positionally; the two literal zeros sit in the slots that the dropout cells
# fill with hidden_dropout_prob and input_dropout_prob.
mlp_no_dropout = MLP(
    DATASET_CONFIGS['mnist']['size']**2, DATASET_CONFIGS['mnist']['classes'],
    hidden_size, hidden_layer_num,
    0, 0,
    lamda,
)

# Initialise the parameters with the project's Gaussian initialiser.
utils.gaussian_initialize(mlp_no_dropout)

# Move the model to the GPU when one was detected in the configuration cell.
if cuda:
    mlp_no_dropout.cuda()




In [6]:
# Baseline run: train the no-dropout model on the permuted-MNIST tasks with
# consolidate=False forwarded to train().
consolidate = False
(
    standard_prec_no_dropout,
    standard_total_loss_no_dropout,
    standard_ce_loss_no_dropout,
    standard_ewc_loss_no_dropout,
) = train(
    mlp_no_dropout, train_datasets, test_datasets,
    epochs_per_task, batch_size, test_size,
    consolidate, fisher_estimation_sample_size,
    lr, weight_decay,
    eval_log_interval, loss_log_interval,
    cuda,
)

=> task: 1/10 | epoch: 1/100 | progress: [60032/60000] (100%) | prec: 0.08333 | loss => ce: 2.288 / ewc: 0.0 / total: 2.288: : 469it [00:30, 15.16it/s]
=> task: 1/10 | epoch: 2/100 | progress: [60032/60000] (100%) | prec: 0.3854 | loss => ce: 2.227 / ewc: 0.0 / total: 2.227: : 469it [00:29, 16.13it/s]
=> task: 1/10 | epoch: 3/100 | progress: [60032/60000] (100%) | prec: 0.4792 | loss => ce: 2.165 / ewc: 0.0 / total: 2.165: : 469it [00:28, 16.38it/s]
=> task: 1/10 | epoch: 4/100 | progress: [60032/60000] (100%) | prec: 0.6875 | loss => ce: 1.981 / ewc: 0.0 / total: 1.981: : 469it [00:28, 16.41it/s]
=> task: 1/10 | epoch: 5/100 | progress: [60032/60000] (100%) | prec: 0.6562 | loss => ce: 1.781 / ewc: 0.0 / total: 1.781: : 469it [00:28, 16.46it/s]
=> task: 1/10 | epoch: 6/100 | progress: [60032/60000] (100%) | prec: 0.8021 | loss => ce: 1.191 / ewc: 0.0 / total: 1.191: : 469it [00:28, 16.42it/s]
=> task: 1/10 | epoch: 7/100 | progress: [60032/60000] (100%) | prec: 0.8021 | loss => ce: 0.

=> task: 1/10 | epoch: 54/100 | progress: [60032/60000] (100%) | prec: 0.9896 | loss => ce: 0.09801 / ewc: 0.0 / total: 0.09801: : 469it [00:28, 16.26it/s]
=> task: 1/10 | epoch: 55/100 | progress: [60032/60000] (100%) | prec: 0.9271 | loss => ce: 0.2527 / ewc: 0.0 / total: 0.2527: : 469it [00:28, 16.30it/s] 
=> task: 1/10 | epoch: 56/100 | progress: [60032/60000] (100%) | prec: 0.8854 | loss => ce: 0.3338 / ewc: 0.0 / total: 0.3338: : 469it [00:28, 16.27it/s] 
=> task: 1/10 | epoch: 57/100 | progress: [60032/60000] (100%) | prec: 0.9479 | loss => ce: 0.1815 / ewc: 0.0 / total: 0.1815: : 469it [00:28, 16.27it/s]
=> task: 1/10 | epoch: 58/100 | progress: [60032/60000] (100%) | prec: 0.9375 | loss => ce: 0.2587 / ewc: 0.0 / total: 0.2587: : 469it [00:28, 16.33it/s] 
=> task: 1/10 | epoch: 59/100 | progress: [60032/60000] (100%) | prec: 0.9583 | loss => ce: 0.1914 / ewc: 0.0 / total: 0.1914: : 469it [00:28, 16.28it/s] 
=> task: 1/10 | epoch: 60/100 | progress: [60032/60000] (100%) | prec:

=> task: 2/10 | epoch: 6/100 | progress: [60032/60000] (100%) | prec: 0.8542 | loss => ce: 0.5824 / ewc: 0.0 / total: 0.5824: : 469it [00:28, 16.38it/s]
=> task: 2/10 | epoch: 7/100 | progress: [60032/60000] (100%) | prec: 0.8854 | loss => ce: 0.3683 / ewc: 0.0 / total: 0.3683: : 469it [00:29, 15.93it/s]
=> task: 2/10 | epoch: 8/100 | progress: [60032/60000] (100%) | prec: 0.9479 | loss => ce: 0.2826 / ewc: 0.0 / total: 0.2826: : 469it [00:29, 15.67it/s]
=> task: 2/10 | epoch: 9/100 | progress: [60032/60000] (100%) | prec: 0.9062 | loss => ce: 0.3227 / ewc: 0.0 / total: 0.3227: : 469it [00:30, 15.60it/s] 
=> task: 2/10 | epoch: 10/100 | progress: [60032/60000] (100%) | prec: 0.9271 | loss => ce: 0.313 / ewc: 0.0 / total: 0.313: : 469it [00:29, 16.07it/s]   
=> task: 2/10 | epoch: 11/100 | progress: [60032/60000] (100%) | prec: 0.8646 | loss => ce: 0.3983 / ewc: 0.0 / total: 0.3983: : 469it [00:29, 16.10it/s]
=> task: 2/10 | epoch: 12/100 | progress: [60032/60000] (100%) | prec: 0.9062 

=> task: 2/10 | epoch: 58/100 | progress: [60032/60000] (100%) | prec: 1.0 | loss => ce: 0.04877 / ewc: 0.0 / total: 0.04877: : 469it [00:27, 17.18it/s]   
=> task: 2/10 | epoch: 59/100 | progress: [60032/60000] (100%) | prec: 0.9688 | loss => ce: 0.1064 / ewc: 0.0 / total: 0.1064: : 469it [00:27, 17.14it/s]  
=> task: 2/10 | epoch: 60/100 | progress: [60032/60000] (100%) | prec: 0.9583 | loss => ce: 0.1547 / ewc: 0.0 / total: 0.1547: : 469it [00:27, 17.11it/s]  
=> task: 2/10 | epoch: 61/100 | progress: [60032/60000] (100%) | prec: 0.9792 | loss => ce: 0.09334 / ewc: 0.0 / total: 0.09334: : 469it [00:27, 17.23it/s]
=> task: 2/10 | epoch: 62/100 | progress: [60032/60000] (100%) | prec: 0.9896 | loss => ce: 0.05061 / ewc: 0.0 / total: 0.05061: : 469it [00:27, 17.16it/s]
=> task: 2/10 | epoch: 63/100 | progress: [60032/60000] (100%) | prec: 0.9792 | loss => ce: 0.08079 / ewc: 0.0 / total: 0.08079: : 469it [00:27, 17.19it/s]
=> task: 2/10 | epoch: 64/100 | progress: [60032/60000] (100%) |

KeyboardInterrupt: 

In [None]:
# Total-loss curves of the baseline (consolidate=False) run without dropout.
loss_plot(standard_total_loss_no_dropout)

In [None]:
# Per-task precision curves of the baseline (consolidate=False) run without dropout.
accuracy_plot(standard_prec_no_dropout)

In [None]:
# Mean precision over the tasks seen so far for the baseline run without
# dropout (a single curve, drawn without a label).
avg_precision_plot([standard_prec_no_dropout])

In [None]:
# EWC run without dropout.
# NOTE: this rebinds the notebook-level `lamda` (1000 in the configuration
# cell) to 400. Every MLP constructed in a later cell is therefore built with
# 400 as well when the notebook is run top to bottom.
lamda = 400
mlp_consolidation_no_dropout = MLP(
    DATASET_CONFIGS['mnist']['size']**2,
    DATASET_CONFIGS['mnist']['classes'],
    hidden_size,
    hidden_layer_num,
    0,  # same slot the dropout cells fill with hidden_dropout_prob
    0,  # same slot the dropout cells fill with input_dropout_prob
    lamda,
)
# NOTE(review): anomaly detection is a global autograd debugging switch. It
# stays enabled for every cell executed after this one and slows training
# down - disable it once the issue being debugged is resolved.
torch.autograd.set_detect_anomaly(True)

# initialize the parameters.
utils.gaussian_initialize(mlp_consolidation_no_dropout)

# Move the model to the GPU like the baseline cell does. `cuda` is also handed
# to train(), so model and training flag now agree on the device.
if cuda:
    mlp_consolidation_no_dropout.cuda()

# run the EWC experiment (consolidate=True is forwarded to train()).
consolidate = True
ewc_prec_no_dropout, ewc_total_loss_no_dropout, ewc_ce_loss_no_dropout, ewc_ewc_loss_no_dropout = train(
    mlp_consolidation_no_dropout, train_datasets, test_datasets,
    epochs_per_task,
    batch_size,
    test_size,
    consolidate,
    fisher_estimation_sample_size,
    lr,
    weight_decay,
    eval_log_interval,
    loss_log_interval,
    cuda
)

In [None]:
# Total-loss curves of the EWC (consolidate=True) run without dropout.
loss_plot(ewc_total_loss_no_dropout)

In [None]:
# Per-task precision curves of the EWC (consolidate=True) run without dropout.
accuracy_plot(ewc_prec_no_dropout)

In [None]:
# Mean precision over the tasks seen so far for the EWC run without dropout
# (a single curve, drawn without a label).
avg_precision_plot([ewc_prec_no_dropout])

In [None]:
# prepare the model (this variant uses the configured dropout probabilities).
mlp_dropout = MLP(
    DATASET_CONFIGS['mnist']['size']**2,
    DATASET_CONFIGS['mnist']['classes'],
    hidden_size,
    hidden_layer_num,
    hidden_dropout_prob,
    input_dropout_prob,
    lamda,
)
# TODO: test whether Gaussian initialisation behaves differently from Xavier
# initialisation here.
# initialize the parameters.
utils.gaussian_initialize(mlp_dropout)

# prepare the cuda if needed.
# Fixed: this used to call `mlp.cuda()`, but no variable named `mlp` exists in
# this notebook, so the cell raised NameError on any machine with CUDA.
if cuda:
    mlp_dropout.cuda()

In [None]:
# Baseline run with dropout: train the dropout model on the permuted-MNIST
# tasks with consolidate=False forwarded to train().
consolidate = False
(
    standard_prec_dropout,
    standard_total_loss_dropout,
    standard_ce_loss_dropout,
    standard_ewc_loss_dropout,
) = train(
    mlp_dropout, train_datasets, test_datasets,
    epochs_per_task, batch_size, test_size,
    consolidate, fisher_estimation_sample_size,
    lr, weight_decay,
    eval_log_interval, loss_log_interval,
    cuda,
)

In [None]:
# Total-loss curves of the baseline (consolidate=False) run with dropout.
loss_plot(standard_total_loss_dropout)

In [None]:
# Per-task precision curves of the baseline (consolidate=False) run with dropout.
accuracy_plot(standard_prec_dropout)

In [None]:
# Mean precision over the tasks seen so far for the baseline run with dropout
# (a single curve, drawn without a label).
avg_precision_plot([standard_prec_dropout])

In [None]:
# EWC run with dropout. `lamda` is whatever value the notebook currently
# holds (an earlier cell rebinds it to 400).
mlp_consolidation_dropout = MLP(
    DATASET_CONFIGS['mnist']['size']**2,
    DATASET_CONFIGS['mnist']['classes'],
    hidden_size,
    hidden_layer_num,
    hidden_dropout_prob,
    input_dropout_prob,
    lamda,
)

# initialize the parameters.
utils.gaussian_initialize(mlp_consolidation_dropout)

# Move the model to the GPU like the baseline cells do. `cuda` is also handed
# to train(), so model and training flag now agree on the device.
if cuda:
    mlp_consolidation_dropout.cuda()

# run the EWC experiment (consolidate=True is forwarded to train()).
consolidate = True
ewc_prec_dropout, ewc_total_loss_dropout, ewc_ce_loss_dropout, ewc_ewc_loss_dropout = train(
    mlp_consolidation_dropout, train_datasets, test_datasets,
    epochs_per_task,
    batch_size,
    test_size,
    consolidate,
    fisher_estimation_sample_size,
    lr,
    weight_decay,
    eval_log_interval,
    loss_log_interval,
    cuda
)

In [None]:
# Total-loss curves of the EWC (consolidate=True) run with dropout.
loss_plot(ewc_total_loss_dropout)

In [None]:
# Per-task precision curves of the EWC (consolidate=True) run with dropout.
accuracy_plot(ewc_prec_dropout)

In [None]:
# Mean precision over the tasks seen so far for the EWC run with dropout
# (a single curve, drawn without a label).
avg_precision_plot([ewc_prec_dropout])
    


In [None]:
# Compare the four runs produced above on one figure.
# Fixed: the previous call referenced `ewc_prec` and `standard_prec`, which
# are never defined in this notebook (NameError), and its labels
# ("dropout", "ewc_40", "ewc_100") did not describe the plotted series.
# NOTE(review): confirm that this 2x2 comparison is the intended one.
avg_precision_plot(
    [
        standard_prec_no_dropout,
        standard_prec_dropout,
        ewc_prec_no_dropout,
        ewc_prec_dropout,
    ],
    [
        "standard, no dropout",
        "standard, dropout",
        "EWC, no dropout",
        "EWC, dropout",
    ],
)

In [None]:
# Disabled experiment (lamda = 1000, consolidate = True). The code is wrapped
# in a string literal, so running this cell builds no model and starts no
# training; nothing named ewc_*_1000 is defined by it.
'''lamda = 1000
mlp_consolidation_1000 = MLP(
    DATASET_CONFIGS['mnist']['size']**2,
    DATASET_CONFIGS['mnist']['classes'],
    hidden_size,
    hidden_layer_num,
    hidden_dropout_prob,
    input_dropout_prob,
    lamda,
)

# run the standard experiment.
consolidate = True
ewc_prec_1000, ewc_total_loss_1000, ewc_ce_loss_1000, ewc_ewc_loss_1000 =train(
    mlp_consolidation_1000, train_datasets, test_datasets,
    epochs_per_task,
    batch_size,
    test_size,
    consolidate,
    fisher_estimation_sample_size,
    lr,
    weight_decay,
    eval_log_interval,
    loss_log_interval,
    cuda
)'''

In [None]:
#loss_plot(ewc_total_loss_1000)

In [None]:
#accuracy_plot(ewc_prec_1000)

In [None]:
# Disabled experiment (lamda = 10, consolidate = True). The code is wrapped
# in a string literal, so running this cell builds no model and starts no
# training; nothing named ewc_*_10 is defined by it.
'''lamda = 10
mlp_consolidation_10 = MLP(
    DATASET_CONFIGS['mnist']['size']**2,
    DATASET_CONFIGS['mnist']['classes'],
    hidden_size,
    hidden_layer_num,
    hidden_dropout_prob,
    input_dropout_prob,
    lamda,
)

# run the standard experiment.
consolidate = True
ewc_prec_10, ewc_total_loss_10, ewc_ce_loss_10, ewc_ewc_loss_10 =train(
    mlp_consolidation_10, train_datasets, test_datasets,
    epochs_per_task,
    batch_size,
    test_size,
    consolidate,
    fisher_estimation_sample_size,
    lr,
    weight_decay,
    eval_log_interval,
    loss_log_interval,
    cuda
)'''

In [None]:
#loss_plot(ewc_total_loss_10)

In [None]:
#accuracy_plot(ewc_prec_10)

In [None]:
# Disabled experiment (lamda = 80, consolidate = True). The code is wrapped
# in a string literal, so running this cell builds no model and starts no
# training; nothing named ewc_*_80 is defined by it.
'''lamda = 80
mlp_consolidation_80 = MLP(
    DATASET_CONFIGS['mnist']['size']**2,
    DATASET_CONFIGS['mnist']['classes'],
    hidden_size,
    hidden_layer_num,
    hidden_dropout_prob,
    input_dropout_prob,
    lamda,
)

# run the standard experiment.
consolidate = True
ewc_prec_80, ewc_total_loss_80, ewc_ce_loss_80, ewc_ewc_loss_80 =train(
    mlp_consolidation_80, train_datasets, test_datasets,
    epochs_per_task,
    batch_size,
    test_size,
    consolidate,
    fisher_estimation_sample_size,
    lr,
    weight_decay,
    eval_log_interval,
    loss_log_interval,
    cuda
)'''

In [None]:
#loss_plot(ewc_total_loss_80)

In [None]:
#accuracy_plot(ewc_prec_80)