In [1]:
#!/usr/bin/env python3
from argparse import ArgumentParser
import numpy as np
import torch
from data import get_dataset, DATASET_CONFIGS
from train import train
from model import MLP
import utils



import matplotlib.pyplot as plt





In [2]:
# --- experiment / model settings -------------------------------------------
# task_number: how many permuted tasks are generated; also read by the plot helpers.
task_number = 10
# hidden_size1 / hidden_size2: passed positionally to MLP after the input/output sizes.
hidden_size1 = 512
hidden_size2 = 256
hidden_dropout_prob = 0.5
input_dropout_prob = 0.2
# lamda: last positional argument handed to MLP.
lamda = 1000

# --- training / evaluation settings (forwarded positionally to train) -------
epochs_per_task = 100
batch_size = 128
test_size = 8192
fisher_estimation_sample_size = 2048
lr = 1e-3
weight_decay = 0
eval_log_interval = 250
loss_log_interval = 250
# Placeholder only; the flag is re-derived from torch.cuda.is_available() right after.
cuda = False

# Use the GPU whenever torch reports one; this replaces the placeholder value of `cuda`.
cuda = torch.cuda.is_available()

# Draw one pixel permutation per task. The fixed seed keeps the permutations
# identical across kernel restarts.
np.random.seed(0)
permutations = [
    np.random.permutation(DATASET_CONFIGS['mnist']['size'] ** 2)
    for _ in range(task_number)
]

# Build one MNIST train split and one MNIST test split per permutation
# (all train splits first, then all test splits).
train_datasets = [get_dataset('mnist', permutation=perm) for perm in permutations]
test_datasets = [
    get_dataset('mnist', train=False, permutation=perm)
    for perm in permutations
]

    

In [3]:
def loss_plot(x):
    """Draw one loss curve per task on the current matplotlib figure.

    ``x`` is a mapping whose keys are treated as 1-based task numbers and whose
    values are the loss sequences for that task. Task ``t`` is plotted over the
    epoch indices ``(t-1)*epochs_per_task`` up to (excluding)
    ``t*epochs_per_task``, so each value sequence must have exactly
    ``epochs_per_task`` entries. Reads the notebook-level ``epochs_per_task``.
    """
    for task, losses in x.items():
        first_epoch = (task - 1) * epochs_per_task
        epoch_axis = list(range(first_epoch, first_epoch + epochs_per_task))
        plt.plot(epoch_axis, losses)

def accuracy_plot(x):
    """Draw one precision curve per task on the current matplotlib figure.

    ``x`` is a mapping whose keys are treated as 1-based task numbers. The
    curve for task ``t`` starts at epoch ``(t-1)*epochs_per_task`` and runs to
    the end of the whole schedule (``task_number*epochs_per_task``), so its
    value sequence must cover exactly that many epochs. The y-axis is clamped
    to [0.8, 1]. Reads the notebook-level ``task_number`` and ``epochs_per_task``.
    """
    for task, precisions in x.items():
        first_epoch = (task - 1) * epochs_per_task
        final_epoch = task_number * epochs_per_task
        epoch_axis = list(range(first_epoch, final_epoch))
        plt.plot(epoch_axis, precisions)
    plt.ylim(0.8, 1)

In [4]:
def avg_precision_plot(precisions, labels=None):
    """Plot, per run, the precision averaged over all tasks started so far.

    Args:
        precisions: sequence of per-run precision records, each in the form of
            the first value returned by ``train``. A record is indexed by
            1-based task number and yields that task's per-epoch precisions,
            counted from the epoch at which the task's training began.
        labels: optional legend labels, one per entry of ``precisions``. When
            omitted, or when the length does not match ``precisions``, the
            curves are drawn without labels and no legend is added.

    Reads the notebook-level ``task_number`` and ``epochs_per_task``. The
    y-axis is clamped to [0.88, 0.94].
    """
    # None instead of a mutable [] default; behaviour for callers is unchanged.
    labels = [] if labels is None else labels
    use_labels = len(labels) == len(precisions)

    # Loop-invariant values, computed once instead of once per run.
    total_epochs = task_number * epochs_per_task
    epoch_axis = range(total_epochs)

    for num, precision in enumerate(precisions):
        avg_precisions = []
        for epoch in epoch_axis:
            # 1 during the first task's epochs, 2 during the second task's, ...
            tasks_considered = epoch // epochs_per_task + 1
            # Task i's record starts at global epoch (i-1)*epochs_per_task,
            # so shift the global epoch into that task's local index.
            summed = sum(
                precision[i][epoch - (i - 1) * epochs_per_task]
                for i in range(1, tasks_considered + 1)
            )
            avg_precisions.append(summed / tasks_considered)

        if use_labels:
            plt.plot(epoch_axis, avg_precisions, label=labels[num])
        else:
            plt.plot(epoch_axis, avg_precisions)

    plt.ylim(0.88, 0.94)
    # Only request a legend when labelled curves exist; calling plt.legend()
    # with no labelled artists just emits a matplotlib warning.
    if use_labels and len(labels) > 0:
        plt.legend()
    

In [5]:
# Baseline network: same layer sizes as the other models, both dropout
# probabilities set to 0.
mlp_no_dropout = MLP(
    DATASET_CONFIGS['mnist']['size'] ** 2, DATASET_CONFIGS['mnist']['classes'],
    hidden_size1, hidden_size2,
    0, 0,  # hidden / input dropout probabilities
    lamda,
)

# Gaussian initialisation is currently disabled for this model.
#utils.gaussian_initialize(mlp_no_dropout)

# Move the parameters to the GPU when one is in use.
if cuda:
    mlp_no_dropout.cuda()




In [6]:
# Standard (consolidate=False) run for the no-dropout model.
# train returns, in order: precision, total loss, CE loss and EWC loss records.
consolidate = False
(
    standard_prec_no_dropout,
    standard_total_loss_no_dropout,
    standard_ce_loss_no_dropout,
    standard_ewc_loss_no_dropout,
) = train(
    mlp_no_dropout, train_datasets, test_datasets,
    epochs_per_task, batch_size, test_size,
    consolidate, fisher_estimation_sample_size,
    lr, weight_decay,
    eval_log_interval, loss_log_interval,
    cuda
)

=> task: 1/10 | epoch: 1/100 | progress: [1024/60000] (2%) | prec: 0.1094 | loss => ce: -0.01833 / ewc: 0.0 / total: -0.01833: : 8it [00:40,  5.12s/it]


KeyboardInterrupt: 

In [None]:
# Total-loss curves of the standard run without dropout (second value returned by train).
loss_plot(standard_total_loss_no_dropout)

In [None]:
# Per-task precision curves of the standard run without dropout (first value returned by train).
accuracy_plot(standard_prec_no_dropout)

In [None]:
# Average precision over the tasks seen so far, standard run without dropout (single unlabelled curve).
avg_precision_plot([standard_prec_no_dropout])

In [None]:
# EWC experiment without dropout.
# NOTE: this rebinds the notebook-level `lamda`; every later cell that passes
# `lamda` to MLP therefore sees 400 instead of the value from the config cell.
lamda = 400
mlp_consolidation_no_dropout = MLP(
    DATASET_CONFIGS['mnist']['size']**2,
    DATASET_CONFIGS['mnist']['classes'],
    # The notebook only defines hidden_size1 / hidden_size2 (the names used for
    # the baseline model); `hidden_size` / `hidden_layer_num` do not exist and
    # raised NameError on a fresh kernel.
    hidden_size1,
    hidden_size2,
    0,
    0,
    lamda,
)
# NOTE(review): anomaly detection is a global autograd debugging switch and
# stays on for every later training run in this kernel; it slows backward
# passes. Kept to preserve current behaviour -- consider removing once debugged.
torch.autograd.set_detect_anomaly(True)

utils.gaussian_initialize(mlp_consolidation_no_dropout)

# prepare the cuda if needed (same handling as the baseline model).
if cuda:
    mlp_consolidation_no_dropout.cuda()

# run the EWC (consolidate=True) experiment.
consolidate = True
ewc_prec_no_dropout, ewc_total_loss_no_dropout, ewc_ce_loss_no_dropout, ewc_ewc_loss_no_dropout = train(
    mlp_consolidation_no_dropout, train_datasets, test_datasets,
    epochs_per_task,
    batch_size,
    test_size,
    consolidate,
    fisher_estimation_sample_size,
    lr,
    weight_decay,
    eval_log_interval,
    loss_log_interval,
    cuda
)

In [None]:
# Total-loss curves of the consolidate=True run without dropout (second value returned by train).
loss_plot(ewc_total_loss_no_dropout)

In [None]:
# Per-task precision curves of the consolidate=True run without dropout (first value returned by train).
accuracy_plot(ewc_prec_no_dropout)

In [None]:
# Average precision over the tasks seen so far, consolidate=True run without dropout (single unlabelled curve).
avg_precision_plot([ewc_prec_no_dropout])

In [None]:
# prepare the model (dropout enabled on the hidden layers and on the input).
mlp_dropout = MLP(
    DATASET_CONFIGS['mnist']['size']**2,
    DATASET_CONFIGS['mnist']['classes'],
    # Use the layer-size names the notebook actually defines; `hidden_size` /
    # `hidden_layer_num` do not exist and raised NameError on a fresh kernel.
    hidden_size1,
    hidden_size2,
    hidden_dropout_prob,
    input_dropout_prob,
    lamda,
)
# TODO: check whether Gaussian initialisation behaves differently from Xavier
# initialisation here.
# initialize the parameters.
utils.gaussian_initialize(mlp_dropout)

# prepare the cuda if needed.
if cuda:
    # Was `mlp.cuda()`, but no variable named `mlp` exists in this notebook.
    mlp_dropout.cuda()

In [None]:
# Standard (consolidate=False) run for the dropout model.
# train returns, in order: precision, total loss, CE loss and EWC loss records.
consolidate = False
(
    standard_prec_dropout,
    standard_total_loss_dropout,
    standard_ce_loss_dropout,
    standard_ewc_loss_dropout,
) = train(
    mlp_dropout, train_datasets, test_datasets,
    epochs_per_task, batch_size, test_size,
    consolidate, fisher_estimation_sample_size,
    lr, weight_decay,
    eval_log_interval, loss_log_interval,
    cuda
)

In [None]:
# Total-loss curves of the standard run with dropout (second value returned by train).
loss_plot(standard_total_loss_dropout)

In [None]:
# Per-task precision curves of the standard run with dropout (first value returned by train).
accuracy_plot(standard_prec_dropout)

In [None]:
# Average precision over the tasks seen so far, standard run with dropout (single unlabelled curve).
avg_precision_plot([standard_prec_dropout])

In [None]:
# EWC experiment with dropout. `lamda` is whatever the notebook-level variable
# holds when this cell runs.
mlp_consolidation_dropout = MLP(
    DATASET_CONFIGS['mnist']['size']**2,
    DATASET_CONFIGS['mnist']['classes'],
    # Use the layer-size names the notebook actually defines; `hidden_size` /
    # `hidden_layer_num` do not exist and raised NameError on a fresh kernel.
    hidden_size1,
    hidden_size2,
    hidden_dropout_prob,
    input_dropout_prob,
    lamda,
)

utils.gaussian_initialize(mlp_consolidation_dropout)

# prepare the cuda if needed (same handling as the baseline model).
if cuda:
    mlp_consolidation_dropout.cuda()

# run the EWC (consolidate=True) experiment.
consolidate = True
ewc_prec_dropout, ewc_total_loss_dropout, ewc_ce_loss_dropout, ewc_ewc_loss_dropout = train(
    mlp_consolidation_dropout, train_datasets, test_datasets,
    epochs_per_task,
    batch_size,
    test_size,
    consolidate,
    fisher_estimation_sample_size,
    lr,
    weight_decay,
    eval_log_interval,
    loss_log_interval,
    cuda
)

In [None]:
# Total-loss curves of the consolidate=True run with dropout (second value returned by train).
loss_plot(ewc_total_loss_dropout)

In [None]:
# Per-task precision curves of the consolidate=True run with dropout (first value returned by train).
accuracy_plot(ewc_prec_dropout)

In [None]:
# Average precision over the tasks seen so far, consolidate=True run with dropout (single unlabelled curve).
avg_precision_plot([ewc_prec_dropout])
    


In [None]:
# Compare all four runs in one figure. The previous call referenced `ewc_prec`
# and `standard_prec`, which are never defined in this notebook, and its labels
# did not line up with the order of the curves.
avg_precision_plot(
    [
        standard_prec_no_dropout,
        standard_prec_dropout,
        ewc_prec_no_dropout,
        ewc_prec_dropout,
    ],
    [
        "no dropout",
        "dropout",
        "ewc, no dropout",
        "ewc, dropout",
    ],
)

In [None]:
# Disabled experiment (lamda = 1000): the code is wrapped in a string literal,
# so nothing in it executes. NOTE(review): it refers to `hidden_size` and
# `hidden_layer_num`, which no visible cell defines -- update before re-enabling.
'''lamda = 1000
mlp_consolidation_1000 = MLP(
    DATASET_CONFIGS['mnist']['size']**2,
    DATASET_CONFIGS['mnist']['classes'],
    hidden_size,
    hidden_layer_num,
    hidden_dropout_prob,
    input_dropout_prob,
    lamda,
)

# run the standard experiment.
consolidate = True
ewc_prec_1000, ewc_total_loss_1000, ewc_ce_loss_1000, ewc_ewc_loss_1000 =train(
    mlp_consolidation_1000, train_datasets, test_datasets,
    epochs_per_task,
    batch_size,
    test_size,
    consolidate,
    fisher_estimation_sample_size,
    lr,
    weight_decay,
    eval_log_interval,
    loss_log_interval,
    cuda
)'''

In [None]:
#loss_plot(ewc_total_loss_1000)

In [None]:
#accuracy_plot(ewc_prec_1000)

In [None]:
# Disabled experiment (lamda = 10): the code is wrapped in a string literal,
# so nothing in it executes. NOTE(review): it refers to `hidden_size` and
# `hidden_layer_num`, which no visible cell defines -- update before re-enabling.
'''lamda = 10
mlp_consolidation_10 = MLP(
    DATASET_CONFIGS['mnist']['size']**2,
    DATASET_CONFIGS['mnist']['classes'],
    hidden_size,
    hidden_layer_num,
    hidden_dropout_prob,
    input_dropout_prob,
    lamda,
)

# run the standard experiment.
consolidate = True
ewc_prec_10, ewc_total_loss_10, ewc_ce_loss_10, ewc_ewc_loss_10 =train(
    mlp_consolidation_10, train_datasets, test_datasets,
    epochs_per_task,
    batch_size,
    test_size,
    consolidate,
    fisher_estimation_sample_size,
    lr,
    weight_decay,
    eval_log_interval,
    loss_log_interval,
    cuda
)'''

In [None]:
#loss_plot(ewc_total_loss_10)

In [None]:
#accuracy_plot(ewc_prec_10)

In [None]:
# Disabled experiment (lamda = 80): the code is wrapped in a string literal,
# so nothing in it executes. NOTE(review): it refers to `hidden_size` and
# `hidden_layer_num`, which no visible cell defines -- update before re-enabling.
'''lamda = 80
mlp_consolidation_80 = MLP(
    DATASET_CONFIGS['mnist']['size']**2,
    DATASET_CONFIGS['mnist']['classes'],
    hidden_size,
    hidden_layer_num,
    hidden_dropout_prob,
    input_dropout_prob,
    lamda,
)

# run the standard experiment.
consolidate = True
ewc_prec_80, ewc_total_loss_80, ewc_ce_loss_80, ewc_ewc_loss_80 =train(
    mlp_consolidation_80, train_datasets, test_datasets,
    epochs_per_task,
    batch_size,
    test_size,
    consolidate,
    fisher_estimation_sample_size,
    lr,
    weight_decay,
    eval_log_interval,
    loss_log_interval,
    cuda
)'''

In [None]:
#loss_plot(ewc_total_loss_80)

In [None]:
#accuracy_plot(ewc_prec_80)