In [1]:
from lib.abstract_torch import get_device, get_loss, get_optimizer
from lib.dataloader import get_MNIST_loaders, get_FMNIST_loaders, get_CIFAR100_loaders, get_CIFAR10_loaders
from lib.model import ANN
from lib.train import train
from lib.test import test
from lib.growth_schedules import get_CL_schedule
from lib.visualize import (visualize_pathes,
                          visualize_statistical_reliability,
                          visualize_box_plot)               

import os
import numpy as np

In [2]:
# Select the compute device through the project helper; every model, train() and test()
# call below receives it (the recorded outputs show tensors living on 'cuda:0').
device = get_device()

In [3]:
# Resolve the dataset directory relative to the notebook's working directory.
# `__file__` is not defined inside a notebook kernel, so the working directory is used
# directly instead of taking the dirname of a fake "__file__" path (same resulting folder).
path = os.getcwd()
# os.path.join picks the platform's separator; the previous hard-coded "\\data" only
# produced a valid sub-directory on Windows.
data_path = os.path.join(path, "data")

# MNIST

## Get data loaders

In [4]:
# Mini-batch size handed to every get_MNIST_loaders() call and to train()/test() below.
batch_size=128

In [5]:
# Joint loaders: one set restricted to digits 0-4 (used to pretrain the root model)
# and one set covering all ten digits.
train_loader_0_to_4, val_loader_0_to_4, test_loader_0_to_4 = get_MNIST_loaders(data_path, range(5), batch_size)
train_loader_0_to_9, val_loader_0_to_9, test_loader_0_to_9 = get_MNIST_loaders(data_path, range(10), batch_size)

# Single-digit loaders: position d of each list holds the loader built for the class list [d].
train_loaders, val_loaders, test_loaders = [], [], []

for digit in range(10):
    train_loader, val_loader, test_loader = get_MNIST_loaders(data_path, [digit], batch_size)
    train_loaders.append(train_loader)
    val_loaders.append(val_loader)
    test_loaders.append(test_loader)

## Random Initialization

In [6]:
# Initialisation method forwarded to train(..., init_name=...) during the growth phase.
# NOTE(review): the section heading says "Random Initialization" and `savefig` contains
# "random", yet init_name is "gradmax" — confirm which initialisation this section should run.
init_name = "gradmax"
# Presumably the base name for saved figures; not used in the visible cells — TODO confirm.
savefig = "MNIST_random_CL"

### Define, train and test both root & target models

In [7]:
# --- Architecture ---------------------------------------------------------
num_inputs = 28 * 28        # input size fed to ANN (28x28 pixels, flattened)
num_hidden = 50             # hidden width of the initial (root) network
num_outputs = 10            # output size of the network
num_hidden_target = 95      # hidden width of the final (target) network

# --- Objective ------------------------------------------------------------
loss_name = "MSE"           # alternative: "CE"
optimizer_name = "Adam"

# --- Training hyperparameters ---------------------------------------------
num_epochs = 3
lr = 5e-3
growth_schedule = None      # no growth schedule selected at this point

# --- Experiment -----------------------------------------------------------
num_repetitions = 1

#### Root model

#### Target model

### Grow root model

In [8]:
# Growth parameters
# Passed to get_CL_schedule() to build the growth schedules.
num_neurons = 9
# Learning rate used when pretraining the root model on the 0-4 loaders.
lr_root = 1e-3
# Learning rate used for each later training stage on a single new digit (5-9).
lr_growth = 2e-4

In [9]:
# Build the collection of growth schedules to compare. The experiment loop iterates over
# this collection and indexes each schedule by growth step (one entry per newly added digit).
growth_schedules = get_CL_schedule(num_neurons)

In [10]:
# Continual-learning growth experiment.
# For every repetition and every growth schedule:
#   1. pretrain a fresh root model jointly on digits 0-4,
#   2. train it on digits 5..9 one at a time, handing train() the matching schedule entry,
#   3. after each stage, record the test accuracy on every digit seen so far.
# Result: test_accs_matrices_repeted[repetition][schedule] is a (stage x digit) array;
# row 0 is the pretrained model, rows 1-5 follow digits 5-9, unseen digits stay at 0.
num_tasks = len(test_loaders)    # one single-digit loader per class
num_pretrain_tasks = 5           # digits covered by train_loader_0_to_4
num_growth_epochs = 2            # epochs spent on each newly introduced digit

test_accs_matrices_repeted = []
for repetition in range(num_repetitions):
    test_accs_matrices = []
    for growth_schedule in growth_schedules:
        # Define & pretrain the root model (no growth schedule during pretraining)
        root_model = ANN(num_inputs, num_hidden, num_outputs).to(device)
        _ = train(root_model, num_outputs, None, loss_name, optimizer_name, lr_root,
                  train_loader_0_to_4, val_loader_0_to_4, num_epochs, batch_size, device, verbose=0)

        # Matrix of test accuracies: one row per training stage, one column per digit
        test_accs_matrix = np.zeros((num_tasks - num_pretrain_tasks + 1, num_tasks))
        for seen_task in range(num_pretrain_tasks):
            test_accs_matrix[0, seen_task] = round(test(root_model, test_loaders[seen_task], batch_size, device), 2)

        # A dedicated loop variable is used here so the repetition counter is not overwritten.
        for task in range(num_pretrain_tasks, num_tasks):
            print("iteration :", task)
            growth_step = task - num_pretrain_tasks
            _ = train(root_model,
                      num_outputs,
                      growth_schedule[growth_step],
                      loss_name,
                      optimizer_name, lr_growth,
                      train_loaders[task], val_loaders[task],
                      num_growth_epochs, batch_size,
                      device,
                      init_name=init_name,
                      verbose=0)
            # Re-evaluate on every digit seen so far (pretraining digits plus the new ones)
            for seen_task in range(task + 1):
                test_accs_matrix[growth_step + 1, seen_task] = round(test(root_model, test_loaders[seen_task], batch_size, device), 2)
        test_accs_matrices.append(test_accs_matrix)
    test_accs_matrices_repeted.append(test_accs_matrices)

  0%|                                                                                            | 0/3 [00:00<?, ?it/s]

grad : tensor([ 0.0001,  0.0001, -0.0001,  0.0002,  0.0002,  0.0002,  0.0002,  0.0002,
         0.0002,  0.0002], device='cuda:0')


 33%|████████████████████████████                                                        | 1/3 [00:04<00:08,  4.08s/it]

grad : tensor([ 5.3189e-05,  6.4909e-05, -2.0389e-04,  5.6954e-05,  5.6337e-05,
         3.0880e-06,  4.7856e-06,  5.5083e-06,  5.5670e-06,  5.6702e-06],
       device='cuda:0')


 67%|████████████████████████████████████████████████████████                            | 2/3 [00:08<00:04,  4.02s/it]

grad : tensor([ 7.8131e-05,  1.7348e-05,  8.9368e-05,  3.6957e-05, -2.3138e-04,
         1.7819e-06,  3.0771e-06,  2.8782e-06,  3.5145e-06,  3.0906e-06],
       device='cuda:0')


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:11<00:00,  3.91s/it]


iteration : 5


 50%|██████████████████████████████████████████                                          | 1/2 [00:00<00:00,  1.44it/s]

grad : tensor([ 6.1155e-05,  6.1471e-06,  4.4127e-06,  3.9942e-05,  1.8362e-05,
        -7.9795e-05,  3.4234e-07,  7.2138e-07,  3.1883e-07,  4.8288e-07],
       device='cuda:0')


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.28it/s]


iteration : 6


  0%|                                                                                            | 0/2 [00:00<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Scratch cell: these values match the "grad :" tensor printed after "iteration : 5"
# in the recorded output of the experiment cell. Not used by any other visible cell.
[ 6.1155e-05,  6.1471e-06,  4.4127e-06,  3.9942e-05,  1.8362e-05,
 -7.9795e-05,  3.4234e-07,  7.2138e-07,  3.1883e-07,  4.8288e-07]

In [None]:
# Scratch cell: pasted 10-element vector, presumably a "grad :" printout from another run
# (it does not appear in the recorded output) — TODO confirm its origin or delete the cell.
[ 1.4339e-05,  2.7668e-06,  5.7152e-05,  8.6110e-05,  1.0572e-05,
 -3.4817e-05,  1.8731e-06,  5.3046e-07,  5.1305e-06,  4.9971e-07]

In [None]:
# Scratch cell: pasted 10-element vector, presumably a "grad :" printout from another run
# (it does not appear in the recorded output) — TODO confirm its origin or delete the cell.
[ 2.0637e-04,  4.7135e-07,  4.2411e-05,  3.6538e-05,  1.1733e-05,
 -3.2191e-05,  4.4685e-07,  3.1756e-07,  2.0377e-06,  3.4610e-07]

In [None]:
# Accuracy matrix of the first repetition and first growth schedule
# (rows: training stage, columns: digit the model was tested on).
print(test_accs_matrices_repeted[0][0])

In [None]:
# Row sums of the first accuracy matrix: the recorded per-digit accuracies added up
# for each training stage.
np.array(test_accs_matrices_repeted[0][0]).sum(axis=1)

In [None]:
# Inspect the parameters of the most recently trained model: name -> shape.
# `named_parameters` is a method; without the call parentheses the cell only displays the
# bound-method repr instead of the parameters themselves.
# NOTE(review): assumes ANN exposes the torch.nn.Module API — confirm.
{name: tuple(param.shape) for name, param in root_model.named_parameters()}

In [None]:
# Sanity check: print how many mini-batches each single-digit training loader yields.
# The loaders are iterated (rather than measured with len()) so this works for any iterable,
# and no hand-maintained counter or unused batch variable is needed.
for loader in train_loaders:
    print(sum(1 for _ in loader))

### Visualize results