In [1]:
import os
import pickle

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch.nn as nn
import torch.optim
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

import multitask.dataset as dataset
from multitask.models.task_switching import get_task_model
import multitask.models.task_switching.hooks as hooks
from multitask.utils.training import get_device
from multitask.utils.argparse import check_runs

In [3]:
# Filesystem locations used by this notebook, all relative to the notebook's
# working directory.
path_root = os.path.join('..', '..')

# Root folder handed to dataset.get_tasks_dict when loading the task datasets.
path_data = os.path.join(path_root, 'data')
# Folder searched by check_runs for a saved task-switching simulation.
path_model_task_switching = os.path.join(path_root, 'results', 'task_switching')
# Destination of the decoder accuracies written by the last cell.
path_pickle = os.path.join('pickle', 'results_linear_decoder_all.pickle')

In [4]:
# Settings of the task-switching simulation to analyse. They are bundled into
# `parameters` and matched by check_runs against the saved runs, so they must
# stay equal to the values used when the simulation was produced.
num_runs = 10
initial_seed = 6789
max_seed = 10e5  # kept as a float (1000000.0), exactly as written originally
num_epochs = 50
batch_size = 100
num_train = 50_000
num_test = 10_000

# One entry per hidden layer (len(num_hidden) is used as the layer count);
# presumably each entry is the layer width -- TODO confirm.
num_hidden = [100] * 10
tasks_names = ['parity', 'value']
# One context index per entry of num_hidden: 0 .. len(num_hidden) - 1.
idxs_contexts = [*range(len(num_hidden))]

In [5]:
# Bundle the settings under the keys check_runs compares against saved runs.
# Key names and their order are kept exactly as before.
parameters = dict(
    num_runs=num_runs,
    initial_seed=initial_seed,
    max_seed=max_seed,
    num_epochs=num_epochs,
    num_hidden=num_hidden,
    batch_size=batch_size,
    num_train=num_train,
    num_test=num_test,
    tasks=tasks_names,
    idxs_contexts=idxs_contexts,
)

# The returned value is used below as the folder that contains 'data.pickle'.
data_folder = check_runs(path_model_task_switching, parameters)

Found simulation in ../../results/task_switching with the same parameters (2022-09-28_02_23_14)


In [6]:
# Load the saved simulation results; later cells index the loaded object by
# seed and read the 'model' and 'indices' entries of each run.
# NOTE(review): pickle.load can execute arbitrary code -- only open files that
# were produced by this project.
pickle_data = os.path.join(data_folder, 'data.pickle')
with open(pickle_data, mode='rb') as pickle_file:
    results_task_switching = pickle.load(pickle_file)

In [7]:
# The keys of the loaded results are the per-run seeds; sort them so every
# later cell walks the runs in the same order.
seeds = sorted(results_task_switching)
num_seeds, num_tasks = len(seeds), len(tasks_names)

print(seeds)
print(tasks_names)

[10612, 17350, 130146, 173249, 213794, 341996, 440064, 668870, 858781, 894813]
['parity', 'value']


In [8]:
# Describe each task for the task-switching model: its dataset plus a one-hot
# list with a 1 at the task's own position in tasks_names.
tasks_datasets = dataset.get_tasks_dict(tasks_names, root=path_data)
num_tasks = len(tasks_names)

task_switching_tasks = {}
for i_context, task_name in enumerate(tasks_names):
    # Original note: set the active entry to 0 instead of 1 for "Removed".
    one_hot = [1 if position == i_context else 0 for position in range(num_tasks)]
    task_switching_tasks[task_name] = {
        'data': tasks_datasets[task_name],
        'activations': one_hot,
    }

# Show the one-hot list assigned to every task.
for key, value in task_switching_tasks.items():
    print(f'{key}: {value["activations"]}')

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


parity: [1, 0]
value: [0, 1]


In [9]:
# For every saved run: rebuild the model, restore its weights and collect the
# activations returned by hooks.get_layer_activations on the test loaders.
# The per-run activations and test-set numbers are stored in seed order so the
# decoding cell can index both lists by run position.
device = get_device()
criterion = nn.CrossEntropyLoss()

seeds_task_swithing = sorted(results_task_switching)
list_activations, list_numbers = [], []

for i_seed, seed in tqdm(enumerate(seeds_task_swithing), total=num_runs):
    run_results = results_task_switching[seed]
    state_dict = run_results['model']
    indices = run_results['indices']

    # Same architecture arguments for every run; only the weights differ.
    model = get_task_model(task_switching_tasks, num_hidden, idxs_contexts, device)
    model.load_state_dict(state_dict)

    # NOTE(review): test_sampler is not referenced again in the visible cells.
    test_sampler = dataset.SequentialSampler(indices['test'])
    _, test_dataloaders = dataset.create_dict_dataloaders(
        task_switching_tasks,
        indices,
        batch_size=batch_size,
    )
    tasks_testloader = dataset.SequentialTaskDataloader(test_dataloaders)

    # Numbers of the test items, read from the first task's dataset and
    # restricted to this run's test indices.
    first_task_dataset = test_dataloaders[tasks_names[0]].dataset
    numbers = first_task_dataset.numbers.numpy()[indices['test']]

    # Only the second returned value is kept; the decoding cell indexes it by
    # task name and then by 'layer<k>'.
    _, activations = hooks.get_layer_activations(
        model,
        tasks_testloader,
        criterion,
        device=device,
        disable=True,
    )

    list_activations.append(activations)
    list_numbers.append(numbers)

  0%|          | 0/10 [00:00<?, ?it/s]

Running on GPU.


100%|██████████| 10/10 [00:05<00:00,  1.81it/s]


In [10]:
num_layers = len(num_hidden)
max_iter = 8000


def _decoding_accuracy(features, labels, rng, max_iter, test_size=0.1):
    """Fit a logistic-regression decoder and return its held-out accuracy.

    Args:
        features: 2-D array with one row of (already normalised) activations
            per sample.
        labels: 1-D array with one target label per row of `features`.
        rng: numpy Generator used to draw the seed shared by the train/test
            split and the classifier, which keeps the result reproducible.
        max_iter: iteration cap passed to LogisticRegression.
        test_size: fraction of samples held out for scoring.

    Returns:
        Mean accuracy of the fitted classifier on the held-out samples.
    """
    # A fresh seed per call keeps the original behaviour of using a different
    # split for every decoding target.
    split_seed = int(rng.integers(0, 10**8))
    X_train, X_test, y_train, y_test = train_test_split(features,
                                                        labels,
                                                        test_size=test_size,
                                                        random_state=split_seed)
    clf = LogisticRegression(random_state=split_seed,
                             max_iter=max_iter,
                             tol=1e-3).fit(X_train, y_train)
    return clf.score(X_test, y_test)


# One accuracy per (run, layer) for each decoding target.
acc_numbers_all = np.zeros((num_seeds, num_layers))
acc_tasks_all = np.zeros((num_seeds, num_layers))
acc_congruency_all = np.zeros((num_seeds, num_layers))

# Numbers labelled 1 in the congruency target; presumably the digits on which
# both tasks require the same response -- TODO confirm against the task
# definitions.
congruent_numbers = [0, 2, 4, 5, 7, 9]

for i_seed, seed in enumerate(seeds):
    # Derive every split/classifier seed of this run from the run's own seed,
    # so re-running the cell reproduces the same accuracies. The loop variable
    # `seed` is no longer overwritten inside the layer loop.
    rng = np.random.default_rng(seed)

    activations = list_activations[i_seed]
    numbers = list_numbers[i_seed]

    # Activations of the tasks are stacked one after the other below, so the
    # labels repeat the test-set numbers once per task (two tasks assumed).
    labels_numbers = np.hstack((numbers, numbers))
    labels_task = np.concatenate((np.zeros_like(numbers), np.ones_like(numbers)))
    labels_congruency = np.isin(labels_numbers, congruent_numbers).astype(int)

    for j_layer in tqdm(range(num_layers), desc=f'{i_seed}'):
        # Rows: samples of the first task followed by samples of the next one.
        activations_decoder = np.vstack(
            [activations[task][f'layer{j_layer+1}'] for task in tasks_names]
        )
        assert activations_decoder.shape[0] == labels_numbers.shape[0]

        # Normalise with the layer-wide mean and standard deviation; skip the
        # division when the layer is constant to avoid producing NaNs.
        scale = activations_decoder.std()
        if not scale > 0:
            scale = 1.0
        activations_decoder = (activations_decoder - activations_decoder.mean()) / scale

        # Numbers task
        acc_numbers_all[i_seed, j_layer] = _decoding_accuracy(
            activations_decoder, labels_numbers, rng, max_iter)

        # Labels task
        acc_tasks_all[i_seed, j_layer] = _decoding_accuracy(
            activations_decoder, labels_task, rng, max_iter)

        # Congruency task
        acc_congruency_all[i_seed, j_layer] = _decoding_accuracy(
            activations_decoder, labels_congruency, rng, max_iter)

0: 100%|██████████| 10/10 [01:09<00:00,  6.94s/it]
1: 100%|██████████| 10/10 [01:10<00:00,  7.01s/it]
2: 100%|██████████| 10/10 [01:04<00:00,  6.42s/it]
3: 100%|██████████| 10/10 [00:51<00:00,  5.16s/it]
4: 100%|██████████| 10/10 [01:00<00:00,  6.09s/it]
5: 100%|██████████| 10/10 [01:04<00:00,  6.48s/it]
6: 100%|██████████| 10/10 [01:13<00:00,  7.37s/it]
7: 100%|██████████| 10/10 [01:13<00:00,  7.38s/it]
8: 100%|██████████| 10/10 [01:10<00:00,  7.04s/it]
9: 100%|██████████| 10/10 [01:03<00:00,  6.39s/it]


In [11]:
# Gather the (run x layer) accuracy matrices under one key per decoding target.
results = {
    'numbers': acc_numbers_all,
    'tasks': acc_tasks_all,
    'congruency': acc_congruency_all,
}

In [12]:
# Persist the decoder accuracies. The target folder is created first so the
# cell also works on a fresh checkout where it does not exist yet.
pickle_folder = os.path.dirname(path_pickle)
if pickle_folder:
    os.makedirs(pickle_folder, exist_ok=True)

with open(path_pickle, 'wb') as f:
    pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL)