# Notebook to extract hidden-state and attention-head activations from GPT-2 model predictions

In [1]:
import os
import glob
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
from model import GPT2Extractor
from tokenizer import tokenize

In [2]:
def check_folder(path):
    """Create the directory `path`, including any missing parent directories.

    Nothing happens when the directory already exists or when `path` is empty.
    If the directory cannot be created (e.g. permission denied, or a regular
    file already occupies part of the path) a warning is emitted instead of the
    error being silently discarded, and the function returns normally.

    Args:
        path (str): directory to create.
    """
    if not path:
        # os.path.dirname() of a bare file name is '': there is nothing to create.
        return
    try:
        # exist_ok=True keeps the call idempotent and safe when several jobs
        # try to create the same folder at the same time.
        os.makedirs(path, exist_ok=True)
    except OSError as error:
        import warnings
        warnings.warn('Could not create folder {}: {}'.format(path, error))

Defining variables:

In [3]:
# Glob pattern for the English text inputs (the `*` stands for the run number).
# NOTE: `template` is reassigned to a local path in a later cell, before it is
# first passed to glob.
template = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/text_english_run*.txt' # path to text input
# Language label, used to build the output folder and passed to the tokenizer
# and to the extractor.
language = 'english'

Configuring the models, then creating one iterator per run:

In [4]:
# Per-model settings stored as parallel lists: position i of every list
# describes the same model (the extraction loop reads them with a shared index).
pretrained_gpt2_models = ['gpt2']
names = ['gpt2']
config_paths = [None]
# Output folder of each model; the language is inserted into the path.
saving_path_folders = [
    '/Users/alexpsq/Code/Parietal/data/stimuli-representations/{}/gpt2'.format(language)]
# Alternative output location on the neurospin file system (currently disabled):
#saving_path_folders = [
#    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/{}/gpt2'.format(language)]
prediction_types = ['sentence']


In [5]:
# Display the configured model names as a quick check.
names

['gpt2']

In [6]:
# Override the input pattern so that it points to a local copy of the text files.
template = '/Users/alexpsq/Code/Parietal/data/text_english_run*.txt' # path to text input


In [7]:
# Collect the files matching the pattern, in lexicographic order.
# NOTE(review): lexicographic order equals numeric run order only while all run
# numbers have the same number of digits - confirm against the file names.
paths = sorted(glob.glob(template))

In [8]:
# Run the project's `tokenize` helper on every text file; the results are kept
# in the same order as `paths` (one entry per file).
iterator_list = [tokenize(path, language, train=False) for path in paths]

100%|██████████| 135/135 [00:00<00:00, 658408.19it/s]
100%|██████████| 135/135 [00:00<00:00, 395965.76it/s]
100%|██████████| 176/176 [00:00<00:00, 721600.69it/s]
100%|██████████| 173/173 [00:00<00:00, 827382.66it/s]
100%|██████████| 177/177 [00:00<00:00, 396576.82it/s]
100%|██████████| 216/216 [00:00<00:00, 722463.85it/s]
100%|██████████| 196/196 [00:00<00:00, 855445.98it/s]
100%|██████████| 145/145 [00:00<00:00, 650453.56it/s]
100%|██████████| 207/207 [00:00<00:00, 758934.38it/s]

Tokenizing...
Preprocessing...
Preprocessed.
Tokenized.
Tokenizing...
Preprocessing...
Preprocessed.
Tokenized.
Tokenizing...
Preprocessing...
Preprocessed.
Tokenized.
Tokenizing...
Preprocessing...
Preprocessed.
Tokenized.
Tokenizing...
Preprocessing...
Preprocessed.
Tokenized.
Tokenizing...
Preprocessing...
Preprocessed.
Tokenized.
Tokenizing...
Preprocessing...
Preprocessed.
Tokenized.
Tokenizing...
Preprocessing...
Preprocessed.
Tokenized.
Tokenizing...
Preprocessing...
Preprocessed.
Tokenized.





## Activation extraction

In [9]:
# Flags forwarded to GPT2Extractor in the extraction loop below.
output_attentions = True
output_hidden_states = True

In [10]:
# Extract the activations of every configured model and write one CSV per run.
# The per-model settings are parallel lists: make sure they line up, then walk
# through them together instead of indexing each list by position.
per_model_settings = (pretrained_gpt2_models, names, prediction_types, config_paths, saving_path_folders)
if len({len(setting) for setting in per_model_settings}) != 1:
    raise ValueError('The per-model configuration lists must all have the same length.')

for gpt2_model, model_name, model_prediction_type, model_config_path, model_saving_folder in zip(*per_model_settings):
    extractor = GPT2Extractor(
        gpt2_model,
        language,
        model_name,
        model_prediction_type,
        output_hidden_states=output_hidden_states,
        output_attentions=output_attentions,
        config_path=model_config_path)
    print(extractor.name, ' - Extracting activations ...')
    # The output folder depends only on the model: create it once, not once per run.
    check_folder(model_saving_folder)
    # Wrapping the list itself (rather than enumerate(...)) lets tqdm see the
    # number of runs, so it can display a real progress bar and an ETA.
    for run_index, iterator in enumerate(tqdm(iterator_list)):
        print("############# Run {} #############".format(run_index))
        activations = extractor.extract_activations(iterator, language)
        # The first element is used as the hidden-state table and the second as
        # the attention-head table; they are joined column-wise.
        hidden_states_activations = activations[0]
        attention_heads_activations = activations[1]
        activations = pd.concat([hidden_states_activations, attention_heads_activations], axis=1)

        # Output files are numbered from 1 while run_index starts at 0.
        activations.to_csv(os.path.join(model_saving_folder, 'activations_run{}.csv'.format(run_index + 1)), index=False)
        

0it [00:00, ?it/s]

gpt2  - Extracting activations ...
############# Run 0 #############


1it [00:54, 54.52s/it]

############# Run 1 #############


2it [01:54, 56.03s/it]

############# Run 2 #############


3it [03:01, 59.34s/it]

############# Run 3 #############


4it [04:03, 60.20s/it]

############# Run 4 #############


4it [04:43, 70.80s/it]


KeyboardInterrupt: 

In [11]:
# Display the module tree of the model held by the last extractor built above.
extractor.model

GPT2Model(
  (wte): Embedding(50257, 768)
  (wpe): Embedding(1024, 768)
  (drop): Dropout(p=0.1, inplace=False)
  (h): ModuleList(
    (0): Block(
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): Attention(
        (c_attn): Conv1D()
        (c_proj): Conv1D()
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): MLP(
        (c_fc): Conv1D()
        (c_proj): Conv1D()
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (1): Block(
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): Attention(
        (c_attn): Conv1D()
        (c_proj): Conv1D()
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): MLP(
        (c_fc): Conv1D

In [14]:
# Print the name of every parameter tensor of the model, one per line.
for param_name, _param in extractor.model.named_parameters():
    print(param_name)

wte.weight
wpe.weight
h.0.ln_1.weight
h.0.ln_1.bias
h.0.attn.c_attn.weight
h.0.attn.c_attn.bias
h.0.attn.c_proj.weight
h.0.attn.c_proj.bias
h.0.ln_2.weight
h.0.ln_2.bias
h.0.mlp.c_fc.weight
h.0.mlp.c_fc.bias
h.0.mlp.c_proj.weight
h.0.mlp.c_proj.bias
h.1.ln_1.weight
h.1.ln_1.bias
h.1.attn.c_attn.weight
h.1.attn.c_attn.bias
h.1.attn.c_proj.weight
h.1.attn.c_proj.bias
h.1.ln_2.weight
h.1.ln_2.bias
h.1.mlp.c_fc.weight
h.1.mlp.c_fc.bias
h.1.mlp.c_proj.weight
h.1.mlp.c_proj.bias
h.2.ln_1.weight
h.2.ln_1.bias
h.2.attn.c_attn.weight
h.2.attn.c_attn.bias
h.2.attn.c_proj.weight
h.2.attn.c_proj.bias
h.2.ln_2.weight
h.2.ln_2.bias
h.2.mlp.c_fc.weight
h.2.mlp.c_fc.bias
h.2.mlp.c_proj.weight
h.2.mlp.c_proj.bias
h.3.ln_1.weight
h.3.ln_1.bias
h.3.attn.c_attn.weight
h.3.attn.c_attn.bias
h.3.attn.c_proj.weight
h.3.attn.c_proj.bias
h.3.ln_2.weight
h.3.ln_2.bias
h.3.mlp.c_fc.weight
h.3.mlp.c_fc.bias
h.3.mlp.c_proj.weight
h.3.mlp.c_proj.bias
h.4.ln_1.weight
h.4.ln_1.bias
h.4.attn.c_attn.weight
h.4.attn.c_at

### Generate control activations

In [76]:
# Settings for the control (partially randomized) models.
# Pretrained model identifier; the variable name says BERT but the value is GPT-2.
bert_model = 'gpt2'
language = 'english'
# Prefix of the control model names; the seed is appended when building the
# output folder name.
name = 'gpt2_control_'
prediction_type = 'sentence'
# Root folder under which one sub-folder per seed and per layer is created.
saving_path_folder = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/{}'.format(language)
# One set of control activations is generated for each of these random seeds.
seeds = [24, 213, 1111, 61, 183]

In [77]:
def randomize_layer(model, layer_nb):
    """Replace every parameter of one block of `model.h` by random/zero values.

    For each of the six sub-modules of the block (`ln_1`, `attn.c_attn`,
    `attn.c_proj`, `ln_2`, `mlp.c_fc`, `mlp.c_proj`) the weight is redrawn with
    `torch.rand_like` and the bias is replaced by zeros.

    Args:
        model: model exposing its blocks as `model.h`.
        layer_nb (int): 1-based block number (1 to 12, to stay coherent with the
            rest of the analysis); it is converted to a 0-based index here.

    Returns:
        The same `model` object, modified in place.
    """
    block = model.h[layer_nb - 1]
    # The order is significant: the random weights are drawn from the global
    # torch generator in exactly this sequence.
    targets = (
        block.ln_1,
        block.attn.c_attn,
        block.attn.c_proj,
        block.ln_2,
        block.mlp.c_fc,
        block.mlp.c_proj,
    )
    for module in targets:
        module.weight = torch.nn.Parameter(torch.rand_like(module.weight))
        module.bias = torch.nn.Parameter(torch.zeros_like(module.bias))
    return model

In [78]:
def randomize_ln_1(model, layer_nb):
    """Randomize the `ln_1` sub-module of one block and zero its bias.

    `ln_1` is the first LayerNorm of a block in the model printed earlier in
    this notebook. Its weight is redrawn with `torch.rand_like`.

    Args:
        model: model exposing its blocks as `model.h`.
        layer_nb (int): 1-based block number.

    Returns:
        The same `model` object, modified in place.
    """
    target = model.h[layer_nb - 1].ln_1
    target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
    target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model

def randomize_attention_c_attn(model, layer_nb):
    """Randomize the `attn.c_attn` projection of one block and zero its bias.

    The weight is redrawn with `torch.rand_like`.
    NOTE(review): `c_attn` is presumably the fused query/key/value projection
    of the attention module - confirm against the model implementation.

    Args:
        model: model exposing its blocks as `model.h`.
        layer_nb (int): 1-based block number.

    Returns:
        The same `model` object, modified in place.
    """
    target = model.h[layer_nb - 1].attn.c_attn
    target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
    target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model

def randomize_attention_c_proj(model, layer_nb):
    """Randomize the `attn.c_proj` projection of one block and zero its bias.

    The weight is redrawn with `torch.rand_like`.
    NOTE(review): `attn.c_proj` is presumably the output projection of the
    attention module - confirm against the model implementation.

    Args:
        model: model exposing its blocks as `model.h`.
        layer_nb (int): 1-based block number.

    Returns:
        The same `model` object, modified in place.
    """
    target = model.h[layer_nb - 1].attn.c_proj
    target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
    target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model

def randomize_ln_2(model, layer_nb):
    """Randomize the `ln_2` sub-module of one block and zero its bias.

    `ln_2` is the second LayerNorm of a block in the model printed earlier in
    this notebook. Its weight is redrawn with `torch.rand_like`.

    Args:
        model: model exposing its blocks as `model.h`.
        layer_nb (int): 1-based block number.

    Returns:
        The same `model` object, modified in place.
    """
    target = model.h[layer_nb - 1].ln_2
    target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
    target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model


def randomize_mlp_c_fc(model, layer_nb):
    """Randomize the `mlp.c_fc` projection of one block and zero its bias.

    The weight is redrawn with `torch.rand_like`.
    NOTE(review): `mlp.c_fc` is presumably the first (input) projection of the
    block's feed-forward network - confirm against the model implementation.

    Args:
        model: model exposing its blocks as `model.h`.
        layer_nb (int): 1-based block number.

    Returns:
        The same `model` object, modified in place.
    """
    target = model.h[layer_nb - 1].mlp.c_fc
    target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
    target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model

def randomize_mlp_c_proj(model, layer_nb):
    """Randomize the `mlp.c_proj` projection of one block and zero its bias.

    The weight is redrawn with `torch.rand_like`.
    NOTE(review): `mlp.c_proj` is presumably the second (output) projection of
    the block's feed-forward network - confirm against the model implementation.

    Args:
        model: model exposing its blocks as `model.h`.
        layer_nb (int): 1-based block number.

    Returns:
        The same `model` object, modified in place.
    """
    target = model.h[layer_nb - 1].mlp.c_proj
    target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
    target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model


In [79]:
def randomize_embeddings(model):
    """Randomize the two embedding tables of the model.

    The weights of `model.wte` and `model.wpe` (in that order) are redrawn with
    `torch.rand_like`. No bias is involved: only the two weight tensors are
    replaced.

    Args:
        model: model exposing the embeddings `wte` and `wpe`.

    Returns:
        The same `model` object, modified in place.
    """
    # wte first, then wpe: the draw order from the global torch generator is kept.
    for embedding in (model.wte, model.wpe):
        embedding.weight = torch.nn.Parameter(torch.rand_like(embedding.weight))
    return model

In [None]:
# Control activations: for every seed, one component of the model is randomized
# at a time (layer 0 = embeddings, layers 1-12 = blocks of `model.h`) and only
# the activations of that layer are saved.
n_layers = 12      # blocks in `model.h`; layer index 0 is reserved for the embeddings
hidden_size = 768  # hidden-state columns per layer
n_heads = 12       # attention heads per layer
head_size = 64     # attention columns per head

for seed in seeds:
    # Seed the generators directly: `set_seed` is neither imported nor defined in
    # this notebook, so calling it fails on a fresh kernel. The randomize_*
    # helpers draw from the global torch generator through torch.rand_like.
    np.random.seed(seed)
    torch.manual_seed(seed)
    for layer in range(n_layers + 1):
        # A fresh extractor is built for every layer so that a single component
        # is randomized at a time. `bert_model` is the identifier defined with
        # the control settings; using it avoids depending on a loop variable
        # left over from the extraction cell.
        extractor = GPT2Extractor(bert_model, language, name, prediction_type, output_hidden_states=True, output_attentions=True, config_path=None)
        if layer == 0:
            extractor.model = randomize_embeddings(extractor.model)
        else:
            extractor.model = randomize_layer(extractor.model, layer)
        print(extractor.name + str(seed), ' - Extracting activations for layer {}...'.format(layer))

        # Columns kept for this layer: its hidden state and, for layers 1-12,
        # its attention heads. They do not depend on the run.
        columns_to_retrieve = ['hidden_state-layer-{}-{}'.format(layer, i) for i in range(1, hidden_size + 1)]
        if layer > 0:
            columns_to_retrieve += ['attention-layer-{}-head-{}-{}'.format(layer, head, i) for head in range(1, n_heads + 1) for i in range(1, head_size + 1)]

        # One output folder per (seed, layer): create it once, not once per run.
        save_path = os.path.join(saving_path_folder, name + str(seed) + '_layer-{}'.format(layer))
        check_folder(save_path)

        # enumerate(tqdm(...)) lets tqdm see the number of runs and show a real bar.
        for run_index, iterator in enumerate(tqdm(iterator_list)):
            print("############# Run {} #############".format(run_index))
            activations = extractor.extract_activations(iterator, language)
            hidden_states_activations = activations[0]
            attention_heads_activations = activations[1]
            activations = pd.concat([hidden_states_activations, attention_heads_activations], axis=1)
            activations = activations[columns_to_retrieve]

            print('\tSaving in {}.'.format(save_path))
            # Output files are numbered from 1 while run_index starts at 0.
            activations.to_csv(os.path.join(save_path, 'activations_run{}.csv'.format(run_index + 1)), index=False)
