# Notebook to extract hidden-state and attention-head activations from GPT-2 model predictions

In [1]:
import os
import glob
import torch
import gc
import numpy as np
import pandas as pd
from tqdm import tqdm
from model import GPT2Extractor
from sklearn.preprocessing import StandardScaler
from tokenizer import tokenize
from utils import set_seed
from numpy import linalg as la
import matplotlib.pyplot as plt

In [2]:
def check_folder(path):
    """Create the directory `path`, including any missing parents.

    Does nothing when the directory already exists, so it is safe to call
    repeatedly (and from concurrent jobs targeting the same folder).

    Args:
        path: Directory to create. An empty or `None` path is ignored.

    Raises:
        OSError: If the directory cannot be created (for instance because a
            component of `path` is an existing regular file, or because of
            insufficient permissions). Errors are no longer silently
            swallowed, so a failure is reported here instead of at the first
            write into the missing folder.
    """
    if not path:
        # Nothing to create; `os.makedirs('')` would raise FileNotFoundError.
        return
    os.makedirs(path, exist_ok=True)

In [3]:
def transform(activations, path, name, run_index, n_layers_hidden=13, n_layers_attention=12, hidden_size=768):
    """Rescale each layer-sized block of columns and save the result as CSV.

    The columns of `activations` are split into
    `n_layers_hidden + n_layers_attention` consecutive blocks of
    `hidden_size` columns. For each norm order, every block is divided by the
    mean (over rows) of its row-wise norm, the blocks are concatenated back in
    their original order, and the table is written to
    `<path>_norm-<order>/<name>_run<run_index + 1>.csv`.

    Args:
        activations: pandas DataFrame whose number of columns equals
            `(n_layers_hidden + n_layers_attention) * hidden_size`.
        path: Prefix of the output folder; `_norm-<order>` is appended to it.
        name: Prefix of the output file name.
        run_index: Zero-based run index (the file name uses `run_index + 1`).
        n_layers_hidden: Number of hidden-state blocks.
        n_layers_attention: Number of attention blocks.
        hidden_size: Number of columns per block.

    Raises:
        ValueError: If the number of columns does not match the requested
            block layout.
        OSError: If the output folder or file cannot be written.
    """
    n_blocks = n_layers_hidden + n_layers_attention
    expected_width = n_blocks * hidden_size
    # Read the underlying array once instead of once per block.
    values = activations.values
    if values.shape[1] != expected_width:
        # Explicit check: an `assert` would disappear under `python -O`.
        raise ValueError(
            'Expected {} columns ({} blocks of {}), got {}.'.format(
                expected_width, n_blocks, hidden_size, values.shape[1]))

    # Only the L2 norm is produced at the moment; np.inf is another candidate.
    for order in (2,):
        blocks = []
        for block_index in range(n_blocks):
            start = block_index * hidden_size
            block = values[:, start:start + hidden_size]
            # 'std' is reserved for a standard-scaling variant that is
            # currently disabled; such an order leaves the block untouched.
            if order is not None and order != 'std':
                block = block / np.mean(la.norm(block, ord=order, axis=1))
            blocks.append(block)
        new_data = pd.DataFrame(np.hstack(blocks), columns=activations.columns)
        new_path = path + '_norm-' + str(order).replace('np.', '')
        os.makedirs(new_path, exist_ok=True)
        new_data.to_csv(os.path.join(new_path, name + '_run{}.csv'.format(run_index + 1)), index=False)


Defining variables:

In [4]:
# Glob pattern for the text input files; the `*` is expanded by `glob.glob` in a later cell.
# NOTE(review): `template` is reassigned by the next code cell, so this value is only
# effective if that cell is skipped.
template = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/text_english_run*.txt' # path to text input
# Language label: passed to `tokenize` and to the extractor, and used to build output paths.
language = 'english'

Creating an iterator for each run:

In [5]:
# Overrides the glob pattern assigned in the previous cell with a path on a local machine.
template = '/Users/alexpsq/Code/Parietal/data/text_english_run*.txt' # path to text input


In [6]:
# Expand the glob pattern into a sorted list of files. The sort is lexicographic on the
# full path — NOTE(review): run numbers of 10 or more would sort before 2; confirm the
# file naming if more runs are added.
paths = sorted(glob.glob(template))

In [7]:
# One tokenized iterator per input file, in the same order as `paths`.
# `tokenize` comes from the project's `tokenizer` module; `train=False` is passed as-is
# (presumably selects inference-time preprocessing — confirm in `tokenizer.tokenize`).
iterator_list = [tokenize(path, language, train=False) for path in paths]

100%|██████████| 135/135 [00:00<00:00, 730620.70it/s]
100%|██████████| 135/135 [00:00<00:00, 546555.06it/s]
100%|██████████| 176/176 [00:00<00:00, 972592.23it/s]
100%|██████████| 173/173 [00:00<00:00, 993992.59it/s]
100%|██████████| 177/177 [00:00<00:00, 972990.57it/s]
100%|██████████| 216/216 [00:00<00:00, 1235974.98it/s]
100%|██████████| 196/196 [00:00<00:00, 1174405.12it/s]
100%|██████████| 145/145 [00:00<00:00, 393895.13it/s]
100%|██████████| 207/207 [00:00<00:00, 631433.40it/s]


In [8]:
#import utils
#import seaborn as sns
#import matplotlib.pyplot as plt
#from transformers import GPT2Tokenizer
#
#tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
#
#lengths = []
#
#for index in range(9):
#    batches, indexes = utils.batchify_per_sentence_with_context(
#                iterator_list[index], 
#                1, 
#                5, 
#                'gpt2', 
#                max_length=512)
#    #lengths.append(np.array(sorted([len(item.split()) for item in batches])))
#    lengths.append(np.array(sorted([len(tokenizer.tokenize(item)) for item in batches])))
#
#    sns.boxplot(lengths[-1])
#    plt.show()
#    print()
#
#print(np.mean(np.array([np.mean(item) for item in lengths])))
#print(np.median(np.array([np.median(item) for item in lengths])))
#print(np.mean(np.array([np.median(item) for item in lengths])))

In [9]:
#from transformers import GPT2Model
#GPT2Model.from_pretrained('gpt2').config

In [10]:
#import utils
#config = {'stop_attention_at_sent': None,
#          'number_of_sentence': 1,
#          'stop_attention_before_sent': 0
#         }
#
#batches, indexes = utils.batchify_with_detailed_indexes(
#            iterator_list[0], 
#            config['number_of_sentence'], 
#            20, 
#            'gpt2', 
#            1024,
#            config['stop_attention_at_sent'],
#            config['stop_attention_before_sent'],
#    True
#        )
#
#
#
#indexes_tmp = [(indexes[i][-config['number_of_sentence']][0], indexes[i][-1][1]) for i in range(len(indexes))]
#indexes_tmp[0] = (indexes[0][0][0], indexes[0][-1][1])
#print(indexes_tmp)
#for i in indexes_tmp:
#    print(i[1]-i[0])
#
#
#
#from transformers import GPT2Tokenizer, GPT2Model
#tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
#model = GPT2Model.from_pretrained('gpt2', output_hidden_states=True)
#
#m = 0
#hidden_states_activations = []
#
#for index, batch in enumerate(batches):
#    batch = batch.strip() # Remove trailing character
#    #batch = '<|endoftext|> ' + batch + ' <|endoftext|>'
#    
#    tokenized_text = tokenizer.tokenize(batch, add_prefix_space=False)
#    inputs_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokenized_text)])
#    attention_mask = torch.tensor([[1 for x in tokenized_text]])
#
#    if (config['stop_attention_at_sent'] is not None) and (index > 0):
#        attention_mask[:, :1 + indexes[index][-config['stop_attention_at_sent']-config['number_of_sentence']][0]] = 0
#        if self.config['stop_attention_before_sent'] < 0:
#            attention_mask[:, 1 + indexes[index][-config['stop_attention_at_sent']-config['number_of_sentence']][0]: 1 + indexes[index][-config['stop_attention_at_sent']-config['number_of_sentence']][0]-config['stop_attention_before_sent']] = 0
#        elif self.config['stop_attention_before_sent'] > 0:
#            attention_mask[:, 1 + indexes[index][-config['stop_attention_at_sent']-config['number_of_sentence']][0]-config['stop_attention_before_sent']: 1 + indexes[index][-config['stop_attention_at_sent']-config['number_of_sentence']][0]] = 1
#    mapping = utils.match_tokenized_to_untokenized(tokenized_text, batch)
#    
#    with torch.no_grad():
#        encoded_layers = model(inputs_ids, attention_mask=attention_mask) 
#    hidden_states_activations_ = np.vstack(encoded_layers[2]) # retrieve all the hidden states (dimension = layer_count * len(tokenized_text) * feature_count)
#    a = utils.extract_activations_from_token_activations(hidden_states_activations_, mapping, indexes_tmp[index])
#    #m += len(tokenized_text[indexes_tmp[index][0]:indexes_tmp[index][1]])
#    m += len(a)
#    #print(len(a), a[0].shape)
#    #print(len(tokenized_text), indexes_tmp[index][0], indexes_tmp[index][1])
#    #print(tokenized_text[indexes_tmp[index][0]:indexes_tmp[index][1]])
#    #print(np.array(tokenized_text)[attention_mask.detach().numpy()[0].astype(bool)])
#    #print(mapping)
#    key_start = None
#    key_stop = None
#    for key_, value in mapping.items(): 
#        if (value[0])== (indexes_tmp[index][0]): #because we added <|endoftext|> token at the beginning
#            key_start = key_
#
#    #print(key_start, len(mapping.keys()))
#    #print(tokenized_text)
#    for word_index in range(key_start, len(mapping.keys())): # len(mapping.keys()) - 1
#        print([index for index in mapping[word_index]])
#    #print()
#    #a=input()
#    #if a!='a':
#    #    break
#print(m)

## Activation extraction

In [14]:
# Per-model settings for the extraction loop below. Every list is indexed by the
# position of the model in `pretrained_gpt2_models`.
# NOTE(review): the lists have inconsistent lengths (1 vs 56 entries). With a single
# model only index 0 is read; adding a second model would raise IndexError on the
# one-element lists (`names`, `saving_path_folders`, `number_of_sentence_before_list`).
pretrained_gpt2_models = ['gpt2'] 
# Name given to each extractor.
names = [
    'gpt2_pre-5_1_token-2'
        ]
# Optional extractor config file per model (None: no config path is passed).
config_paths = [None] * 56
# Output folder per model; the language is substituted into the path.
saving_path_folders = [
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/{}/gpt2_pre-5_1_token-2'.format(language)
    
]
# The remaining lists are forwarded unchanged to the same-named `GPT2Extractor`
# keyword arguments; their exact semantics are defined in `model.GPT2Extractor`.
prediction_types = ['sentence'] * 56
number_of_sentence_list = [1] * 56
number_of_sentence_before_list = [2]
attention_length_before_list = [2] * 56
stop_attention_at_sent_list = [None] * 56
stop_attention_before_sent_list = [0] * 56


In [15]:
# Flags forwarded to `GPT2Extractor` in the extraction loop: attention outputs are
# turned off and hidden-state outputs are turned on.
output_attentions = False
output_hidden_states = True

In [16]:
# Extract activations for every configured GPT-2 model and every run.
for index, gpt2_model in enumerate(pretrained_gpt2_models):
    # All per-model settings are read from the configuration lists at `index`.
    extractor = GPT2Extractor(gpt2_model,
                              language,
                              names[index],
                              prediction_types[index],
                              output_hidden_states=output_hidden_states,
                              output_attentions=output_attentions,
                              attention_length_before=attention_length_before_list[index],
                              config_path=config_paths[index],
                              max_length=512,
                              number_of_sentence=number_of_sentence_list[index],
                              number_of_sentence_before=number_of_sentence_before_list[index],
                              stop_attention_at_sent=stop_attention_at_sent_list[index],
                              stop_attention_before_sent=stop_attention_before_sent_list[index]
                             )
    print(extractor.name, ' - Extracting activations ...')
    # Wrap the list itself (not the `enumerate` object) so that tqdm can read
    # its length and display a real progress bar instead of "0it".
    for run_index, iterator in enumerate(tqdm(iterator_list)):
        # Release memory left over from the previous run before extracting.
        gc.collect()
        print("############# Run {} #############".format(run_index + 1))
        activations = extractor.extract_activations(iterator, language)
        # Element 0 holds the hidden-state activations; element 1 (attention
        # heads) is not used here.
        hidden_states_activations = activations[0]
        print(hidden_states_activations.shape)

        # Saving is currently disabled. Uncomment one of the two options below
        # to write the activations to disk.
        # Option 1 - normalised activations:
        #transform(
        #    hidden_states_activations,
        #    saving_path_folders[index],
        #    'activations',
        #    run_index=run_index,
        #    n_layers_hidden=13,
        #    n_layers_attention=0,
        #    hidden_size=768)
        # Option 2 - raw activations:
        #check_folder(saving_path_folders[index])
        #hidden_states_activations.to_csv(os.path.join(saving_path_folders[index], 'activations_run{}.csv'.format(run_index + 1)), index=False)

        # Drop the references so the next `gc.collect()` can reclaim them.
        del activations
        del hidden_states_activations

0it [00:00, ?it/s]

gpt2_pre-5_1_token-2  - Extracting activations ...
############# Run 0 #############
 Once ,  when  I  was  six  years  old ,  I  saw  a  magnificent  picture  in  a  book  about  the  primeval  forest  called  ‘  Real  -  life  Stories .  ’  It  showed  a  boa  constrictor  swallowing  a  wild  animal .
 Here  is  a  copy  of  the  drawing .
 It  said  in  the  book  :  “  Boa  constrictors  swallow  their  prey  whole ,  without  chewing .
 Then  they  are  not  able  to  move ,  and  they  sleep  for  the  six  months  it  takes  for  digestion .  ”
 So  I  thought  a  lot  about  the  adventures  of  the  jungle  and ,  in  turn ,  I  managed ,  with  a  coloured  pencil ,  to  make  my  first  drawing .
 My  Drawing  Number  one .
 It  looked  like  this  :  I  showed  my  masterpiece  to  the  grownups  and  I  asked  them  if  my  drawing  frightened  them .
 They  answered  me  :  “  Why  would  anyone  be  frightened  by  a  hat ?  ”
 My  drawing  was  not  of  a  hat .
 It  s

0it [00:03, ?it/s]


KeyboardInterrupt: 

In [None]:
# Legacy BERT extraction loop.
# NOTE(review): `pretrained_bert_models`, `BertExtractor` and
# `number_of_sentence_after_list` are not defined or imported anywhere in the
# visible part of this notebook; this cell cannot run as-is.
for index, bert_model in enumerate(pretrained_bert_models):
    extractor = BertExtractor(bert_model,
                              language,
                              names[index],
                              prediction_types[index],
                              output_hidden_states=True,
                              output_attentions=True,
                              config_path=config_paths[index],
                              max_length=512,
                              number_of_sentence=number_of_sentence_list[index],
                              number_of_sentence_before=number_of_sentence_before_list[index],
                              number_of_sentence_after=number_of_sentence_after_list[index])
    print(extractor.name, ' - Extracting activations ...')
    # Wrap the list itself (not the `enumerate` object) so that tqdm can read
    # its length and display a real progress bar.
    for run_index, iterator in enumerate(tqdm(iterator_list)):
        # Runs are reported 1-based, matching the `_run{}` suffix of the saved files.
        print("############# Run {} #############".format(run_index + 1))
        activations = extractor.extract_activations(iterator, language)
        hidden_states_activations = activations[0]
        attention_heads_activations = activations[1]
        (cls_hidden_states_activations, cls_attention_activations) = activations[2]
        (sep_hidden_states_activations, sep_attention_activations) = activations[3]
        # Put hidden states and attention heads side by side (column-wise).
        activations = pd.concat([hidden_states_activations, attention_heads_activations], axis=1)
        cls_activations = pd.concat([cls_hidden_states_activations, cls_attention_activations], axis=1)
        sep_activations = pd.concat([sep_hidden_states_activations, sep_attention_activations], axis=1)

        # Only the token-level activations are normalised and saved; the
        # cls/sep tables are built but their export is disabled.
        transform(activations, saving_path_folders[index], 'activations', run_index=run_index)
        #transform(cls_activations, saving_path_folders[index], 'cls')
        #transform(sep_activations, saving_path_folders[index], 'sep')

        #activations.to_csv(os.path.join(saving_path_folders[index], 'activations_run{}.csv'.format(run_index + 1)), index=False)
        #cls_activations.to_csv(os.path.join(saving_path_folders[index], 'cls_run{}.csv'.format(run_index + 1)), index=False)
        #sep_activations.to_csv(os.path.join(saving_path_folders[index], 'sep_run{}.csv'.format(run_index + 1)), index=False)

        # Drop every per-run reference before the next iteration.
        del activations, cls_activations, sep_activations
        del hidden_states_activations, attention_heads_activations
        del cls_hidden_states_activations, cls_attention_activations
        del sep_hidden_states_activations, sep_attention_activations

In [None]:
# Bare expression: as the last statement of a notebook cell it displays the model
# held by the most recently created extractor.
extractor.model

In [None]:
# Print the name of every parameter of the extractor's model, one per line.
for param_name, _param in extractor.model.named_parameters():
    print(param_name)

### Generate control activations

In [None]:
# Settings for the control (randomised-weights) extraction.
# Model identifier. The control loop reads `gpt2_model`, so it is defined here
# explicitly instead of relying on a loop variable leaked by an earlier cell.
gpt2_model = 'gpt2'
# Legacy alias, kept so that code still reading `bert_model` keeps working.
bert_model = gpt2_model
language = 'english'
# Prefix of the extractor name and of the output folders (the seed and layer are appended).
name = 'gpt2_control_'
prediction_type = 'sentence'
# Root folder under which one sub-folder per (seed, layer) is created.
saving_path_folder = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/{}'.format(language)
# Random seeds: one full set of control activations is generated per seed.
seeds = [24, 213, 1111, 61, 183]

In [None]:
def randomize_layer(model, layer_nb):
    """Replace all weights of one block with random values and zero its biases.

    `layer_nb` is 1-based (1 designates `model.h[0]`) to match the layer
    numbering used in the rest of the analysis. For each of the six
    sub-modules `ln_1`, `attn.c_attn`, `attn.c_proj`, `ln_2`, `mlp.c_fc` and
    `mlp.c_proj`, the weight becomes a new parameter filled by
    `torch.rand_like` and the bias a new all-zero parameter.
    The model is modified in place and also returned.
    """
    block = model.h[layer_nb - 1]
    # Order matters: it fixes the sequence in which random numbers are drawn.
    submodule_paths = (
        ('ln_1',),
        ('attn', 'c_attn'),
        ('attn', 'c_proj'),
        ('ln_2',),
        ('mlp', 'c_fc'),
        ('mlp', 'c_proj'),
    )
    for attr_path in submodule_paths:
        target = block
        for attr in attr_path:
            target = getattr(target, attr)
        target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
        target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model

In [None]:
def randomize_ln_1(model, layer_nb):
    """Randomize the `ln_1` weight of a given block and set its bias to zero.

    `layer_nb` is 1-based: 1 designates `model.h[0]`. The weight is replaced by
    a new parameter filled by `torch.rand_like`. The model is modified in place
    and also returned.
    """
    target = model.h[layer_nb - 1].ln_1
    target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
    target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model

def randomize_attention_c_attn(model, layer_nb):
    """Randomize the `attn.c_attn` weight of a given block and set its bias to zero.

    `layer_nb` is 1-based: 1 designates `model.h[0]`. The weight is replaced by
    a new parameter filled by `torch.rand_like`. The model is modified in place
    and also returned.
    """
    target = model.h[layer_nb - 1].attn.c_attn
    target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
    target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model

def randomize_attention_c_proj(model, layer_nb):
    """Randomize the `attn.c_proj` weight of a given block and set its bias to zero.

    `layer_nb` is 1-based: 1 designates `model.h[0]`. The weight is replaced by
    a new parameter filled by `torch.rand_like`. The model is modified in place
    and also returned.
    """
    target = model.h[layer_nb - 1].attn.c_proj
    target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
    target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model

def randomize_ln_2(model, layer_nb):
    """Randomize the `ln_2` weight of a given block and set its bias to zero.

    `layer_nb` is 1-based: 1 designates `model.h[0]`. The weight is replaced by
    a new parameter filled by `torch.rand_like`. The model is modified in place
    and also returned.
    """
    target = model.h[layer_nb - 1].ln_2
    target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
    target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model


def randomize_mlp_c_fc(model, layer_nb):
    """Randomize the `mlp.c_fc` weight of a given block and set its bias to zero.

    `layer_nb` is 1-based: 1 designates `model.h[0]`. The weight is replaced by
    a new parameter filled by `torch.rand_like`. The model is modified in place
    and also returned.
    """
    target = model.h[layer_nb - 1].mlp.c_fc
    target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
    target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model

def randomize_mlp_c_proj(model, layer_nb):
    """Randomize the `mlp.c_proj` weight of a given block and set its bias to zero.

    `layer_nb` is 1-based: 1 designates `model.h[0]`. The weight is replaced by
    a new parameter filled by `torch.rand_like`. The model is modified in place
    and also returned.
    """
    target = model.h[layer_nb - 1].mlp.c_proj
    target.weight = torch.nn.Parameter(torch.rand_like(target.weight))
    target.bias = torch.nn.Parameter(torch.zeros_like(target.bias))
    return model


In [None]:
def randomize_embeddings(model):
    """Randomize the weights of the `wte` and `wpe` embedding tables.

    Each weight is replaced by a new parameter filled by `torch.rand_like`
    (`wte` first, then `wpe`). No bias is touched. The model is modified in
    place and also returned.
    """
    for table in (model.wte, model.wpe):
        table.weight = torch.nn.Parameter(torch.rand_like(table.weight))
    return model

In [None]:
# Generate control activations: for every seed and every layer, build a fresh
# extractor, randomise a single layer (layer 0 designates the embeddings) and
# save the activations of that layer for every run.
for seed in seeds:
    set_seed(seed)
    for layer in range(13):
        # `bert_model` holds the model identifier set in the control
        # configuration cell; using it avoids depending on a loop variable
        # leaked by the main extraction cell.
        extractor = GPT2Extractor(bert_model, language, name, prediction_type, output_hidden_states=True, output_attentions=True, config_path=None)
        if layer == 0:
            extractor.model = randomize_embeddings(extractor.model)
        else:
            extractor.model = randomize_layer(extractor.model, layer)

        # Columns and output folder depend only on (seed, layer): compute them
        # once per layer rather than once per run.
        columns_to_retrieve = ['hidden_state-layer-{}-{}'.format(layer, i) for i in range(1, 769)]
        if layer > 0:
            # Layer 0 only has hidden-state columns; attention columns are
            # retrieved for layers 1-12 (12 heads of 64 values each).
            columns_to_retrieve += ['attention-layer-{}-head-{}-{}'.format(layer, head, i) for head in range(1, 13) for i in range(1, 65)]
        save_path = os.path.join(saving_path_folder, name + str(seed) + '_layer-{}'.format(layer))
        check_folder(save_path)

        print(extractor.name + str(seed), ' - Extracting activations for layer {}...'.format(layer))
        # Wrap the list itself (not the `enumerate` object) so that tqdm can
        # read its length and display a real progress bar.
        for run_index, iterator in enumerate(tqdm(iterator_list)):
            # Runs are reported 1-based, matching the `_run{}` suffix of the saved files.
            print("############# Run {} #############".format(run_index + 1))
            activations = extractor.extract_activations(iterator, language)
            # Hidden states (element 0) and attention heads (element 1) side by side.
            activations = pd.concat([activations[0], activations[1]], axis=1)
            activations = activations[columns_to_retrieve]

            print('\tSaving in {}.'.format(save_path))
            activations.to_csv(os.path.join(save_path, 'activations_run{}.csv'.format(run_index + 1)), index=False)
