In [17]:

from torch.utils.data import DataLoader
from pathlib import Path
from matplotlib import pyplot as plt
from math import sqrt
import torch
import os
from utils.probe_confidence_intervals import model_setup, get_activations
from utils.plotting import plot_activations_PCA
from utils.preprocessing import load_txt_data

In [3]:
# ---- Configuration ----------------------------------------------------------
model_name = "EleutherAI/pythia-14m"
saved_path_plots = "results/PCA_plots/"
saved_path_raw_activations = "raw_activations/"

# ---- Model ------------------------------------------------------------------
# model_setup hands back the model together with its tokenizer and the device
# it selected.
model, tokenizer, device = model_setup(model_name)
hidden_layers = model.config.num_hidden_layers

# ---- Data -------------------------------------------------------------------
languages = ['en', 'da', 'sv', 'nb', 'is']
raw_data_folder = Path('data/antibiotic/')
print("Load data")

# One "<code>.txt" file per language code inside raw_data_folder.
language_files = {
    code: raw_data_folder / f'{code}.txt'
    for code in ('da', 'en', 'sv', 'nb', 'is')
}
ds = load_txt_data(file_paths=language_files, file_extension='txt')
loader = DataLoader(ds, batch_size=32, shuffle=True)

# ---- Activations ------------------------------------------------------------
# Run the data through the model and collect the activations; the model's
# layer count and hidden size are passed along as metadata.
model_dims = {
    'hidden_layers': hidden_layers,
    'hidden_size': model.config.hidden_size,
}
act_ds = get_activations(
    meta_data=model_dims,
    loader=loader,
    tokenizer=tokenizer,
    model=model,
    device=device,
)





found device: cpu
Load data


  0%|          | 0/130 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
  5%|▍         | 6/130 [00:01<00:30,  4.10it/s]


In [37]:
from collections import defaultdict

# Maps layer index (int) -> list holding one loaded object per activation file
# found for that layer.
d = defaultdict(list)
path = "raw_activations/pythia_14m/"

# os.listdir returns names in arbitrary order; sorting makes the order in which
# files are appended to each layer's list the same on every run.
for file in sorted(os.listdir(path)):
    # File names look like "layer_<index>_language_<code>_tensors.pt", so the
    # second "_"-separated field is the layer index.
    layer = int(file.split("_")[1])

    # NOTE(review): torch.load unpickles the file, which can execute arbitrary
    # code. Only load files produced by this project; consider passing
    # weights_only=True if the payload is confirmed to be plain tensors.
    tensor = torch.load(os.path.join(path, file))

    # Always index with the int key. Looking the defaultdict up with the raw
    # string key (as `d[file.split("_")[1]]` would) silently inserts an empty
    # list under that string, leaving spurious '0', '1', ... keys in `d`.
    d[layer].append(tensor)


  tensor = torch.load(file)


In [41]:
# Length of the first entry stored under layer 0 in `d`.
len(d[0][0])

7892

In [42]:
# Number of entries stored under layer 5 in `d`.
len(d[5])

2

In [None]:
for file in os.listdir(path):
    # Derive the layer index from this file's own name. Without this line the
    # loop would print whatever value `layer` was left holding by an earlier
    # cell, i.e. the same stale value for every file.
    layer = file.split("_")[1]
    print(file)
    print(layer)

layer_5_language_da_tensors.pt
5
layer_2_language_da_tensors.pt
2
layer_1_language_da_tensors.pt
1
layer_0_language_da_tensors.pt
0
layer_3_language_da_tensors.pt
3
layer_4_language_da_tensors.pt
4


In [15]:
# Show the `predictors` attribute of the first item in `act_ds`.
# NOTE(review): this prints the full contents, which is very long; consider
# displaying a slice or a length instead to keep the notebook output readable.
act_ds[0].predictors

[tensor([ 0.7009,  1.5122,  1.7688,  2.9591,  1.6894, -0.1288,  1.7643, -1.3696,
         -0.4695, -1.5669, -5.8299,  2.6105, -1.7262, -0.2131, -1.6035,  3.7188,
         -2.6824,  0.3868,  0.4552, -0.1992,  0.2977, -0.9090,  0.6581,  0.5560,
          0.9771,  0.0493,  2.3084,  2.0175,  1.9096, -3.4600,  1.2417,  0.2540,
          2.2639,  0.7426, -0.6960,  1.3041, -0.4008, -2.7497, -1.4998,  0.6063,
          1.0804, -2.2953,  2.0174,  1.7884,  1.5581, -1.0106, -3.5706, -1.1644,
          1.7974,  3.0403,  1.9109,  1.4993, -0.3558,  1.3398, -0.6192, -0.8426,
         -1.1171,  2.8499, -4.1102,  1.1309,  1.3031, -2.3867, -2.3766,  0.0483,
          1.4866, -1.6993, -1.0712,  3.4105, -2.1312, -0.9472,  0.2962,  0.2791,
          1.0527, -2.3749,  3.1160,  0.1115, -0.3487,  4.2015, -3.6294, -1.8489,
          0.7751,  1.1149,  0.8599,  0.3543, -1.8122,  0.4519, -3.1583,  0.6512,
         -0.7710,  1.8533, -1.0818,  0.5640,  1.8622,  2.1342,  1.2184,  1.2714,
          1.4264, -0.6474, -