In [98]:
import os

try:
    has_changed_dir
except:
    has_changed_dir = False

try:
    import google.colab
    IN_COLAB = True
    print("Running as a Colab notebook")
except:
    IN_COLAB = False

if IN_COLAB:
    %pip install datasets
    %pip install translate-toolkit
    %pip install bitsandbytes

    !git clone https://github.com/MartinKirkegaardDK/KDS_MI.git

    if not has_changed_dir:
        os.chdir('KDS_MI')
        has_changed_dir = True
else:
    if not has_changed_dir:
        os.chdir('.')
        has_changed_dir = True

Running as a Colab notebook
Cloning into 'KDS_MI'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects: 100% (34/34), done.[K
remote: Compressing objects: 100% (26/26), done.[K
remote: Total 34 (delta 9), reused 24 (delta 5), pack-reused 0 (from 0)[K
Receiving objects: 100% (34/34), 250.66 KiB | 7.37 MiB/s, done.
Resolving deltas: 100% (9/9), done.


In [99]:
%load_ext autoreload
%autoreload 2

import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from torch import nn
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from torch.utils.data import DataLoader, Dataset


from classification_probes import TextClassificationDataset, ProbeTrainer, HookManager, ClassificationProbe

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [100]:
model_name = "roneneldan/TinyStories-1M"
model_name = "EleutherAI/pythia-14m"
model_name = "AI-Sweden-Models/gpt-sw3-356m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

try:
    hidden_size = model.config.n_embd
except AttributeError:
    hidden_size = model.config.hidden_size

In [101]:
model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(64000, 1024)
    (wpe): Embedding(2048, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=3072, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=1024)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=4096, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=4096)
          (act): GELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=64000, bias=False)
)

In [102]:
lab_map = {
    'da': 0,
    'en': 1,
    'is': 2,
    'nb': 3,
    'sv': 4
}

data_loc = 'data/antibiotic/'
ds = TextClassificationDataset.from_txt(data_loc + 'da.txt', lab_map['da'])
ds.add_from_txt(data_loc + 'en.txt', lab_map['en'])
ds.add_from_txt(data_loc + 'is.txt', lab_map['is'])
ds.add_from_txt(data_loc + 'nb.txt', lab_map['nb'])
ds.add_from_txt(data_loc + 'sv.txt', lab_map['sv'])

loader = DataLoader(ds, batch_size=32, shuffle=True)

In [103]:
# making a dataset that can hold the activations and labels.

class ActivationDataset(Dataset):

    def __init__(self):
        self.acts = []
        self.labels = []

    def add_with_mask(self, acts, labels, masks):
        for act, label, mask in zip(acts, labels, masks):
            if mask:
                self.acts.append(act)
                self.labels.append(label)

    def __getitem__(self, index) -> tuple:
        return (self.acts[index], self.labels[index])

    def __len__(self) -> int:
        return len(self.acts)


In [105]:
activation_ds = ActivationDataset()

for text, label in loader:

    tokenized = tokenizer(
        text,
        padding=True,
        truncation=True,
        return_tensors='pt'
    ).to(device)

    with HookManager(model) as hook_manager:
        res_stream_act = hook_manager.attach_residstream_hook(
            layer=17,
            pre_mlp=False,
            pythia=False
        )

        model(**tokenized)

    # flattening [batch, pad_size, ...] to [tokens, ...]
    attn_mask = tokenized.attention_mask.flatten() # [tokens]
    label = label.unsqueeze(-1).expand(-1, tokenized.attention_mask.shape[1]).flatten() # [tokens]
    res_stream_act = res_stream_act[0].view(-1, hidden_size) # [tokens, hidden_size]

    activation_ds.add_with_mask(res_stream_act, label, attn_mask)



Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [106]:
act_loader = DataLoader(activation_ds, batch_size=32, shuffle=True)

In [107]:
input_size = hidden_size
learning_rate = 0.001
reg_lambda = 0.1
num_labs = len(lab_map)

probe = ClassificationProbe(in_dim=input_size, num_labs=num_labs, device=device)
optimizer = torch.optim.Adam(probe.parameters(), lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()

for epoch in range(10):
    for act, label in act_loader:

        label = label.to(device)

        outputs = probe(act)
        loss = loss_fn(outputs, label.to(device))
        loss += reg_lambda * sum(torch.norm(param, 2) for param in probe.parameters())

        accuracy = ((torch.argmax(outputs.detach(), dim=1) == label.to(device)).sum() / 32).item()
        print('acc: ', accuracy, end='\t\t')

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        print(loss)

[1;30;43mStreaming af output blev afkortet til de sidste 5000 linjer.[0m
acc:  0.90625		tensor(1.1794, device='cuda:0', grad_fn=<AddBackward0>)
acc:  0.96875		tensor(1.1757, device='cuda:0', grad_fn=<AddBackward0>)
acc:  0.9375		tensor(1.1388, device='cuda:0', grad_fn=<AddBackward0>)
acc:  0.875		tensor(1.1561, device='cuda:0', grad_fn=<AddBackward0>)
acc:  0.90625		tensor(1.1516, device='cuda:0', grad_fn=<AddBackward0>)
acc:  0.9375		tensor(1.1402, device='cuda:0', grad_fn=<AddBackward0>)
acc:  0.875		tensor(1.2028, device='cuda:0', grad_fn=<AddBackward0>)
acc:  0.90625		tensor(1.1449, device='cuda:0', grad_fn=<AddBackward0>)
acc:  1.0		tensor(1.1034, device='cuda:0', grad_fn=<AddBackward0>)
acc:  0.9375		tensor(1.1359, device='cuda:0', grad_fn=<AddBackward0>)
acc:  0.875		tensor(1.2003, device='cuda:0', grad_fn=<AddBackward0>)
acc:  0.875		tensor(1.1666, device='cuda:0', grad_fn=<AddBackward0>)
acc:  0.84375		tensor(1.2199, device='cuda:0', grad_fn=<AddBackward0>)
acc:  0.875		tens

KeyboardInterrupt: 

In [65]:
steering_vector

tensor([ 9.3937e-05,  1.2653e-03, -2.0634e-03,  ...,  4.9431e-04,
         6.9325e-04, -6.8787e-05], device='cuda:0', grad_fn=<SelectBackward0>)

In [118]:
steering_vector = probe.linear._parameters['weight'][0]

text = 'Dette er'
tokenized = tokenizer(text, return_tensors='pt').to(device)

with HookManager(model) as hook_manager:
    hook_manager.attach_resid_stream_steer_hook(
        17,
        steering_vector,
        50,
        pre_mlp=False,
        pythia=False
    )

    output_da_steering = [
        model.generate(tokenized.input_ids, max_length=100, temperature=0.7, top_p=0.9, do_sample=True)
        for _ in range(10)
    ]

for output in output_da_steering:
    print(tokenizer.decode(output[0]).replace('\n', ' '))
    print()

Dette er en af de mest populære online dating sites i Danmark. Vores online dating er meget enkel og meget effektiv. Du kan vælge at finde en kæreste. Det er et sted, hvor du kan søge at få en partner, der vil være sammen med dig, og at du kan gå til en fest eller en festdag, og at du kan møde en mand eller kvinde, der er ved at blive forelsket i. Mødes i en dating side Man kan også gå

Dette er en slags opsummering, som jeg synes, man skal få at vide, er en god og konstruktiv artikel. 23 24 23 24 23 24 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23

Dette er en god ide, for du kan sagtens komme til at gøre det samme. Søg at få en ung kvinde, der har et godt og varmt forhold til en ung mand. Det kan også være at hun er en ung mand, der er ved at blive meget ung. 2000 års unge mand er en ung mand, der er ved at blive meget ung. Det kan være at han er 35 år, eller 29 år, eller 43

Dette er et eksempel på, at der er en tendens til at man er nødt til at være helt sikker på, at ma

In [119]:
steering_vector = probe.linear._parameters['weight'][3]

text = 'Dette er'
tokenized = tokenizer(text, return_tensors='pt').to(device)

with HookManager(model) as hook_manager:
    hook_manager.attach_resid_stream_steer_hook(
        17,
        steering_vector,
        50,
        pre_mlp=False,
        pythia=False
    )

    output_nb_steering = [
        model.generate(tokenized.input_ids, max_length=100, temperature=0.7, top_p=0.9, do_sample=True)
        for _ in range(10)
    ]

for output in output_nb_steering:
    print(tokenizer.decode(output[0]).replace('\n', ' '))
    print()

Dette er ikke et krav, men en fordel.  6. 10.10.2016 11:58  Publisert: 10.10.2016 11:58  Publisert: 10.10.2016 11:58  Sist oppdatert:  P  P  P  P  P  P 

Dette er et godt eksempel på at du kan ha mange interesser, samtidig som du har en god plan for hvordan du skal bruke de. For å holde en god plan, er det viktig å sette av tid til å planlegge. Det er viktig at du får til å planlegge alt i detalj, både på tid, hvor du skal, hvor du skal ta det, hvilke tjenester du skal få, og hva du skal gjøre. Dette er et viktig punkt for å ha en plan, da det kan påvirke

Dette er en av de viktigste faktorene for å finne en løsning som er til beste for alle. I denne oppgaven vil jeg forsøke å ta en slik løsning til det den er til. Jeg ønsker å ta en god løsning på et problem som er til det beste for alle. Jeg ønsker å finne en løsning som er til det beste for de som trenger det. Jeg vil se på et problem som er til det beste for alle, og jeg ønsker å se på et problem som er til det beste

Dette er ikke

In [None]:
output = model.generate(tokenized.input_ids, max_length=100, temperature=0.7, top_p=0.9, do_sample=False)

tokenizer.decode(output[0])

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


'Arranged in six parts, the film depicts a strike in 1903 by the workers of a factory in pre-revolutionary Russia, and their subsequent suppression. It is best known for a sequence towards the climax, in which the violent and violent of the Soviet Union was the first to be seen.\n\nThe film is a film that is a film that is a film that is a film that is a film that is a film that is a film that is a film that is a film that'

In [None]:
output

tensor([[ 7047,  3767, 30912,    75,   413,  2827,   546,  1448,  6706,   546,
          2098, 14293,    85,  1151,   466,   891,   726,    87, 33578,  1129,
          8225,  5025,   346,    49,  2768,  1670,    75,   771,   354,  1059,
         33325,  1507,  1073,  5751,  4953,     3,  9040,   945,  1162,   269,
          5507,   620,   265,  1775,   274,  1257,    75,   615,   278,  4415,
            78,    15,   187,   187,    34,    27,   187,   187,   510,   806,
          3213,   310,   281,  1918,   368,   247,  2372,   273,   247, 12662,
           285,   923,   752,   253,  3662,    27,   187,   187,    14, 50275,
           187,   187,    14, 50275,   187,   187,    14, 50275,   187,   187,
            14, 50275,   187,   187,    14, 50275,   187,   187,    14, 50275]])

'Filmen Strejke er en del af en planlagt serie i syv dele med titlen "På vej mod proletariatets diktatur" og var et fælles samarbejde mellem.\n\nA:\n\nThe first step is to give you a bit of a hint and see what the answer:\n\n-   \n\n-   \n\n-   \n\n-   \n\n-   \n\n-   '

In [None]:
projection_magnitudes = (act.unsqueeze(0) @ steering_vector).unsqueeze(-1)

In [None]:
steering_vector_ = steering_vector.view(1, 1, -1)

In [None]:
projections

tensor([[[ 1.0383],
         [ 3.2407],
         [ 4.7199],
         [-0.4780],
         [ 4.3807],
         [-3.7402],
         [ 1.0187],
         [ 8.1848],
         [-1.3870],
         [ 1.7934],
         [ 0.7859],
         [-3.5573],
         [-0.1791],
         [ 2.3834]]], grad_fn=<UnsqueezeBackward0>)

In [None]:
projections = (projection_magnitudes * steering_vector_)

tensor([4, 0, 3, 1, 4, 1, 2, 2, 4, 0, 2, 2, 1, 4, 4, 1, 4, 0, 4, 4, 1, 4, 0, 1,
        1, 4, 2, 2, 0, 3, 2, 1])

In [None]:
label.unsqueeze(-1).expand(-1, tokenized.attention_mask.shape[1]).flatten()

tensor([2, 2, 2,  ..., 4, 4, 4])

In [None]:
tokenized.attention_mask.flatten().shape

torch.Size([4928])

In [None]:
res_stream_act[0].view(-1, hidden_size)[0]

tensor([-0.5304,  0.9247, -0.0046, -2.0886,  0.9416, -0.6766,  1.6014,  0.4292,
        -0.3467,  0.6638, -0.3799, -3.7725,  0.0168, -0.6235,  1.7516,  1.1082,
         0.2715,  0.7788,  0.1381,  0.3071,  1.7841,  0.8833, -0.9052,  0.7601,
        -1.4813,  0.8397, -2.0588, -0.9006, -0.7124,  0.6576, -1.1364,  1.0030,
         0.6364,  1.6032,  0.4388, -1.4383,  0.8074,  3.0726,  0.0916, -0.9084,
         0.0775,  1.0104, -0.6060, -0.0836, -2.3467, -4.3569,  2.1628,  1.2648,
        -0.7746, -0.0214,  1.1413, -0.5092, -2.0130,  0.4083, -2.4431, -1.1179,
         0.5212,  0.7194, -0.6159, -0.8679,  0.7901,  1.3831,  0.0591,  0.8055])

In [None]:
res_stream_act[0].shape

torch.Size([32, 154, 64])

In [None]:

trainer = ProbeTrainer(hidden_size, 5, 0.001, 0.1, 'cpu')

for text_batch, labels in loader:
    print(text)
    with HookManager(model) as hook_manager:
        res_stream_act = hook_manager.attach_residstream_hook(
            layer=4,
            pre_mlp=False
        )

        tokenized = [
            tokenizer(text, return_tensors='pt')
            for text in text_batch
        ]
        for text in tokenized:
            model.forward(**text)

    loss = trainer.train_step(torch.Tensor(res_stream_act), torch.Tensor(labels))
    print(loss)

In [None]:
model.config.n_embed

AttributeError: 'GPTNeoConfig' object has no attribute 'n_embed'

In [None]:
model.get_submodule('transformer.h.')

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(64000, 768)
    (wpe): Embedding(2048, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): GELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=64000, bias=False)
)

In [None]:
input_ = "dette er en"

tokenized = tokenizer(input_, return_tensors='pt')

In [None]:
tokenized

{'input_ids': tensor([[1122,  358,  315]]), 'attention_mask': tensor([[1, 1, 1]])}

In [None]:
output_ = model.generate(tokenized.input_ids, max_length=100, temperature=0.7, top_p=0.9, do_sample=True)