## Imports


In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
import sys
from pathlib import Path
from collections import defaultdict

from tqdm.notebook import tqdm

import torch
from torch import nn
from torch.utils.data import DataLoader

from datasets import load_dataset
from transformers import T5ForConditionalGeneration, T5Tokenizer, AutoConfig

In [4]:
ELK_PATH = Path("../../../elk/")
print(ELK_PATH.resolve())

modules = [
    ELK_PATH,
    ELK_PATH / "elk" / "training",
    ELK_PATH / "elk" / "promptsource",
]

for module in modules:
    if not str(module) in sys.path:
        sys.path.insert(0, str(module.resolve()))

print(sys.path[:3])

from reporter import Reporter
from templates import DatasetTemplates

/fsx/home-augustas/elk
['/fsx/home-augustas/elk/elk/promptsource', '/fsx/home-augustas/elk/elk/training', '/fsx/home-augustas/elk']


## Config

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [6]:
DATA_DIR = (
    "../../../VINC-logs/"
    "allenai/unifiedqa-v2-t5-11b-1363200/"
    "AugustasM/burns-datasets-VINC/strange-montalcini"
)
DATA_DIR = Path(DATA_DIR)

!ls {DATA_DIR}

cfg.yaml  fingerprints.yaml  lr_eval.csv  reporters	  train_lm_eval.csv
eval.csv  lm_eval.csv	     lr_models	  train_eval.csv


In [7]:
model_name = "allenai/unifiedqa-v2-t5-11b-1363200"
model_name = "allenai/unifiedqa-v2-t5-3b-1363200"

LAYER = 24

## Tokenizer

In [8]:
model_name

'allenai/unifiedqa-v2-t5-3b-1363200'

In [9]:
tokenizer = T5Tokenizer.from_pretrained(model_name, truncation_side="left")
tokenizer

T5Tokenizer(name_or_path='allenai/unifiedqa-v2-t5-3b-1363200', vocab_size=32100, model_max_length=512, is_fast=False, padding_side='right', truncation_side='left', special_tokens={'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'additional_special_tokens': ['<extra_id_0>', '<extra_id_1>', '<extra_id_2>', '<extra_id_3>', '<extra_id_4>', '<extra_id_5>', '<extra_id_6>', '<extra_id_7>', '<extra_id_8>', '<extra_id_9>', '<extra_id_10>', '<extra_id_11>', '<extra_id_12>', '<extra_id_13>', '<extra_id_14>', '<extra_id_15>', '<extra_id_16>', '<extra_id_17>', '<extra_id_18>', '<extra_id_19>', '<extra_id_20>', '<extra_id_21>', '<extra_id_22>', '<extra_id_23>', '<extra_id_24>', '<extra_id_25>', '<extra_id_26>', '<extra_id_27>', '<extra_id_28>', '<extra_id_29>', '<extra_id_30>', '<extra_id_31>', '<extra_id_32>', '<extra_id_33>', '<extra_id_34>', '<extra_id_35>', '<extra_id_36>', '<extra_id_37>', '<extra_id_38>', '<extra_id_39>', '<extra_id_40>', '<extra_id_41>', '<extra_id_42>', '<ex

In [10]:
list(tokenizer.get_vocab().keys())[:3]

['<pad>', '</s>', '<unk>']

## Dataset

In [11]:
dataset = load_dataset("AugustasM/burns-datasets-VINC", split="validation[:8]")
dataset

Found cached dataset parquet (/admin/home-augustas/.cache/huggingface/datasets/AugustasM___parquet/AugustasM--burns-datasets-VINC-85ec467026b56702/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)


Dataset({
    features: ['text', 'label', 'original_dataset', 'template_name'],
    num_rows: 8
})

### Templates

In [12]:
dataset_template_path = "AugustasM/burns-datasets-VINC/all"

dataset_templates = DatasetTemplates(dataset_template_path)
dataset_templates.templates = {
    x.name: x for x in dataset_templates.templates.values() if x.get_answer_choices_list(dataset[0]) is not None
}
print(dataset_templates.templates)

template = list(dataset_templates.templates.values())[0]
template

{'Consider the text below': <templates.Template object at 0x7f124171de70>, 'Is the given text truthful': <templates.Template object at 0x7f124171dea0>, 'Text first': <templates.Template object at 0x7f124171ded0>, 'No question no choices': <templates.Template object at 0x7f124171cd60>, 'No question with choices': <templates.Template object at 0x7f124171de10>}


<templates.Template at 0x7f124171de70>

### Preprocess dataset

In [13]:
def tokenization_function(q, a):
    return tokenizer(
        q, text_target=a.strip(),
        add_special_tokens=True, return_tensors="pt",
        padding="max_length",
    )

def preprocess_function(rows):
    processed_rows = defaultdict(list)

    for text in rows["text"]:
        # print(len(text))
        entry = { "text": text }

        # Get the positive and negative examples
        entry["label"] = 1
        pos_q, pos_a = template.apply(entry)

        entry["label"] = 0
        neg_q, neg_a = template.apply(entry)

        # Tokenize the inputs
        pos_inputs = tokenization_function(pos_q, pos_a)
        neg_inputs = tokenization_function(neg_q, neg_a)

        # Store the processed inputs
        processed_rows["pos_input_ids"].append(pos_inputs["input_ids"].squeeze())
        processed_rows["pos_attention_mask"].append(pos_inputs["attention_mask"].squeeze())
        processed_rows["pos_labels"].append(pos_inputs["labels"].squeeze())
        processed_rows["neg_input_ids"].append(neg_inputs["input_ids"].squeeze())
        processed_rows["neg_attention_mask"].append(neg_inputs["attention_mask"].squeeze())
        processed_rows["neg_labels"].append(neg_inputs["labels"].squeeze())
    
    return processed_rows
        

columns_to_delete = dataset.column_names
columns_to_delete.remove("label")
processed_dataset = dataset.map(
    preprocess_function, batched=True, batch_size=8,
    remove_columns=columns_to_delete,
)
processed_dataset = processed_dataset.filter(
    lambda x: max(len(x["pos_input_ids"]), len(x["neg_input_ids"])) <= tokenizer.model_max_length,
    batched=False,
)
processed_dataset.set_format(type="torch")
processed_dataset

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Filter:   0%|          | 0/8 [00:00<?, ? examples/s]

Dataset({
    features: ['label', 'pos_input_ids', 'pos_attention_mask', 'pos_labels', 'neg_input_ids', 'neg_attention_mask', 'neg_labels'],
    num_rows: 8
})

## Try combining report with a language model

Inspiration taken from the [original repository](https://github.com/collin-burns/discovering_latent_knowledge/blob/main/CCS.ipynb).

In [18]:
%%time

model_cfg = AutoConfig.from_pretrained(model_name)
fp32_weights = model_cfg.torch_dtype in (None, torch.float32)
is_bf16_possible = fp32_weights and torch.cuda.is_bf16_supported()
print(f"{is_bf16_possible=}")

kwargs = {
    "torch_dtype": torch.bfloat16 if is_bf16_possible else torch.float32
}
model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
model.eval();

is_bf16_possible=True
CPU times: user 30 s, sys: 23.2 s, total: 53.2 s
Wall time: 59.1 s


In [37]:
# class MyRewardModel(nn.Module):
#     def __init__(
#             self, language_model, reporter_path,
#             layer=-1, device="cpu", hidden_state_name="decoder_hidden_states",
#         ):
#         super().__init__()

#         # Load the language model and the reporter
#         # self.language_model = T5ForConditionalGeneration.from_pretrained(
#         #     language_model
#         # ).to(device)
#         # self.language_model.eval()
#         self.language_model = language_model

#         self.reporter = Reporter.load(reporter_path).to(device)
#         self.reporter.eval()

#         self.layer = layer # which layer to extract
#         self.hidden_state_name = hidden_state_name

    
#     def forward(self, pos_inputs, neg_inputs):
#         '''
#             NOTE: only works for a single input at a time for now
#         '''

#         # Get the hidden states
#         pos_hidden_states = self.language_model(
#             **pos_inputs, output_hidden_states=True,
#         )[self.hidden_state_name][self.layer]
#         neg_hidden_states = self.language_model(
#             **neg_inputs, output_hidden_states=True,
#         )[self.hidden_state_name][self.layer]

#         print(pos_hidden_states.shape, neg_hidden_states.shape)
#         # Get the last token's output
#         # Shape B x T x H -> B x H
#         pos_last_tokens = pos_hidden_states[:, -1, :]
#         neg_last_tokens = neg_hidden_states[:, -1, :]

#         # Get the logits for the two classes
#         # Shape B x H -> B
#         pos_logits = self.reporter(pos_last_tokens)
#         neg_logits = self.reporter(neg_last_tokens)

#         # Return the difference in logits which will later be
#         # passed through a sigmoid function
#         return pos_logits - neg_logits


In [142]:
class MyRewardModel(nn.Module):
    def __init__(
            self, language_model, reporter_path,
            layer=-1, device="cpu", hidden_state_name="decoder_hidden_states",
        ):
        super().__init__()

        # Load the language model and the reporter
        # self.language_model = T5ForConditionalGeneration.from_pretrained(
        #     language_model
        # ).to(device)
        # self.language_model.eval()
        self.language_model = language_model

        self.reporter = Reporter.load(reporter_path).to(device)
        self.reporter.eval()

        self.layer = layer # which layer to extract
        self.hidden_state_name = hidden_state_name

        self.pad_token_id = self.language_model.config.pad_token_id

    
    def forward(self, pos_inputs, neg_inputs):
        # Get the hidden states
        # Shape B x T x H
        pos_hidden_states = self.language_model(
            **pos_inputs, output_hidden_states=True,
        )[self.hidden_state_name][self.layer]
        neg_hidden_states = self.language_model(
            **neg_inputs, output_hidden_states=True,
        )[self.hidden_state_name][self.layer]

        pos_last_token_index = (
            pos_inputs["labels"] == self.pad_token_id
        ).int().argmax(dim=1) - 1
        neg_last_token_index = (
            neg_inputs["labels"] == self.pad_token_id
        ).int().argmax(dim=1) - 1

        # Get the last token's output
        pos_last_tokens = pos_hidden_states[range(len(pos_last_token_index)), pos_last_token_index]
        neg_last_tokens = neg_hidden_states[range(len(neg_last_token_index)), neg_last_token_index]

        # Get the logits for the two classes
        pos_logits = self.reporter(pos_last_tokens)
        neg_logits = self.reporter(neg_last_tokens)

        print(pos_logits.shape, neg_logits.shape)

        # Return the difference in logits which will later be
        # passed through a sigmoid function
        return pos_logits - neg_logits


In [143]:
reporter_path = DATA_DIR / "reporters" / f"layer_{LAYER}.pt"
reporter_path.resolve()

PosixPath('/fsx/home-augustas/VINC-logs/allenai/unifiedqa-v2-t5-11b-1363200/AugustasM/burns-datasets-VINC/strange-montalcini/reporters/layer_24.pt')

In [144]:
%%time

reward_model = MyRewardModel(model, reporter_path, layer=LAYER, device=device)

CPU times: user 313 ms, sys: 0 ns, total: 313 ms
Wall time: 11.8 ms


### Sift through the data

In [145]:
# tokenization_function_tmp = lambda q, a: tokenizer(
#     q, text_target=a.strip(),
#     add_special_tokens=True, return_tensors="pt",
# )

In [146]:
# predictions = []
# labels = []
# for idx, item in enumerate(dataset):
#     labels.append(item["label"])
#     item_copy = item.copy()

#     # Get the positive and negative examples
#     item_copy["label"] = 1
#     pos_q, pos_a = template.apply(item_copy)

#     item_copy["label"] = 0
#     neg_q, neg_a = template.apply(item_copy)

#     # Tokenize the inputs
#     pos_inputs = tokenization_function_tmp(pos_q, pos_a).to(device)
#     neg_inputs = tokenization_function_tmp(neg_q, neg_a).to(device)

#     with torch.no_grad():
#         prediction = reward_model(pos_inputs, neg_inputs)
    
#     predictions.append(prediction.sigmoid().item())

# predictions = torch.tensor(predictions)
# labels = torch.tensor(labels)
# predictions[:5], labels[:5]

In [147]:
%%time

dataloader = DataLoader(
    processed_dataset, batch_size=4, shuffle=False,
    num_workers=12,
)

predictions = []
labels = []

loop = tqdm(enumerate(dataloader), total=len(dataloader), leave=False)
for idx, batch in loop:
    # print(batch["label"].shape)
    # print(batch["pos_input_ids"].shape)
    # print(batch["pos_attention_mask"].shape)
    # print(batch["pos_labels"].shape)
    pos_inputs = {
        "input_ids": batch["pos_input_ids"].to(device),
        "attention_mask": batch["pos_attention_mask"].to(device),
        "labels": batch["pos_labels"].to(device),
    }
    neg_inputs = {
        "input_ids": batch["neg_input_ids"].to(device),
        "attention_mask": batch["neg_attention_mask"].to(device),
        "labels": batch["neg_labels"].to(device),
    }
    
    labels.append(batch["label"])

    with torch.no_grad():
        current_predictions = reward_model(pos_inputs, neg_inputs)
    
    predictions.append(current_predictions.sigmoid())

predictions = torch.cat(predictions)
labels = torch.cat(labels)
predictions[:5], labels[:5]

  0%|          | 0/2 [00:00<?, ?it/s]

torch.Size([4]) torch.Size([4])
torch.Size([4]) torch.Size([4])
CPU times: user 2.58 s, sys: 1.47 s, total: 4.05 s
Wall time: 4.11 s


(tensor([0.5635, 0.5949, 0.5807, 0.5706, 0.5930], device='cuda:0'),
 tensor([0, 0, 0, 1, 1]))