In [33]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

from transformers import Trainer
import numpy as np
from tqdm.auto import tqdm

In [2]:
# Tokenizer of the base checkpoint.
# NOTE(review): presumably the same vocabulary the fine-tuned model was trained with — confirm.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

# GLUE task name -> (first text column, second text column).
# The second entry is None for tasks that have a single text column.
task_to_keys = dict(
    cola=("sentence", None),
    mnli=("premise", "hypothesis"),
    mrpc=("sentence1", "sentence2"),
    qnli=("question", "sentence"),
    qqp=("question1", "question2"),
    rte=("sentence1", "sentence2"),
    sst2=("sentence", None),
    stsb=("sentence1", "sentence2"),
    wnli=("sentence1", "sentence2"),
)

# Column names used by the tokenization step for the RTE task.
sentence1_key, sentence2_key = task_to_keys["rte"]

In [3]:
# Load the GLUE "rte" configuration, caching files under the current directory.
raw_datasets = load_dataset("glue", "rte", cache_dir=".")

Found cached dataset arrow (/Users/konstantinakovlev/Documents/GitHub/Concord-NAS-RNN/analysis/./arrow/glue-79c215b275e45293/0.0.0/74f69db2c14c2860059d39860b1f400a03d11bf7fb5a8258ca38c501c878c137)


  0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
def preprocess_function(examples):
    """Tokenize the text column(s) of a batch of examples.

    Reads ``examples[sentence1_key]`` and, when ``sentence2_key`` is not
    None, ``examples[sentence2_key]`` as the paired second input. The
    tokenizer is called with ``padding=True``, ``truncation=True`` and
    ``max_length=256``.

    Returns:
        The tokenizer output, unchanged.
    """
    if sentence2_key is None:
        # Single-text task: only one positional argument for the tokenizer.
        texts = (examples[sentence1_key],)
    else:
        # Text-pair task: pass both columns so they are encoded together.
        texts = (examples[sentence1_key], examples[sentence2_key])

    return tokenizer(*texts, padding=True, max_length=256, truncation=True)

In [5]:
# Tokenize every split in batches; the tokenizer fields returned by
# preprocess_function are added to each split.
raw_datasets = raw_datasets.map(
    preprocess_function, batched=True, desc="Running tokenizer on dataset"
)

Running tokenizer on dataset:   0%|          | 0/2490 [00:00<?, ? examples/s]

Loading cached processed dataset at /Users/konstantinakovlev/Documents/GitHub/Concord-NAS-RNN/analysis/arrow/glue-79c215b275e45293/0.0.0/74f69db2c14c2860059d39860b1f400a03d11bf7fb5a8258ca38c501c878c137/cache-1b6d27a3fd885b69.arrow
Loading cached processed dataset at /Users/konstantinakovlev/Documents/GitHub/Concord-NAS-RNN/analysis/arrow/glue-79c215b275e45293/0.0.0/74f69db2c14c2860059d39860b1f400a03d11bf7fb5a8258ca38c501c878c137/cache-e9b413c82fe50b69.arrow


In [9]:
# Load the fine-tuned sequence classifier from the local checkpoint directory
# and put it in evaluation mode.
model = AutoModelForSequenceClassification.from_pretrained('glue_finetune/rte/')
model.eval()

# No training arguments are passed: the Trainer is used below only to run
# predictions with this model and tokenizer.
trainer = Trainer(model=model, tokenizer=tokenizer)

In [None]:
# Run evaluation: compute the model's logits on the validation and train splits.

In [35]:
# Logits for the whole validation split, computed in a single batched pass.
# Trainer.predict accepts the split directly, so there is no need to call it
# once per example (each call sets up its own data loader and evaluation loop).
# The reshape keeps the layout the per-example loop produced: one flattened
# row of logits per example.
predictions = trainer.predict(
    raw_datasets['validation'], metric_key_prefix="predict"
).predictions.reshape(len(raw_datasets['validation']), -1)

  0%|          | 0/277 [00:00<?, ?it/s]

In [36]:
# Validation accuracy: fraction of examples whose highest-scoring class equals the gold label.
np.mean(predictions.argmax(axis=-1) == np.asarray(raw_datasets['validation']['label']))

0.6498194945848376

In [38]:
# save validation logits for distillation
# np.save('validation_logits.npy', predictions)

In [39]:
# Logits for the whole train split, computed in a single batched pass.
# Trainer.predict accepts the split directly, so there is no need to call it
# once per example (each call sets up its own data loader and evaluation loop).
# The reshape keeps the layout the per-example loop produced: one flattened
# row of logits per example.
# Note: this rebinds `predictions`, replacing any logits computed earlier.
predictions = trainer.predict(
    raw_datasets['train'], metric_key_prefix="predict"
).predictions.reshape(len(raw_datasets['train']), -1)

  0%|          | 0/2490 [00:00<?, ?it/s]

In [40]:
# Train accuracy: fraction of examples whose highest-scoring class equals the gold label.
np.mean(predictions.argmax(axis=-1) == np.asarray(raw_datasets['train']['label']))

0.885140562248996

In [42]:
# save train logits for distillation
# np.save('train_logits.npy', predictions)