In [1]:
from transformers import BertTokenizer, BertForMaskedLM, BertTokenizer, BertForSequenceClassification
import torch
def load_model(name, device=None):
    """Load a BERT sequence-classification model and move it to a device.

    Args:
        name: Model identifier or checkpoint directory handed to
            ``BertForSequenceClassification.from_pretrained``.
        device: Device to place the model on. When ``None`` (the default),
            CUDA is used if it is available and the CPU otherwise.

    Returns:
        The loaded model, already moved to the selected device.
    """
    if device is None:
        # Previously this was an unconditional ``.cuda()``, which raises on
        # machines without a GPU; fall back to the CPU instead.
        device = "cuda" if torch.cuda.is_available() else "cpu"
    model = BertForSequenceClassification.from_pretrained(
        name,
        output_attentions=False,
    )
    return model.to(device)
# Every model evaluated in this notebook shares the base uncased tokenizer.
tokenizer_path = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(
    tokenizer_path,
    do_lower_case=True,
)

# Models to fine-tune and score: the base model first, then one local
# checkpoint directory every 80 steps.
# NOTE(review): range(80, 40000, 80) ends at checkpoint-39920, so a
# checkpoint-40000 directory (if one exists) is not included - confirm.
models = [
    "bert-base-uncased",
    *(
        f"./finetuned-scrumbled-wikitext2-3e-4/checkpoint-{step}"
        for step in range(80, 40000, 80)
    ),
]

In [2]:
from datasets import load_dataset

# Load the IMDB dataset; later cells use its "train" and "test" splits
# (through `tokenized_imdb`).
imdb = load_dataset("imdb")

def preprocess_function(examples):
    """Tokenize the ``"text"`` field of a batch of examples.

    Sequences are truncated but deliberately NOT padded here. Padding inside
    a batched ``map`` pads every row of the map batch to that batch's longest
    sequence and stores the padding in the dataset. The notebook's
    ``data_collator`` (a ``DataCollatorWithPadding``) already pads each
    training/evaluation batch at collation time, so padding here only adds
    wasted tokens.

    Args:
        examples: Batch of examples with a ``"text"`` entry, as supplied by
            ``Dataset.map(..., batched=True)``.

    Returns:
        Whatever ``tokenizer`` returns for the given texts.
    """
    return tokenizer(examples["text"], truncation=True)
# Tokenize the dataset by mapping preprocess_function over it; with
# batched=True the function receives a batch of examples per call.
tokenized_imdb = imdb.map(preprocess_function, batched=True)


from transformers import DataCollatorWithPadding

# Collator built from the shared tokenizer. Later cells pass it to the Trainer
# and call it from the test DataLoader's collate function.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Found cached dataset imdb (/home/sha43/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/sha43/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-220ce39307ff7007.arrow
Loading cached processed dataset at /home/sha43/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-547478c462eb5903.arrow
Loading cached processed dataset at /home/sha43/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-e28a7280cf0b9af3.arrow


In [3]:
import evaluate
import numpy as np

# Accuracy metric object used by compute_metrics below.
# NOTE: a later cell defines a function that is also named `evaluate`; once
# that cell has run, `evaluate` no longer refers to this package, so
# re-running this line afterwards fails.
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score an ``(predictions, labels)`` pair with the accuracy metric.

    Args:
        eval_pred: Two-item sequence; the first item holds per-class scores
            (argmax is taken along axis 1), the second the reference labels.

    Returns:
        The result of ``accuracy.compute`` for the predicted class ids.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy.compute(
        predictions=predicted_ids,
        references=references,
    )

# Class-id <-> class-name lookup tables; the inverse map is derived from the
# forward one so the two cannot drift apart.
id2label = {
    0: "NEGATIVE",
    1: "POSITIVE",
}
label2id = {label: idx for idx, label in id2label.items()}

In [4]:
from transformers import DataCollatorWithPadding

# NOTE(review): this repeats the identical `data_collator` assignment made in
# cell [2]; the cell is redundant and simply rebuilds the same collator.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [5]:
# Load the first entry of `models` (the base "bert-base-uncased" model).
# NOTE: the training loop in a later cell rebinds `model` for every entry of
# `models`, so the instance created here is not the one that gets trained.
model = load_model(models[0])

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [6]:

from torch.utils.data import DataLoader
from tqdm import tqdm
def preprocess_function(batch):
    """Collate a list of examples into one batch, ignoring raw text.

    NOTE: this reuses the name of the tokenization function defined in an
    earlier cell and replaces it once this cell has run.

    Args:
        batch: Iterable of per-example mappings.

    Returns:
        The result of ``data_collator`` applied to the examples with their
        ``"text"`` entry removed.
    """
    features = []
    for example in batch:
        kept = {}
        for key, value in example.items():
            if key == "text":
                # Skip the raw string so only the remaining fields reach
                # the collator.
                continue
            kept[key] = value
        features.append(kept)
    return data_collator(features)

# Loader over the tokenized test split: batches of 64 in fixed order
# (shuffle=False), collated by the preprocess_function defined just above.
test_loader = DataLoader(tokenized_imdb["test"], batch_size=64, shuffle=False, collate_fn=preprocess_function)

def evaluate(model, loader):
    """Return the classification accuracy of ``model`` over ``loader``.

    NOTE: this function has the same name as the ``evaluate`` package that an
    earlier cell imports; after this cell runs, ``evaluate`` refers to this
    function and ``evaluate.load(...)`` no longer works.

    Args:
        model: Model called as ``model(input_ids, attention_mask=...)`` whose
            output exposes ``["logits"]``. It is switched to eval mode and
            moved to CUDA when available, otherwise to the CPU.
        loader: Iterable of batches providing ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` tensors.

    Returns:
        Fraction of examples whose argmax prediction equals the label, or NaN
        when ``loader`` yields no examples.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval().to(device)
    all_pred = []
    all_true = []
    with torch.no_grad():
        for batch in tqdm(loader):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            # Labels are not passed to the model: only the logits are needed,
            # so there is no reason to compute a loss during evaluation.
            output = model(input_ids, attention_mask=attention_mask)
            predicts = output["logits"].argmax(dim=1)
            all_pred.extend(predicts.cpu().tolist())
            # Labels are only compared on the host; no device round-trip.
            all_true.extend(batch["labels"].tolist())
    if not all_true:
        # Make the empty case explicit rather than relying on numpy's
        # "mean of empty slice" warning to produce NaN.
        return np.float64("nan")
    return (np.array(all_pred) == np.array(all_true)).astype(float).mean()

In [7]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Start from an empty results file; one tab-separated line is appended per
# model below. NOTE: re-running this cell discards earlier results.
open("data_imdb_new.csv", "w").close()

# The hyper-parameters are the same for every model, so build them once
# instead of on every loop iteration. save_strategy="no" states directly what
# the previous huge save_steps value was emulating: never write checkpoints.
training_args = TrainingArguments(
    output_dir="res",
    save_strategy="no",
    learning_rate=3e-5,
    per_device_train_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.01,
)

for model_name in models:
    # Seed before loading so the freshly initialised classification head is
    # the same for every model, rather than depending on whatever RNG state
    # the previous training run left behind.
    torch.manual_seed(training_args.seed)
    # load_model already places the model on the device; no second .cuda().
    model = load_model(model_name)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_imdb["train"],
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    trainer.train()
    model = trainer.model
    # Drop the trainer before evaluation and release cached GPU memory.
    del trainer
    torch.cuda.empty_cache()

    score = evaluate(model, test_loader)
    # Append immediately so finished models are recorded even if a later
    # iteration fails.
    with open("data_imdb_new.csv", "a") as fout:
        print(model_name, score, sep="\t", file=fout)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Step,Training Loss
