In [0]:
from google.colab import drive
drive.mount('/content/drive')

!pip install pytorch_lightning simplejson transformers wandb torch

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import torch
import numpy as np
import pytorch_lightning as pl
import simplejson as json
import wandb

from pathlib import Path
from transformers import BertTokenizer,\
                         BertModel, \
                         BertForSequenceClassification
from torchtext import data
from torch.nn import functional as F

from pytorch_lightning.logging import WandbLogger
from sklearn.metrics import accuracy_score

%load_ext autoreload
%autoreload 2


DATA = Path("../data/")
DATA = Path("/content/drive/My Drive/boolq/data")
config = {
    "device": "cuda",
    "bert_pretrained": "bert-base-multilingual-cased",
    "batch_size": 48,
    "start_lr": 1e-4,
    "lr_factor": 0.3
    
}

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [0]:
#!wandb login

In [0]:
def get_prepared_dataset(
        raw_data: str,
        tokenizer: BertTokenizer,
        device: str) -> data.Dataset:

    input_idxs = data.Field(
        use_vocab=False,
        pad_token=tokenizer.pad_token_id,
        batch_first=True
    )
    type_idxs = data.Field(
        use_vocab=False,
        pad_token=1,
        batch_first=True
    )

    answers = data.Field(
        use_vocab=False,
        sequential=False,
        batch_first=True,
        dtype=torch.float32
    )

    fields = [
        ("x", input_idxs),
        ("type_ids", type_idxs),
        ("ans", answers)
    ]

    examples = []
    with open(raw_data) as f:
        for item in f:
            item = json.loads(item)
            tokenized = tokenizer.encode_plus(
                item["question"], item["passage"],
                return_token_type_ids=True,
                add_special_tokens=True,
                max_length=512,
            )
            examples.append(
                data.Example.fromlist(
                    (tokenized['input_ids'],
                     tokenized['token_type_ids'],
                     int(item["answer"])
                     ), fields)
            )

    return data.Dataset(examples, fields)


class BoolqClassifier(pl.LightningModule):
    def __init__(self, config):
        super().__init__()
        self.bert = BertModel.from_pretrained(
            config["bert_pretrained"]).to(config["device"])
        self.bert.requires_grad_(False)
        self.bert.eval()
        self.classifier = torch.nn.Linear(768, 1)
        self.bert.encoder.layer[-1].requires_grad_(True)
        # self.bert.pooler.requires_grad_(True)

    def forward(self, x, type_ids):
        x = self.bert(x, token_type_ids=type_ids)[0][:, 0]
        x = self.classifier(x)
        return x

    def training_step(self, batch, batch_nb):
        y_pred = F.sigmoid(self(batch.x, batch.type_ids).view(-1))
        loss = F.binary_cross_entropy(y_pred, batch.ans)
        logs = {'train_loss': loss}
        return {'loss': loss, 'log': logs}
    
    
    def training_epoch_end(self, outputs):
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        logs = {'avg_loss': avg_loss}
        return {'t_loss': avg_loss, 'log': logs}
    

    def validation_step(self, batch, batch_nb):
        y_pred = F.sigmoid(self(batch.x, batch.type_ids).view(-1))
        loss = F.binary_cross_entropy(y_pred, batch.ans)
        acc = accuracy_score(batch.ans.cpu(), y_pred.cpu() > 0.5)
        logs = {'val_loss': loss, "val_acc": acc}
        return {'val_loss': loss, "val_acc": acc, 'log': logs}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_acc = np.mean([x['val_acc'] for x in outputs])
        logs = {'avg_val_loss': avg_loss, "avg_val_acc": avg_acc}
        lr = self._get_lr(self.trainer.optimizers[0])
        return {'avg_val_loss': avg_loss, "val_acc": avg_acc, "lr": lr, 'log': logs}


    def configure_optimizers(self):
        opt = torch.optim.Adam(self.parameters(), config["start_lr"])
        sch = {
          'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(
              opt,
              factor=config["lr_factor"],
              patience=1),
          'monitor': 'avg_val_loss',
          'interval': 'epoch',
          'frequency': 1
        }
        return [opt], [sch]
               

    def train_dataloader(self):
        return train_loader

    def val_dataloader(self):
        return val_loader

    @staticmethod
    def _get_lr(optimizer):
        for param_group in optimizer.param_groups:
            return param_group['lr']

In [0]:
import simplejson as json
test = [json.loads(x, encoding="utf-8") for x in DATA.joinpath("test.jsonl").open()]
for x in test:
    if x["answer"] == "true":
        x["answer"] = True
    elif x["answer"] == "false":
        x["answer"] = False

_ = DATA.joinpath("test.jsonl").open("w").write("\n".join([json.dumps(x, ensure_ascii=False) for x in test]))


In [0]:
tokenizer = BertTokenizer.from_pretrained(
    config["bert_pretrained"], cased=True)

train_dataset = get_prepared_dataset(
    DATA.joinpath("train.jsonl"),
    tokenizer,
    config["device"])

train_loader = data.BucketIterator(
    train_dataset,
    config["batch_size"],
    device=config["device"],
    sort_key=lambda x: len(x.x),
    shuffle=True
)

val_dataset = get_prepared_dataset(
    DATA.joinpath("dev.jsonl"),
    tokenizer,
    config["device"])

val_loader = data.BucketIterator(
    val_dataset,
    128,
    device=config["device"],
    sort_key=lambda x: len(x.x),
    shuffle=True
)

test_dataset = get_prepared_dataset(
    DATA.joinpath("test.jsonl"),
    tokenizer,
    config["device"])

test_loader = data.BucketIterator(
    test_dataset,
    128,
    device=config["device"],
    sort_key=lambda x: len(x.x),
    shuffle=True
)

print(
    len(val_loader),
    len(train_loader)
)

26 197


In [0]:
model = BoolqClassifier(config)
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
count_parameters(model)

logger = WandbLogger(
    offline=False,
    name="bert_baseline_last_layer_classifier",
    project="boolq",
    entity="morgachev",
)
logger.log_hyperparams(config)
# logger.watch(model, log_freq=6)

In [14]:
early_stop_callback = pl.callbacks.early_stopping.EarlyStopping(
    monitor='avg_val_loss',
    min_delta=0.00,
    patience=5,
    verbose=True,
    mode='min'
)
trainer = pl.Trainer(
    logger=logger,
    gpus=1,
    early_stop_callback=early_stop_callback,
    # fast_dev_run=True,
#     overfit_pct=0.1,
#     train_percent_check=0.1,
#     val_percent_check=0.1,
#     test_percent_check=0.1,
#     val_check_interval=0.1
#     auto_lr_find=True,
#     row_log_interval=10
 )
trainer.fit(model)
wandb.save("debug.ipynb")

GPU available: True, used: True
No environment variable for node rank defined. Set as 0.
CUDA_VISIBLE_DEVICES: [0]

    | Name                                             | Type              | Params
-----------------------------------------------------------------------------------
0   | bert                                             | BertModel         | 177 M 
1   | bert.embeddings                                  | BertEmbeddings    | 92 M  
2   | bert.embeddings.word_embeddings                  | Embedding         | 91 M  
3   | bert.embeddings.position_embeddings              | Embedding         | 393 K 
4   | bert.embeddings.token_type_embeddings            | Embedding         | 1 K   
5   | bert.embeddings.LayerNorm                        | LayerNorm         | 1 K   
6   | bert.embeddings.dropout                          | Dropout           | 0     
7   | bert.encoder                                     | BertEncoder       | 85 M  
8   | bert.encoder.layer                    

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…





HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 00007: early stopping





[]

In [15]:
print(2)

2


In [0]:
from tqdm.notebook import tqdm
from sklearn import metrics

def evaluate(loader):
    y_true = []
    y_pred = []

    model.eval()
    with torch.no_grad():
        for i, batch in tqdm(enumerate(loader), total=len(loader)):
            output = model(batch.x, batch.type_ids)
            y_pred.append(torch.sigmoid(output).cpu().numpy())
            y_true.append(batch.ans.cpu().numpy())

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred) > 0.5

    acc = metrics.accuracy_score(y_true, y_pred)
    f1 = metrics.f1_score(y_true, y_pred)
    prec = metrics.precision_score(y_true, y_pred)
    rec = metrics.recall_score(y_true, y_pred)

    print(f"Accuracy: {acc:.2f}\n"+
          f"Precision: {prec:.2f}\n"+
          f"Recall: {rec:.2f}\n"+
          f"F1: {f1:.2f}")
    
    return acc, prec, rec, f1


In [49]:
_ = evaluate(test_loader)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))


Accuracy: 0.49
Precision: 0.37
Recall: 0.86
F1: 0.52


In [50]:
_ = evaluate(val_loader)

HBox(children=(FloatProgress(value=0.0, max=26.0), HTML(value='')))


Accuracy: 0.70
Precision: 0.75
Recall: 0.77
F1: 0.76
