In [2]:
!pip install ipywidgets==7.6.0

[0m

In [3]:
!pip install -r ../src/requirements.txt

[0m

In [4]:
import numpy as np
import pytorch_lightning as pl
import torch.nn.functional as F
import torch
import torch.nn as nn
import os

from torch.utils.data import DataLoader, random_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, load_metric
from pytorch_lightning.callbacks import ModelCheckpoint

  warn(f"Failed to load image Python extension: {e}")


In [5]:
class GrammarModel(pl.LightningModule):
    """DistilBERT sequence classifier fine-tuned on the GLUE CoLA dataset.

    The module owns its data pipeline: it downloads the dataset, tokenizes
    the ``sentence`` column, and exposes train/validation dataloaders.
    Validation predictions are collected per epoch and scored with the
    GLUE "cola" metric.

    Args:
        lr: Learning rate passed to AdamW.
        eps: Epsilon passed to AdamW.
        batch_size: Batch size used by both dataloaders.
    """

    def __init__(self, lr=2e-5, eps=1e-8, batch_size=32):
        super().__init__()
        self.save_hyperparameters()
        self.lr = lr
        self.eps = eps
        self.batch_size = batch_size
        self.num_classes = 2
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "distilbert-base-uncased", num_labels=self.num_classes
        )
        self.tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

        self.metric = load_metric("glue", "cola")
        self.criterion = nn.CrossEntropyLoss()

        # Tokenized dataset splits; populated by setup().
        self.train_data = None
        self.val_data = None

        # Per-epoch buffers of validation predictions and labels.
        self.predictions = []
        self.references = []

    def prepare_data(self):
        """Download the CoLA dataset so that it is available in the local cache.

        Lightning invokes this hook on a single process only, so no state is
        assigned to ``self`` here; the splits are built in ``setup`` instead.
        """
        load_dataset("glue", "cola")

    def setup(self, stage=None):
        """Load, tokenize and format the train/validation splits.

        Runs on every process, so each one ends up with ``self.train_data``
        and ``self.val_data``. The method is idempotent: if both splits are
        already built it returns immediately.

        Args:
            stage: Stage name supplied by the trainer; not used.
        """
        if self.train_data is not None and self.val_data is not None:
            return

        cola_dataset = load_dataset("glue", "cola")
        tensor_columns = ["input_ids", "attention_mask", "label"]

        train_data = cola_dataset["train"].map(self.tokenize_sentence, batched=True)
        val_data = cola_dataset["validation"].map(self.tokenize_sentence, batched=True)

        # Expose only the columns the model consumes, as torch tensors.
        train_data.set_format(type="torch", columns=tensor_columns)
        val_data.set_format(type="torch", columns=tensor_columns)

        self.train_data = train_data
        self.val_data = val_data

    def tokenize_sentence(self, batch):
        """Tokenize the ``sentence`` field of a batch.

        Every sequence is truncated or padded to exactly 128 tokens.

        Args:
            batch: Mapping with a ``"sentence"`` entry holding the raw text.

        Returns:
            The tokenizer output for the batch.
        """
        outputs = self.tokenizer(
            batch["sentence"],
            max_length=128,
            truncation=True,
            padding="max_length",
        )

        return outputs

    def forward(self, batch):
        """Run the classifier on a batch and return its logits.

        Args:
            batch: Mapping with ``"input_ids"`` and ``"attention_mask"``.

        Returns:
            The ``logits`` attribute of the underlying model's output.
        """
        input_ids, attention_mask = batch["input_ids"], batch["attention_mask"]
        outputs = self.model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        return logits

    def train_dataloader(self):
        """Return a shuffled dataloader over the training split."""
        # Build the splits on demand so the loader also works when the module
        # is used without a Trainer having called setup() first.
        if self.train_data is None:
            self.setup("fit")
        return torch.utils.data.DataLoader(
            self.train_data, batch_size=self.batch_size, shuffle=True
        )

    def val_dataloader(self):
        """Return an unshuffled dataloader over the validation split."""
        if self.val_data is None:
            self.setup("fit")
        return torch.utils.data.DataLoader(
            self.val_data, batch_size=self.batch_size, shuffle=False
        )

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy training loss for one batch.

        Returns:
            The loss tensor used for backpropagation.
        """
        labels = batch["label"]
        # Call the module rather than .forward() so registered hooks run.
        logits = self(batch)
        loss = self.criterion(logits.view(-1, self.num_classes), labels)

        self.log("train_loss", loss, prog_bar=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Log the validation loss and buffer predictions for epoch-end scoring."""
        labels = batch["label"]
        logits = self(batch)
        loss = self.criterion(logits.view(-1, self.num_classes), labels)
        preds = torch.argmax(logits, dim=1)

        self.log("valid_loss", loss, prog_bar=True)
        # Keep the buffers on the CPU so a whole epoch of outputs does not
        # stay resident in accelerator memory.
        self.predictions.append(preds.detach().cpu())
        self.references.append(labels.detach().cpu())

    def on_validation_epoch_end(self):
        """Score the buffered predictions with the CoLA metric and log the result.

        The buffers are emptied afterwards so that each epoch is scored only
        on its own predictions.
        """
        predictions = torch.cat(self.predictions).view(-1)
        references = torch.cat(self.references).view(-1)
        matthews_correlation = self.metric.compute(
            predictions=predictions, references=references
        )

        # NOTE(review): with several processes each one scores only the
        # batches it saw and sync_dist reduces those scores - confirm this
        # is the intended aggregation.
        self.log_dict(matthews_correlation, sync_dist=True, prog_bar=True)
        self.predictions.clear()
        self.references.clear()

    def configure_optimizers(self):
        """Return an AdamW optimizer over all parameters using ``lr`` and ``eps``."""
        return torch.optim.AdamW(self.parameters(), lr=self.lr, eps=self.eps)

In [6]:
# Output locations and run configuration.
LOCAL_DIR = "local"
VERSION = 1
SEED = 42
os.makedirs(LOCAL_DIR, exist_ok=True)

# Seed the Python, NumPy and torch RNGs before the model is built so that the
# randomly initialised classifier head and the batch shuffling are
# reproducible across "Restart & Run All".
pl.seed_everything(SEED, workers=True)

# Keep the checkpoint with the lowest validation loss.
checkpoint_callback = ModelCheckpoint(
    dirpath=f"{LOCAL_DIR}/models", monitor="valid_loss", mode="min"
)

model = GrammarModel()

trainer = pl.Trainer(
    default_root_dir=f"{LOCAL_DIR}/logs",
    accelerator=("gpu" if torch.cuda.is_available() else "cpu"),
    max_epochs=5,
    fast_dev_run=False,
    logger=pl.loggers.TensorBoardLogger(f"{LOCAL_DIR}/logs/", name="cola", version=VERSION),
    callbacks=[checkpoint_callback],
)

trainer.fit(model)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  self.metric = load_metric("glue", "cola")
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Downloading builder script:   0%|          | 0.00/28.8k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/27.9k [00:00<?, ?B/s]

Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                                | Params
------------------------------------------------------------------
0 | model     | DistilBertForSequenceClassification | 67.0 M
1 | criterion | CrossEntropyLoss                    | 0     
------------------------------------------------------------------
67.0 M    Trainable params
0         Non-trainable params
67.0 M    Total params
267.820   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.
