In [None]:
# Install the notebook's third-party dependencies.
# `%pip` (instead of `! pip`) installs into the environment of the running
# kernel, so the packages are importable by the cells below.
# NOTE(review): versions are unpinned — pin them once known-good versions are confirmed.
%pip install -q lightning transformers rich

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.3/71.3 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.2/66.2 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m660.0/660.0 kB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.7/45.7 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.9/69.9 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Copy the S17 archive from Google Drive into the working directory and unpack it.
# NOTE(review): the source path presumably requires Google Drive to be mounted at
# /content/drive before this cell runs — no mount step is visible here; confirm.
! cp /content/drive/MyDrive/ERAv1/S17/S17.zip .
# -o overwrites already-extracted files without asking, so re-running this cell
# does not stall on unzip's interactive "replace?" prompt.
! unzip -o S17.zip

In [None]:
# Copy the four BERT data files out of the extracted archive into the working
# directory with a single cp invocation (multiple sources, one destination).
! cp /content/S17/BERT/names.tsv /content/S17/BERT/values.tsv /content/S17/BERT/training.txt /content/S17/BERT/vocab.txt .

In [None]:
import re
import torch
import random
import logging
import warnings
import numpy as np
import pandas as pd
import seaborn as sn
from torch import nn
from os.path import exists
import lightning.pytorch as pl
import matplotlib.pyplot as plt
from collections import Counter
from torch.nn import functional as F
from dataset import SentencesDataset
from transformer import EncoderTransformer
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import LearningRateMonitor, RichProgressBar

In [None]:
# Hyperparameters shared by the cells below.

# --- data ---
batch_size = 1024   # samples per DataLoader batch
seq_len = 20        # sequence length handed to SentencesDataset
n_vocab = 40_000    # cap on vocabulary size when vocab.txt has to be built

# --- model ---
embed_size = 128
inner_ff_size = 4 * embed_size  # evaluates to 512
n_heads = 8
n_code = 8          # NOTE(review): presumably the number of encoder layers — confirm
dropout = 0.1

In [None]:
# Keyword arguments for the optimizer (learning rate, weight decay, Adam betas).
optim_kwargs = {'lr':1e-4, 'weight_decay':1e-4, 'betas':(.9,.999)}

# Load the raw corpus: one lower-cased sentence per line.
# A context manager closes the file handle as soon as the text has been read.
pth = 'training.txt'
with open(pth) as corpus_file:
    sentences = corpus_file.read().lower().split('\n')

print('tokenizing sentences...')
special_chars = ',?;.:/*!+-()[]{}"\'&'
# Pad every special character with spaces so that it becomes a token of its own,
# then split on single spaces. The replacement is a raw string because '\g' is not
# a valid Python escape and triggers an invalid-escape warning in a normal literal.
sentences = [re.sub(f'[{re.escape(special_chars)}]', r' \g<0> ', s).split(' ') for s in sentences]
# Splitting on ' ' leaves empty strings wherever spaces were adjacent; drop them.
sentences = [[w for w in s if len(w)] for s in sentences]

print('creating/loading vocab...')
pth = 'vocab.txt'
if not exists(pth):
    # No cached vocabulary: keep the n_vocab most frequent tokens and cache them,
    # one token per line.
    words = [w for s in sentences for w in s]
    vocab = Counter(words).most_common(n_vocab)
    vocab = [w[0] for w in vocab]
    with open(pth, 'w+') as vocab_file:
        vocab_file.write('\n'.join(vocab))
else:
    # Reuse the cached vocabulary file.
    with open(pth) as vocab_file:
        vocab = vocab_file.read().split('\n')

print('creating dataset...')
dataset = SentencesDataset(sentences, vocab, seq_len)
# Shuffle every epoch and drop the last incomplete batch so all batches have batch_size items.
kwargs = {'shuffle':True,  'drop_last':True, 'pin_memory':True, 'batch_size':batch_size}
data_loader = torch.utils.data.DataLoader(dataset, **kwargs)

tokenizing sentences...
creating/loading vocab...
creating dataset...


In [None]:
class BERTLightning(pl.LightningModule):
    """LightningModule that trains an ``EncoderTransformer`` with a cross-entropy loss.

    Batches are expected to be mappings with ``'input'`` and ``'target'`` entries.

    Args:
        dataset: Object exposing ``vocab`` (its length sets the number of
            embeddings) and ``IGNORE_IDX`` (target index excluded from the loss).
        seq_len: Sequence length forwarded to ``EncoderTransformer``.
        embed_dim: Embedding size forwarded to ``EncoderTransformer``.
        n_heads: Number of attention heads forwarded to ``EncoderTransformer``.
        n_layers: Number of layers forwarded to ``EncoderTransformer``.
        ff_size: Feed-forward size forwarded to ``EncoderTransformer``.
        dropout: Dropout value forwarded to ``EncoderTransformer``.
    """

    def __init__(self, dataset, seq_len=20, embed_dim=128, n_heads=8, n_layers=8, ff_size=128*4, dropout=0.1):
        super().__init__()
        self.seq_len = seq_len
        self.embed_dim = embed_dim
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.ff_size = ff_size
        self.dropout = dropout
        self.model = EncoderTransformer(n_layers, n_heads, embed_dim, ff_size, seq_len, n_embeddings=len(dataset.vocab), dropout=dropout)
        self.criterion = nn.CrossEntropyLoss(ignore_index=dataset.IGNORE_IDX)
        # Per-step gradient magnitudes of the embedding weights, reset every epoch.
        self.grads = []
        # Record only the scalar hyperparameters. The dataset object is excluded so it
        # is not stored in hparams (and therefore not serialized into checkpoints or
        # sent to the logger as if it were a hyperparameter).
        self.save_hyperparameters(ignore=['dataset'])

    def forward(self, x):
        """Run the wrapped encoder on ``x`` and return its output."""
        return self.model(x)

    def configure_optimizers(self):
        """Return the Adam optimizer used for all parameters of this module."""
        optimizer = torch.optim.Adam(self.parameters(),
                                     lr=1e-4,
                                     betas=(.9, .999),
                                     weight_decay=1e-4)
        return optimizer

    def model_step(self, batch):
        """Compute the cross-entropy loss for one batch.

        Args:
            batch: Mapping with ``'input'`` and ``'target'`` tensors.

        Returns:
            The loss tensor produced by ``self.criterion``.
        """
        masked_input, masked_target = batch['input'], batch['target']
        output = self(masked_input)
        # Flatten everything but the last (class) dimension of the model output.
        output_v = output.reshape(-1, output.shape[-1])
        # reshape(-1) always yields a 1-D tensor. The previous view(-1, 1).squeeze()
        # collapsed to a 0-d tensor when the target held a single element, which no
        # longer matches the 2-D logits expected by CrossEntropyLoss.
        target_v = masked_target.reshape(-1)
        return self.criterion(output_v, target_v)

    def training_step(self, batch, batch_idx):
        """Compute, log (per step and per epoch) and return the training loss."""
        loss = self.model_step(batch)
        self.log('train_loss', loss, prog_bar=True, on_epoch=True, on_step=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log (per step and per epoch) the validation loss."""
        loss = self.model_step(batch)
        self.log('val_loss', loss, prog_bar=True, on_epoch=True, on_step=True, logger=True)

    def on_train_epoch_end(self):
        """Print the epoch mean of the recorded gradient magnitudes, then reset them."""
        # Skip the report when nothing was recorded: np.mean([]) emits a
        # RuntimeWarning and evaluates to nan.
        if self.grads:
            print(f'Δw - {np.mean(self.grads)}')
        self.grads.clear()

    def on_after_backward(self):
        """Record and log the summed absolute gradient of the embedding weights."""
        grad = self.model.embeddings.weight.grad
        # The gradient is None when backward did not populate it for the embeddings.
        if grad is not None:
            grad = round(grad.abs().sum().item(), 3)
            self.grads.append(grad)
            self.log('Δw', grad, on_step=True, logger=True)

In [None]:
# Build the Lightning trainer: 15 epochs, metrics logged every step to
# TensorBoard under logs/BERT, with a Rich progress bar that stays visible
# after each epoch.
trainer = pl.Trainer(
    log_every_n_steps=1,
    check_val_every_n_epoch=1,
    enable_model_summary=True,
    max_epochs=15,
    accelerator='auto',
    # Use exactly one GPU when CUDA is available; otherwise defer to Lightning's
    # own device selection. 'auto' replaces the previous None, which is not a
    # documented value for `devices` in lightning.pytorch 2.x (int, list or str).
    devices=1 if torch.cuda.is_available() else 'auto',
    logger=[TensorBoardLogger("logs/", name="BERT")],
    callbacks=[RichProgressBar(leave=True)],
)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
# Build the model from the hyperparameters defined in the config cell rather than
# from BERTLightning's built-in defaults. The dataset is already created with
# `seq_len`, so passing the same values here keeps model and data in sync when the
# config is edited. With the current config the values equal the defaults.
# NOTE(review): `n_code` is taken to be the encoder layer count — confirm.
model = BERTLightning(
    dataset,
    seq_len=seq_len,
    embed_dim=embed_size,
    n_heads=n_heads,
    n_layers=n_code,
    ff_size=inner_ff_size,
    dropout=dropout,
)
trainer.fit(model, data_loader)

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

INFO: `Trainer.fit` stopped: `max_epochs=15` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=15` reached.
