# BERT

BERT, or Bidirectional Encoder Representations from Transformers, is a new method of pre-training language representations which obtains state-of-the-art results on a wide array of Natural Language Processing (NLP) tasks.     
It was released by Google Research in October 2018.

References    
[1] https://github.com/google-research/bert    
[2] https://github.com/huggingface/transformers    
[3] https://neptune.ai/blog/how-to-code-bert-using-pytorch-tutorial

## Using a pre-trained BERT model with the Hugging Face transformers library

In [1]:
import transformers
import torch.nn as nn

class BERTClassification(nn.Module):
    """Pre-trained BERT encoder followed by dropout and a one-output linear head.

    The encoder is loaded from the 'bert-base-cased' checkpoint; its pooled
    output is regularised with dropout (p=0.4) and projected from 768
    features down to a single value per input sequence.
    """

    def __init__(self):
        super().__init__()
        self.bert = transformers.BertModel.from_pretrained('bert-base-cased')
        self.bert_drop = nn.Dropout(0.4)
        self.out = nn.Linear(768, 1)

    def forward(self, ids, mask, token_type_ids):
        """Run the encoder and the classification head.

        Args:
            ids: Token ids, forwarded to BERT as its first positional argument.
            mask: Attention mask, forwarded as ``attention_mask``.
            token_type_ids: Segment ids, forwarded as ``token_type_ids``.

        Returns:
            The output of the linear head applied to the (dropped-out) pooled
            BERT output.
        """
        bert_outputs = self.bert(
            ids,
            attention_mask=mask,
            token_type_ids=token_type_ids,
        )
        # Index instead of tuple-unpacking: recent transformers versions return
        # a dict-like ModelOutput, and unpacking that yields its *keys*.
        # Position 1 is the pooled output for both tuple and ModelOutput returns.
        pooled_out = bert_outputs[1]
        return self.out(self.bert_drop(pooled_out))

### Example of fine-tuning a pretrained BERT
Reference: https://huggingface.co/docs/transformers/training#finetuning-in-native-pytorch

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer

# Load the "yelp_review_full" dataset.
# To store it in a directory of your own, pass
# cache_dir="YOUR OWN CACHE DIRECTORY" as an extra keyword argument.
dataset = load_dataset("yelp_review_full")

In [None]:
# Instantiate the tokenizer used by tokenize_function; it is loaded from the
# same "bert-base-cased" checkpoint as the model that is fine-tuned.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")


def tokenize_function(examples):
    """Tokenize the "text" field of a batch of examples.

    Sequences are padded to the maximum length and truncated when too long,
    so every example ends up with the same number of tokens.
    """
    return tokenizer(examples["text"], padding="max_length", truncation=True)


# batched=True hands tokenize_function many examples per call.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [None]:
# Batches are later unpacked into the model call as keyword arguments, so the
# raw "text" column is dropped and "label" is renamed to "labels".
tokenized_datasets = (
    tokenized_datasets
    .remove_columns(["text"])
    .rename_column("label", "labels")
)
# Return torch tensors when the datasets are indexed.
tokenized_datasets.set_format("torch")

# Keep 1000 shuffled examples per split (fixed seed for reproducibility).
small_train_dataset = (
    tokenized_datasets["train"]
    .shuffle(seed=42)
    .select(range(1000))
)
small_eval_dataset = (
    tokenized_datasets["test"]
    .shuffle(seed=42)
    .select(range(1000))
)

In [None]:
from torch.utils.data import DataLoader

# Batches of 8; only the training loader reshuffles its data.
train_dataloader = DataLoader(
    dataset=small_train_dataset,
    batch_size=8,
    shuffle=True,
)
eval_dataloader = DataLoader(
    dataset=small_eval_dataset,
    batch_size=8,
)

In [None]:
from transformers import AutoModelForSequenceClassification

# yelp_review_full has five classes (labels 0-4), so the classification head
# needs five outputs; a smaller head makes the loss fail on out-of-range labels.
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)

In [None]:
# Use PyTorch's AdamW: the AdamW class formerly exported by transformers was
# deprecated in favour of torch.optim.AdamW and removed from recent releases.
from torch.optim import AdamW

optimizer = AdamW(model.parameters(), lr=5e-5)

In [None]:
from transformers import get_scheduler

# One scheduler step is taken per optimizer step, so the schedule spans
# every batch of every epoch.
num_epochs = 3
num_training_steps = len(train_dataloader) * num_epochs

# "linear" schedule with no warm-up steps.
lr_scheduler = get_scheduler(
    name="linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

In [None]:
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

In [None]:
from tqdm.auto import tqdm

# One progress-bar tick per optimisation step.
progress_bar = tqdm(range(num_training_steps))

model.train()
for epoch in range(num_epochs):
    for batch in train_dataloader:
        # Move every tensor of the batch to the device the model lives on.
        batch = {k: v.to(device) for k, v in batch.items()}
        # The batch keys are passed as keyword arguments to the model.
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        # Clear gradients so they do not accumulate into the next step.
        optimizer.zero_grad()
        progress_bar.update(1)

In [None]:
# Accuracy is accumulated directly with torch, so no separate metric
# library has to be imported for this cell.
num_correct = 0
num_seen = 0

model.eval()
for batch in eval_dataloader:
    batch = {k: v.to(device) for k, v in batch.items()}
    # No gradients are needed for evaluation.
    with torch.no_grad():
        outputs = model(**batch)

    logits = outputs.logits
    # The predicted class is the index of the largest logit.
    predictions = torch.argmax(logits, dim=-1)
    num_correct += (predictions == batch["labels"]).sum().item()
    num_seen += batch["labels"].numel()

# Guard against an empty evaluation loader.
{"accuracy": num_correct / num_seen if num_seen else 0.0}

### Example of using pre-trained BERT with a pipeline

Example of using pre-trained BERT with a pipeline for masked language modeling    
Reference: https://huggingface.co/bert-base-uncased?text=London+is+the+%5BMASK%5D+of+England

In [None]:
# NOTE: the bert-base-uncased model is about 420M, so make sure you really
# need this pre-trained model before triggering the download.
from transformers import pipeline

unmasker = pipeline(
    task='fill-mask',
    model='bert-base-uncased',
)

In [None]:
# Run the fill-mask pipeline on a sentence containing a [MASK] placeholder.
unmasker('Hello I\'m a [MASK] model.')