Setup TPU

In [None]:
# PyTorch/XLA build to install; the trailing #@param annotation lists the selectable values.
VERSION = "nightly" #@param ["1.5", "20200325", "nightly", "20200516"]
# Download the PyTorch/XLA environment setup script, then run it for the selected VERSION.
!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
!python pytorch-xla-env-setup.py --version $VERSION

In [None]:
# Install Hugging Face transformers. The install is unpinned, so the version
# depends on when this cell is run.
!pip install transformers

XLA Libraries

In [None]:
import os
assert os.environ['COLAB_TPU_ADDR']
# imports the torch_xla libraries for TPU usage
import torch_xla.core.xla_model as xm
import torch_xla.distributed.parallel_loader as pl
import torch_xla.distributed.xla_multiprocessing as xmp

Libraries for Network

In [None]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from torch.utils import data
import transformers
from google.colab import drive
from sklearn.model_selection import train_test_split
import time

# Ignore warnings
from typing import Tuple
import warnings
warnings.filterwarnings("ignore")

Get Data from Google Drive

In [None]:
# Mount Google Drive into the Colab filesystem (force_remount=True remounts
# even if a mount already exists).
drive.mount('/content/drive', force_remount=True)
# Path of the CSV that get_dataset() reads with pd.read_csv.
data_location = '/content/drive/My Drive/FakeNews/tri_sentence_news.csv'

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


Pretraining Functions

In [None]:
class AttrDict(dict):
    """
    Dictionary whose keys can also be read and written as attributes.

    Used for storing neural net training specifications.
    """
    def __init__(self, *args, **kwargs):
        # Populate the mapping exactly as a plain dict would.
        dict.__init__(self, *args, **kwargs)
        # Use the mapping itself as the attribute namespace, so `obj.key`
        # and `obj['key']` always refer to the same entry.
        self.__dict__ = self

class NewsDataset(data.Dataset):
    """
    Dataset object for custom NLP News dataset.

    Holds the article texts and their labels as arrays and tokenizes a single
    article on every lookup.
    """
    # Annotations are written as strings so that defining the class never
    # evaluates `pd.Series[str]`, which raises TypeError on pandas versions
    # where Series is not subscriptable.
    def __init__(self, txt: "pd.Series", target: "pd.Series",
                 args: "AttrDict"):
        """
        :param txt: Each observation corresponding to a news article.
        :param target: Whether the news article is fake news or not. The
                values are converted to ``torch.long`` in ``__getitem__``, so
                they must be integer class labels.
        :param args: Neural Network training specifications. Only
                ``tokenizer`` and ``max_len`` are read here.
        """
        self.txt = txt.values
        self.target = target.values
        self.tokenizer = args.tokenizer
        self.max_len = args.max_len

    def __len__(self) -> int:
        """ Returns length of self

        :returns: int
        """
        return len(self.txt)

    def __getitem__(self, id: int) -> dict:
        """
        Returns item located at index <id>

        :param id: Index of row to be retrieved.
        :return: dict with keys 'text' (the raw article), 'input_ids' and
                'attention_mask' (flattened tokenizer outputs) and 'targets'
                (label as a ``torch.long`` tensor).
        """
        text = self.txt[id]

        encoding = self.tokenizer.encode_plus(
            text,
            max_length=self.max_len,
            add_special_tokens=True,
            # `padding='max_length'` replaces the deprecated
            # `pad_to_max_length=True`; combined with truncation every item
            # has exactly `max_len` tokens.
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=False,
            return_tensors='pt'
        )
        return {
            'text': text,
            # The tokenizer returns a leading batch axis of size 1; flatten
            # so the DataLoader can stack items into a batch.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(self.target[id], dtype=torch.long)
            }

def create_dataloader(news: pd.DataFrame, args: AttrDict,
                      sampler: data.Sampler,
                      validation=False) -> data.DataLoader:
    """
    Build a DataLoader over the 'Text' and 'Real' columns of <news>.

    :param news: Dataset.
    :param args: Neural Network training specifications.
    :param sampler: Sampler for DataLoader.
    :param validation: Whether or not the DataLoader is for validation or
            training. Selects ``args.val_batch_size`` instead of
            ``args.batch_size``.
    :type validation: bool
    :return: data.DataLoader
    """
    # Pick the batch size that matches the split being loaded.
    if validation:
        rows_per_batch = args.val_batch_size
    else:
        rows_per_batch = args.batch_size

    news_dataset = NewsDataset(txt=news['Text'], target=news['Real'], args=args)

    # drop_last=True discards a trailing batch smaller than rows_per_batch.
    loader_options = {
        'batch_size': rows_per_batch,
        'num_workers': args.num_workers,
        'sampler': sampler,
        'drop_last': True,
    }
    return data.DataLoader(news_dataset, **loader_options)

In [None]:
def get_dataset(args: AttrDict = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Returns a pair of DataFrames, training and validation

    Reads the CSV at ``data_location`` and holds out 20% of the rows for
    validation.

    :param args: Neural Network training specifications. Optional so that
            the function can also be invoked with no arguments. When it
            provides a ``seed`` entry, that value seeds the split so repeated
            calls return the same partition.
    :return: Tuple[pd.DataFrame, pd.DataFrame] -- (training, validation)
    """
    # Missing args, or args without a seed, falls back to an unseeded split.
    split_seed = getattr(args, 'seed', None)
    # Named `news` rather than `data` to avoid shadowing the torch.utils
    # `data` module imported at the top of the notebook.
    news = pd.read_csv(data_location)
    news_train, news_val = train_test_split(news, test_size=0.2,
                                            random_state=split_seed)
    return news_train, news_val

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=213450.0, style=ProgressStyle(descripti…




Model

In [None]:
class SentimentClassifier(nn.Module):
    """
    Neural Network for NLP classification

    A pretrained BERT encoder followed by dropout, a linear layer and a
    softmax over the classes.
    """
    def __init__(self, args: "AttrDict"):
        """
        :param args: Neural Network training specifications. ``bert_type``
                selects the pretrained encoder and ``classes`` the number of
                output units.
        """
        super(SentimentClassifier, self).__init__()
        self.bert = transformers.BertModel.from_pretrained(args.bert_type)
        self.dropout = nn.Dropout(0.3)
        self.out = nn.Linear(self.bert.config.hidden_size, args.classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_ids: torch.Tensor,
                attention_mask: torch.Tensor) -> torch.Tensor:
        """
        Neural network forward propagation

        :param input_ids: Id's of input tokens.
        :param attention_mask: Id's of which tokens should be attended to and
                which should not.
        :return: torch.Tensor of class probabilities (softmax over dim 1).
                These are NOT raw logits, so they must not be passed to a
                loss that applies its own softmax.
        """
        encoder_outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask
            )
        # Take the pooled output by position instead of tuple-unpacking:
        # integer indexing works both for a plain tuple and for a dict-like
        # model output, whereas unpacking a dict-like output yields its keys.
        pooled_output = encoder_outputs[1]
        output = self.dropout(pooled_output)
        output = self.out(output)
        return self.softmax(output)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=435779157.0, style=ProgressStyle(descri…




Training Functions

In [None]:
def train(args: AttrDict, train_loader: data.DataLoader,
          val_loader: data.DataLoader, device) -> SentimentClassifier:
    """
    Returns a fine-tuned BERT neural network

    Trains for ``args.epochs`` epochs, saves the weights to 'model.pt', then
    runs a single pass over the validation loader. Progress is reported with
    ``xm.master_print``.

    :param args: Neural Network training specifications.
    :param train_loader: Training DataLoader.
    :param val_loader: Validation DataLoader.
    :param device: XLA TPU device.
    :type device: xm.xla_device.
    :return: SentimentClassifier
    """
    model = WRAPPED_MODEL.to(device)
    # The learning rate is scaled by the number of participating processes.
    optimizer = transformers.AdamW(model.parameters(),
                                   lr=args.learn_rate * xm.xrt_world_size())
    # The model's forward() already applies a softmax, so the loss must
    # consume log-probabilities. nn.CrossEntropyLoss would apply a second
    # softmax, which bounds the loss away from zero and flattens gradients.
    loss_fn = nn.NLLLoss().to(device)

    def run_batch(batch: dict) -> Tuple[torch.Tensor, float]:
        """
        Forward one batch. Returns the loss tensor and the number of correct
        predictions in the batch.

        :param batch: Dict with 'input_ids', 'attention_mask' and 'targets'.
        :return: Tuple[torch.Tensor, float]
        """
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        targets = batch['targets'].to(device)

        probabilities = model(input_ids=input_ids,
                              attention_mask=attention_mask)
        # Clamp before the log so a probability of exactly 0 cannot give -inf.
        loss = loss_fn(torch.log(probabilities.clamp_min(1e-12)), targets)
        predictions = probabilities.argmax(dim=1)
        return loss, (predictions == targets).sum()

    def summarize(correct_predictions, n_examples: int,
                  losses: list) -> Tuple[float, float]:
        """
        Convert running counters into (accuracy in percent, mean loss).

        :param correct_predictions: Accumulated count of correct predictions.
        :param n_examples: Number of examples seen.
        :param losses: Per-batch loss values.
        :return: Tuple[float, float]
        """
        if n_examples == 0:
            # An empty loader has no accuracy or loss to report.
            return 0.0, float('nan')
        # float() forces true division; the counter is an integer tensor.
        accuracy = 100.0 * float(correct_predictions) / n_examples
        return accuracy, float(np.mean(losses))

    def train_loop(loader: data.DataLoader) -> Tuple[float, float]:
        """
        Train neural network over epoch. Returns training accuracy and loss.

        :param loader: Training DataLoader.
        :return: Tuple[float, float]
        """
        model.train()
        losses = []
        correct_predictions = 0
        n_examples = 0
        for batch in loader:
            optimizer.zero_grad()
            loss, n_correct = run_batch(batch)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            xm.optimizer_step(optimizer)

            correct_predictions += n_correct
            losses.append(loss.item())
            # Count the rows actually present rather than assuming that
            # every batch holds args.batch_size examples.
            n_examples += batch['targets'].size(0)

        return summarize(correct_predictions, n_examples, losses)

    def eval_loop(loader: data.DataLoader) -> Tuple[float, float]:
        """
        Evaluate neural network over epoch. Returns validation accuracy and
        loss.

        :param loader: Validation DataLoader.
        :return: Tuple[float, float]
        """
        model.eval()
        correct_predictions = 0
        n_examples = 0
        losses = []
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for batch in loader:
                loss, n_correct = run_batch(batch)
                correct_predictions += n_correct
                losses.append(loss.item())
                n_examples += batch['targets'].size(0)

        return summarize(correct_predictions, n_examples, losses)

    for epoch in range(args.epochs):
        start = time.time()
        # A distributed sampler reshuffles only when told the epoch number;
        # samplers without set_epoch are left untouched.
        if hasattr(train_loader.sampler, 'set_epoch'):
            train_loader.sampler.set_epoch(epoch)
        para_loader = pl.ParallelLoader(train_loader, [device])
        train_acc, train_loss = train_loop(
            para_loader.per_device_loader(device)
            )
        end = time.time()
        xm.master_print(("Epoch [%d/%d], Train Loss: %.4f, "
                         "Train Accuracy: %.2f%%, Time (m): %d") % (
            epoch + 1,
            args.epochs,
            train_loss,
            train_acc,
            (end - start) / 60)
                        )
    # Weights are saved before validation, so they reflect the last epoch.
    xm.save(model.state_dict(), 'model.pt')

    start = time.time()
    para_loader = pl.ParallelLoader(val_loader, [device])
    val_acc, val_loss = eval_loop(para_loader.per_device_loader(device))
    end = time.time()
    xm.master_print(
        ("Validation Loss: %.4f, "
         "Validation Accuracy: %.2f%%, Time (m): %d") % (
            val_loss, val_acc, (end - start) / 60)
            )
    return model

In [None]:
def run(index, args: AttrDict) -> None:
    """
    Fine-tune BERT neural network with the given specifications in <args>,
    using a TPU.

    :param index: TPU index. Users can ignore.
    :param args: Neural Network training specifications.
    :return: None
    """
    torch.manual_seed(args.seed) # set seed
    device = xm.xla_device()  # Set device after Spawn call to get all 8 cores
    # The serial executor invokes its callable with no arguments, so bind
    # <args> here instead of passing the bare function.
    news_train, news_val = SERIAL_EXEC.run(lambda: get_dataset(args))
    # Each process trains on its own shard of the training rows.
    train_sampler = torch.utils.data.DistributedSampler(
        news_train,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True
        )
    train_dataloader = create_dataloader(news=news_train,
                                         args=args,
                                         sampler=train_sampler)
    # No sampler is given for validation, so every process iterates over
    # all validation rows.
    val_dataloader = create_dataloader(news=news_val,
                                       args=args,
                                       sampler=None,
                                       validation=True)
    train(args, train_dataloader, val_dataloader, device)


Training Specifications

In [None]:
# Training specifications shared by the dataset, model and training code:
#   learn_rate     - base learning rate; train() multiplies it by xm.xrt_world_size()
#   batch_size     - DataLoader batch size for training
#   val_batch_size - DataLoader batch size for validation
#   epochs         - number of training epochs
#   seed           - value passed to torch.manual_seed in run()
#   max_len        - max_length handed to the tokenizer in NewsDataset
#   classes        - number of output units of the classifier's final linear layer
#   bert_type      - pretrained BERT checkpoint loaded by SentimentClassifier
#   tokenizer      - tokenizer used by NewsDataset (loaded here, once)
#   num_workers    - num_workers passed to the DataLoader
args = AttrDict()
args_dict = {
    'learn_rate': 1e-5, 
    'batch_size': 64,
    'val_batch_size': 16,
    'epochs': 5, 
    'seed': 10,
    'max_len': 200,
    'classes': 2,
    'bert_type': 'bert-base-cased',
    'tokenizer': transformers.BertTokenizer.from_pretrained('bert-base-cased'),
    'num_workers': 0
}
args.update(args_dict)

# Executor that run() uses to load the dataset.
SERIAL_EXEC = xmp.MpSerialExecutor()
# Only instantiate model weights once in memory.
# train() moves the wrapped model to the device via WRAPPED_MODEL.to(device).
WRAPPED_MODEL = xmp.MpModelWrapper(SentimentClassifier(args))

Train Model

In [None]:
# Launch run() in 8 processes; run() receives the process index first,
# followed by the entries of `args=(args,)`.
# can only use 'fork' start method on Colab
xmp.spawn(run, args=(args,), nprocs=8, start_method='fork')

Epoch [1/5], Train Loss: 0.3445, Train Accuracy: 96.00%, Time (m): 11
Epoch [2/5], Train Loss: 0.3282, Train Accuracy: 98.00%, Time (m): 11
Epoch [3/5], Train Loss: 0.3250, Train Accuracy: 98.00%, Time (m): 10
Epoch [4/5], Train Loss: 0.3231, Train Accuracy: 98.00%, Time (m): 10
Epoch [5/5], Train Loss: 0.3223, Train Accuracy: 99.00%, Time (m): 10
Validation Loss: 0.3233, Validation Accuracy: 98.00%, Time (m): 15


In [None]:
# Rebuild the classifier and restore fine-tuned weights for use outside the
# spawned training processes.
# NOTE(review): train() saves its weights as 'model.pt' in the working
# directory, while this cell loads 'model.bin' from Drive -- confirm the
# checkpoint was copied/renamed there.
# Fall back to the CPU when no CUDA device is present (e.g. on a TPU runtime)
# instead of failing on the hard-coded 'cuda'.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = SentimentClassifier(args)
# map_location places the stored tensors on the device that is available.
model.load_state_dict(
    torch.load('/content/drive/My Drive/FakeNews/model.bin',
               map_location=device))
model = model.to(device)