In [1]:
!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
!python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  5116  100  5116    0     0  32794      0 --:--:-- --:--:-- --:--:-- 32794
Updating... This may take around 2 minutes.
Updating TPU runtime to pytorch-nightly ...
Found existing installation: torch 1.7.0
Uninstalling torch-1.7.0:
Done updating TPU runtime
  Successfully uninstalled torch-1.7.0
Found existing installation: torchvision 0.8.1
Uninstalling torchvision-0.8.1:
  Successfully uninstalled torchvision-0.8.1
Copying gs://tpu-pytorch/wheels/torch-nightly-cp37-cp37m-linux_x86_64.whl...
\ [1 files][132.2 MiB/132.2 MiB]                                                
Operation completed over 1 objects/132.2 MiB.                                    
Copying gs://tpu-pytorch/wheels/torch_xla-nightly-cp37-cp37m-linux_x86_64.whl...
\ [1 files][138.4 MiB/138.4 MiB]                                                
Operation complet

Here we import all the PyTorch XLA-specific modules.

In [2]:
import torch_xla
import torch_xla.debug.metrics as met
import torch_xla.distributed.parallel_loader as pl
import torch_xla.utils.utils as xu
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_multiprocessing as xmp
import torch_xla.test.test_utils as test_utils

import warnings
warnings.filterwarnings("ignore")

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader
import torch.nn.functional as F


import torchvision
from torchvision import transforms

from tqdm.notebook import tqdm
from torch import nn
from transformers import RobertaModel
import torch
from torch.utils.data import Dataset
import pandas as pd
from transformers import RobertaTokenizer
from torch import optim
from torch import cuda
from torch.utils.data import DataLoader

In [4]:
class EntailmentDataset(Dataset):
    def __init__(self, entailment_csv, max_length):
        self.dataset = pd.read_csv(entailment_csv)
        self.tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
        self.max_length = max_length

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self,idx):
        hypothesis = self.dataset.loc[idx, ["hypothesis"]].values[0]
        premise = self.dataset.loc[idx, ["premise"]].values[0]
        label = self.dataset.loc[idx, ["label"]].values[0]
        inputs = self.tokenizer.encode_plus(
            hypothesis,
            premise,
            add_special_tokens=True,
            max_length=self.max_length,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        return {
            'input_ids': torch.tensor(ids, dtype=torch.long),
            'attention_mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            "labels": torch.tensor([label], dtype=torch.float) 
        }

In [9]:
class RoBERTaNLI(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = RobertaModel.from_pretrained("roberta-base")
        self.linear = torch.nn.Linear(768, 3)

    def forward(self, input_ids, attention_mask, token_type_ids):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        hidden_state = output_1[0]
        embeddings = hidden_state[:,0,:]
        positive_embedddings = torch.nn.ReLU()(embeddings)
        return F.log_softmax(self.linear(positive_embeddings), dim=1)

In [10]:
def train(epochs, batch_size, data, model, device):
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    for epoch in range(0,epochs):
        model.train().to(device)
        for i, input_data in enumerate(data):
            print('Batch #', i)
            input_ids = input_data['input_ids'].to(device)
            attention_mask = input_data['attention_mask'].to(device)
            token_type_ids = input_data['token_type_ids'].to(device)
            labels = input_data['labels'].to(device).squeeze()
            output = model(input_ids, attention_mask, token_type_ids)
            optimizer.zero_grad()
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
        print({"Epoch": epoch, "Loss": loss.item()})
    return

In [11]:
device = xm.xla_device()
batch_size=100
epochs=1
dataset  = EntailmentDataset("../input/contradictory-my-dear-watson/train.csv", max_length=259)

data_loader = DataLoader(dataset, batch_size=batch_size)
model = RoBERTaNLI()
train(epochs, batch_size, data_loader, model, device)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Batch # 0
pooler torch.Size([100, 768])
Batch # 1
pooler torch.Size([100, 768])
Batch # 2
pooler torch.Size([100, 768])
Batch # 3
pooler torch.Size([100, 768])
Batch # 4
pooler torch.Size([100, 768])
Batch # 5
pooler torch.Size([100, 768])
Batch # 6
pooler torch.Size([100, 768])
Batch # 7
pooler torch.Size([100, 768])
Batch # 8
pooler torch.Size([100, 768])
Batch # 9
pooler torch.Size([100, 768])
Batch # 10
pooler torch.Size([100, 768])
Batch # 11
pooler torch.Size([100, 768])
Batch # 12
pooler torch.Size([100, 768])
Batch # 13
pooler torch.Size([100, 768])
Batch # 14
pooler torch.Size([100, 768])
Batch # 15
pooler torch.Size([100, 768])
Batch # 16
pooler torch.Size([100, 768])


KeyboardInterrupt: 