In [1]:
!pip install jsonlines pytorch_pretrained_bert



In [2]:
# Mount Google Drive; the project folder used by the following cells lives under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# Root folder of the CommonsenseQA project on the mounted Drive. Later cells add it to
# sys.path and derive the data and model-output locations from it.
PATH = '/content/gdrive/My Drive/Colab Notebooks/NLP/CommonsenseQA'

In [0]:
import sys

# Put the project folder on the import path so the `finetuning.bert` imports in the
# cells below can be resolved (presumably the package lives under PATH).
# Guarded so that re-running this cell does not append the same entry again.
if PATH not in sys.path:
    sys.path.append(PATH)

# Finetuning

In [5]:
import logging
import os

import numpy as np
import torch

from finetuning.bert import *

# Root logging configuration: timestamp, level and logger name in front of every message,
# with INFO as the threshold.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s'
LOG_DATE_FORMAT = '%m/%d/%Y %H:%M:%S'
logging.basicConfig(format=LOG_FORMAT, datefmt=LOG_DATE_FORMAT, level=logging.INFO)

# Logger named after this module/notebook.
logger = logging.getLogger(__name__)

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [0]:
# Build the fine-tuning configuration from the parser's defaults. An explicit empty
# argument list is passed (parse_args expects a list of strings, not a str) so that the
# kernel's own command line is never parsed — assuming init_parser returns an
# argparse.ArgumentParser; confirm in finetuning.bert.
parser = init_parser(PATH=PATH)
args = parser.parse_args([])

# Send this run's outputs to a dedicated folder under the project root.
args.output_dir = os.path.join(PATH, 'bert-freeze-emb')

In [0]:
# File names of the dataset splits handed to finetune().
train_file, val_file = 'train_rand_split.jsonl', 'dev_rand_split.jsonl'

# File names handed to finetune() for the model config and weights.
CONFIG_NAME, WEIGHTS_NAME = 'bert_config.json', 'pytorch_model.bin'

In [8]:
# Run fine-tuning with the training and dev split files. freeze_emb=True presumably keeps
# BERT's embedding layer fixed (it matches the 'bert-freeze-emb' output folder) — confirm
# in finetuning.bert.
finetune(args, train_file, val_file, WEIGHTS_NAME, CONFIG_NAME, freeze_emb=True)

03/17/2019 10:08:53 - INFO - finetuning.bert -   device: cuda n_gpu: 1, distributed training: False, 16-bits training: False
03/17/2019 10:08:53 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
03/17/2019 10:08:54 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /root/.pytorch_pretrained_bert/distributed_-1/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
03/17/2019 10:08:54 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /root/.pytorch_pretrained_bert/distributed_-1/9c41111e2de84547a463fd39217199738d1e

# Evaluate performance

In [0]:
import os
from finetuning.bert import *
from tqdm import tqdm

In [0]:
# Name of the pretrained model whose tokenizer is loaded for evaluation.
bert_model = 'bert-base-uncased'

# File names of the saved model config and weights.
CONFIG_NAME, WEIGHTS_NAME = 'bert_config.json', 'pytorch_model.bin'

# Dataset folder and fine-tuning output folder, both under the project root.
data_dir, output_dir = (
    os.path.join(PATH, folder) for folder in ('data', 'bert-freeze-emb')
)

# Full paths of the saved config and weights inside the output folder.
output_config_file, output_model_file = (
    os.path.join(output_dir, file_name) for file_name in (CONFIG_NAME, WEIGHTS_NAME)
)

In [0]:
# Re-create the default arguments for the evaluation section. An explicit empty list is
# passed because parse_args expects a list of strings, not a str (assuming init_parser
# returns an argparse.ArgumentParser — confirm in finetuning.bert).
parser = init_parser(PATH=PATH)
args = parser.parse_args([])

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [0]:
# Rebuild the fine-tuned classifier from the saved config and weights.
config = BertConfig(output_config_file)
model = BertForMultipleChoice(config, num_choices=3)

# map_location='cpu' lets a checkpoint written during a GPU run be read on a CPU-only
# runtime as well; the model is moved to the selected device right afterwards.
model.load_state_dict(torch.load(output_model_file, map_location='cpu'))
model.to(device);

In [0]:
# Dataset split file names; they are joined with data_dir when the examples are read.
train_file, val_file = 'train_rand_split.jsonl', 'dev_rand_split.jsonl'

# Sequence-length setting from the parsed defaults, passed to convert_examples_to_features.
max_seq_length = args.max_seq_length

In [15]:
# Featurize the training split so the fine-tuned model can be scored on the data it was
# trained on.
tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=True)
train_path = os.path.join(data_dir, train_file)
eval_examples = read_qa_examples(train_path, is_training = False)
eval_features = convert_examples_to_features(eval_examples, tokenizer, max_seq_length, True)

# One long tensor per model input field, followed by the label tensor.
all_input_ids, all_input_mask, all_segment_ids = (
    torch.tensor(select_field(eval_features, field_name), dtype=torch.long)
    for field_name in ('input_ids', 'input_mask', 'segment_ids')
)
all_label = torch.tensor([feature.label for feature in eval_features], dtype=torch.long)

# Bundle everything so a DataLoader can serve aligned batches.
eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label)

03/17/2019 11:13:35 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
03/17/2019 11:13:35 - INFO - finetuning.bert -   *** Example ***
03/17/2019 11:13:35 - INFO - finetuning.bert -   choice: 0
03/17/2019 11:13:35 - INFO - finetuning.bert -   tokens: [CLS] if a lantern is not for sale , where is it likely to be ? [SEP] antique shop [SEP]
03/17/2019 11:13:35 - INFO - finetuning.bert -   input_ids: 101 2065 1037 12856 2003 2025 2005 5096 1010 2073 2003 2009 3497 2000 2022 1029 102 14361 4497 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
03/17/2019 11:13:35 - INFO - finetuning.bert -   input_mask: 1

In [0]:
# Serve the evaluation set in its stored order, 8 examples per batch.
EVAL_BATCH_SIZE = 8
eval_sampler = SequentialSampler(eval_data)
eval_dataloader = DataLoader(eval_data, batch_size=EVAL_BATCH_SIZE, sampler=eval_sampler)

# Put the model in evaluation mode; the trailing ';' hides the module repr in the output.
model.eval();

In [17]:
# Score the model on the batches in eval_dataloader (built from the training split above).
eval_loss, eval_accuracy = 0, 0
nb_eval_steps, nb_eval_examples = 0, 0
for input_ids, input_mask, segment_ids, label_ids in tqdm(eval_dataloader):
    input_ids = input_ids.to(device)
    input_mask = input_mask.to(device)
    segment_ids = segment_ids.to(device)
    label_ids = label_ids.to(device)

    with torch.no_grad():
        # Single forward pass per batch: the loss is computed from the logits instead of
        # running the model a second time with the labels.
        # NOTE(review): assumes the model's built-in loss is mean cross-entropy over the
        # per-choice logits (as in pytorch_pretrained_bert's BertForMultipleChoice) —
        # confirm if finetuning.bert provides its own model class.
        logits = model(input_ids, segment_ids, input_mask)
        tmp_eval_loss = torch.nn.functional.cross_entropy(logits, label_ids)

    # accuracy() is expected to return the number of correct predictions in the batch
    # (the running total is divided by the example count below) — confirm in finetuning.bert.
    tmp_eval_accuracy = accuracy(logits.cpu().numpy(), label_ids.cpu().numpy())

    eval_loss += tmp_eval_loss.item()
    eval_accuracy += tmp_eval_accuracy

    nb_eval_examples += input_ids.size(0)
    nb_eval_steps += 1

# Loss is averaged over batches, accuracy over individual examples.
eval_loss = eval_loss / nb_eval_steps
eval_accuracy = eval_accuracy / nb_eval_examples

result = {'train_loss': eval_loss,
          'train_accuracy': eval_accuracy}

100%|██████████| 952/952 [10:46<00:00,  1.87it/s]


In [18]:
# Display the loss and accuracy measured on the training split.
result

{'train_accuracy': 0.985676741130092, 'train_loss': 0.0566954360129028}

In [19]:
# --- Build the evaluation data for the dev split ---
tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=True)
eval_examples = read_qa_examples(os.path.join(data_dir, val_file), is_training = False)
eval_features = convert_examples_to_features(eval_examples, tokenizer, max_seq_length, True)

all_input_ids = torch.tensor(select_field(eval_features, 'input_ids'), dtype=torch.long)
all_input_mask = torch.tensor(select_field(eval_features, 'input_mask'), dtype=torch.long)
all_segment_ids = torch.tensor(select_field(eval_features, 'segment_ids'), dtype=torch.long)
all_label = torch.tensor([f.label for f in eval_features], dtype=torch.long)
eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label)

eval_sampler = SequentialSampler(eval_data)
eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=8)

# Evaluation mode; the trailing ';' hides the module repr.
model.eval();

# --- Score the model on the dev split ---
eval_loss, eval_accuracy = 0, 0
nb_eval_steps, nb_eval_examples = 0, 0
for input_ids, input_mask, segment_ids, label_ids in tqdm(eval_dataloader):
    input_ids = input_ids.to(device)
    input_mask = input_mask.to(device)
    segment_ids = segment_ids.to(device)
    label_ids = label_ids.to(device)

    with torch.no_grad():
        # Single forward pass per batch: the loss is computed from the logits instead of
        # running the model a second time with the labels.
        # NOTE(review): assumes the model's built-in loss is mean cross-entropy over the
        # per-choice logits (as in pytorch_pretrained_bert's BertForMultipleChoice) —
        # confirm if finetuning.bert provides its own model class.
        logits = model(input_ids, segment_ids, input_mask)
        tmp_eval_loss = torch.nn.functional.cross_entropy(logits, label_ids)

    # accuracy() is expected to return the number of correct predictions in the batch
    # (the running total is divided by the example count below) — confirm in finetuning.bert.
    tmp_eval_accuracy = accuracy(logits.cpu().numpy(), label_ids.cpu().numpy())

    eval_loss += tmp_eval_loss.item()
    eval_accuracy += tmp_eval_accuracy

    nb_eval_examples += input_ids.size(0)
    nb_eval_steps += 1

# Loss is averaged over batches, accuracy over individual examples.
eval_loss = eval_loss / nb_eval_steps
eval_accuracy = eval_accuracy / nb_eval_examples

result = {'dev_loss': eval_loss,
          'dev_accuracy': eval_accuracy}
print(result)

03/17/2019 11:33:31 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
03/17/2019 11:33:31 - INFO - finetuning.bert -   *** Example ***
03/17/2019 11:33:31 - INFO - finetuning.bert -   choice: 0
03/17/2019 11:33:31 - INFO - finetuning.bert -   tokens: [CLS] why do people who are dying receive social security payments ? [SEP] born again [SEP]
03/17/2019 11:33:31 - INFO - finetuning.bert -   input_ids: 101 2339 2079 2111 2040 2024 5996 4374 2591 3036 10504 1029 102 2141 2153 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
03/17/2019 11:33:31 - INFO - finetuning.bert -   input_mask: 1 1 1 1 1 1

{'dev_loss': 1.345569231179582, 'dev_accuracy': 0.611578947368421}



