In [None]:
# Show the GPU(s) visible to this runtime before doing any work.
!nvidia-smi

In [None]:
# Mount Google Drive at /content/drive (the import only exists on Colab).
# NOTE(review): none of the visible cells read from /content/drive -- the data and
# checkpoint paths used below are all under ../input/ -- confirm this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install transformers==3.5.1
!pip install torch==1.4.0

Collecting transformers==3.5.1
  Downloading transformers-3.5.1-py3-none-any.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 2.3 MB/s 
Collecting sentencepiece==0.1.91
  Downloading sentencepiece-0.1.91-cp37-cp37m-manylinux1_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 33.6 MB/s 
Collecting tokenizers==0.9.3
  Downloading tokenizers-0.9.3-cp37-cp37m-manylinux1_x86_64.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 43.7 MB/s 
Installing collected packages: tokenizers, sentencepiece, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.10.3
    Uninstalling tokenizers-0.10.3:
      Successfully uninstalled tokenizers-0.10.3
  Attempting uninstall: sentencepiece
    Found existing installation: sentencepiece 0.1.96
    Uninstalling sentencepiece-0.1.96:
      Successfully uninstalled sentencepiece-0.1.96
  Attempting uninstall: transformers
    Found existing installation: tr

In [None]:
from transformers import BertTokenizer, BertForPreTraining, BertForQuestionAnswering, BertModel, BertConfig
from transformers import XLMRobertaForQuestionAnswering, XLMRobertaTokenizer
import torch
import torch.nn as nn
from transformers.data.metrics.squad_metrics import compute_predictions_log_probs, compute_predictions_logits, squad_evaluate
from transformers.data.processors.squad import SquadResult, SquadV1Processor, SquadV2Processor



In [None]:
# Start from a question-answering checkpoint stored in a local directory.
pretrained_checkpoint_dir = '../input/ckpt-xlmr-xquad-pretrain/final_model'
model = XLMRobertaForQuestionAnswering.from_pretrained(pretrained_checkpoint_dir)

In [None]:
# The tokenizer is loaded by model identifier rather than from the checkpoint directory.
# NOTE(review): presumably the checkpoint loaded above shares the xlm-roberta-large vocabulary -- confirm.
tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-large")

Downloading:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

In [None]:
# SquadV1Processor instance; used below to read the train, dev and test JSON files.
processor = SquadV1Processor()

In [None]:
# The train and dev splits are read from the same dataset directory.
viquad_dir = '../input/viquad-v1'
train_examples = processor.get_train_examples(viquad_dir, 'train_ViQuAD.json')
dev_examples = processor.get_dev_examples(viquad_dir, 'dev_ViQuAD.json')

100%|██████████| 138/138 [00:11<00:00, 12.19it/s]
100%|██████████| 18/18 [00:01<00:00, 14.33it/s]


In [None]:
from transformers.data.processors.squad import squad_convert_examples_to_features

In [None]:
# Convert the training examples into model-ready features plus a PyTorch dataset
# (is_training=True, so the dataset also carries the answer start/end positions
# that the training loop reads from each batch).
train_feature_kwargs = dict(
    max_seq_length=384,
    doc_stride=128,
    max_query_length=64,
    is_training=True,
    return_dataset='pt',
    threads=10,
)
train_features, train_dataset = squad_convert_examples_to_features(
    train_examples, tokenizer, **train_feature_kwargs
)

convert squad examples to features:  40%|████      | 7521/18579 [01:01<01:21, 134.88it/s]Could not find answer: 'Là con đầu lòng của Augustine Washington (1694–1743) và người vợ thứ hai,' vs. 'Cha của ông, Augustine là một nhà trồng thuốc lá có sở hữu người nô lệ'
convert squad examples to features:  46%|████▌     | 8545/18579 [01:09<01:30, 110.37it/s]Could not find answer: 'vụ phân tích mẫu đất bằng phổ kế huỳnh quang tia' vs. 'phân tích mẫu đất bằng phổ kế huỳnh quang tia X'
convert squad examples to features: 100%|██████████| 18579/18579 [02:32<00:00, 122.09it/s]
add example index and unique id: 100%|██████████| 18579/18579 [00:00<00:00, 626933.67it/s]


In [None]:
# The raw training examples are not referenced again after feature conversion;
# drop the name so the objects can be garbage-collected.
del train_examples

In [None]:
# Convert the dev examples with the same windowing settings as the training set,
# but with is_training=False (evaluation features).
dev_feature_kwargs = dict(
    max_seq_length=384,
    doc_stride=128,
    max_query_length=64,
    is_training=False,
    return_dataset='pt',
    threads=10,
)
dev_features, dev_dataset = squad_convert_examples_to_features(
    dev_examples, tokenizer, **dev_feature_kwargs
)

convert squad examples to features: 100%|██████████| 2285/2285 [00:17<00:00, 128.08it/s]
add example index and unique id: 100%|██████████| 2285/2285 [00:00<00:00, 562341.41it/s]


In [None]:
def to_list(tensor):
    """Return the contents of ``tensor`` as plain (nested) Python values.

    The tensor is detached from the autograd graph and copied to the CPU
    before conversion, so it may live on any device and may require grad.
    """
    host_copy = tensor.detach().cpu()
    return host_copy.tolist()

In [None]:
import os
def evaluate(model, tokenizer, dev_dataset, dev_examples, dev_features, batch_size=12, prefix=""):
    """Predict answer spans for ``dev_dataset`` and score them with ``squad_evaluate``.

    Args:
        model: question-answering model; it is called with ``input_ids`` and
            ``attention_mask`` only (token type ids are not passed).
        tokenizer: forwarded to ``compute_predictions_logits``.
        dev_dataset: dataset whose batches hold the input ids at index 0, the
            attention mask at index 1 and the feature index at index 3.
        dev_examples: examples forwarded to the prediction and scoring helpers.
        dev_features: features aligned with ``dev_dataset``; the index stored at
            batch position 3 selects the feature whose ``unique_id`` tags the result.
        batch_size: evaluation batch size (default 12).
        prefix: text inserted into the output file names; the default ``""``
            yields ``predictions_.json`` / ``nbest_predictions_.json``.

    Returns:
        Whatever ``squad_evaluate(dev_examples, predictions)`` returns (the
        notebook iterates it as a mapping of metric name to value).
    """
    # Follow the model's own device instead of depending on a notebook-level
    # ``device`` global that is only defined in a later cell.
    device = next(model.parameters()).device

    eval_dataloader = DataLoader(
        dev_dataset, sampler=SequentialSampler(dev_dataset), batch_size=batch_size
    )

    # Switching to eval mode once is enough; nothing inside the loop changes it.
    model.eval()
    all_results = []
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            # batch[2] (token type ids) is intentionally not passed to the model.
            outputs = model(input_ids=batch[0], attention_mask=batch[1])
        # If the model returns a ModelOutput (return_dict=True), iterating it would
        # yield keys rather than tensors, so fall back to its tuple form.
        if hasattr(outputs, "to_tuple"):
            outputs = outputs.to_tuple()

        example_indices = batch[3]
        for i, example_index in enumerate(example_indices):
            eval_feature = dev_features[example_index.item()]
            unique_id = int(eval_feature.unique_id)
            per_example = [to_list(tensor[i]) for tensor in outputs]

            if len(per_example) >= 5:
                # Five or more outputs: models that also emit top-k indices and a cls logit.
                result = SquadResult(
                    unique_id,
                    per_example[0],
                    per_example[2],
                    start_top_index=per_example[1],
                    end_top_index=per_example[3],
                    cls_logits=per_example[4],
                )
            else:
                start_logits, end_logits = per_example
                result = SquadResult(unique_id, start_logits, end_logits)
            all_results.append(result)

    output_prediction_file = os.path.join("./", "predictions_{}.json".format(prefix))
    output_nbest_file = os.path.join("./", "nbest_predictions_{}.json".format(prefix))
    output_null_log_odds_file = os.path.join("./", "null_odds_{}.json".format(prefix))
    predictions = compute_predictions_logits(
        dev_examples,
        dev_features,
        all_results,
        20,     # n_best_size
        300,    # max_answer_length
        False,  # do_lower_case
        output_prediction_file,
        output_nbest_file,
        output_null_log_odds_file,
        True,   # verbose_logging
        False,  # version_2_with_negative
        0.0,    # null_score_diff_threshold
        tokenizer,
    )
    return squad_evaluate(dev_examples, predictions)

In [None]:
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import AdamW, get_linear_schedule_with_warmup
from tqdm import trange, tqdm
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
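# Optional: freeze the pretrained encoder so that only the QA head is trained.
# (XLMRobertaForQuestionAnswering exposes its encoder as `model.roberta`, not `model.bert`.)
# for param in model.roberta.parameters():
#     param.requires_grad = False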

In [None]:
# Fine-tune the QA model on the training features, save the result, then score it on the dev set.

# ---- Hyper-parameters ----
num_epochs = 2
train_batch_size = 4
learning_rate = 2e-5
adam_epsilon = 1e-8
weight_decay = 0.0        # 0 for every parameter group, i.e. no weight decay is applied
num_warmup_steps = 814
max_grad_norm = 1.0
logging_steps = 5000

tb_writer = SummaryWriter()
train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=train_batch_size)
# One optimizer step per batch (no gradient accumulation).
t_total = len(train_dataloader) * num_epochs

# Biases and LayerNorm weights are kept in their own group so they stay
# decay-free if `weight_decay` is ever raised above 0.
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
    {
        "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
        "weight_decay": weight_decay,
    },
    {
        "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate, eps=adam_epsilon)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=t_total
)

# Prefer the GPU, but do not crash at model.to() on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Number of completed optimizer steps. Starting at 0 (not 1) makes
# tr_loss / global_step the true mean loss per step.
global_step = 0
tr_loss = 0.0

model.zero_grad()
train_iterator = trange(int(num_epochs), desc="Epoch")

# Keep the per-iteration bar on a single line; note this rebinds the notebook-level
# `tqdm`, so evaluate() picks up the same display settings.
from functools import partial
tqdm = partial(tqdm, position=0, leave=True)

for _ in train_iterator:
    epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=False)
    model.train()
    for step, batch in enumerate(epoch_iterator):
        batch = tuple(t.to(device) for t in batch)

        # batch[2] (token type ids) is intentionally not passed to the model.
        outputs = model(
            input_ids=batch[0],
            attention_mask=batch[1],
            start_positions=batch[3],
            end_positions=batch[4],
        )
        loss = outputs[0]
        loss.backward()
        tr_loss += loss.item()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()
        model.zero_grad()
        global_step += 1

        if global_step % logging_steps == 0:
            average_loss = tr_loss / global_step
            tb_writer.add_scalar("train/average_loss", average_loss, global_step)
            # No intermediate checkpoint is written; only the final model is saved below.
            print(" global_step = %s, average loss = %s" % (global_step, average_loss))

tb_writer.close()

output_dir = os.path.join('./', 'final_model')
# Unwrap a DataParallel-style wrapper (anything exposing `.module`) before saving.
model_to_save = model.module if hasattr(model, "module") else model
model_to_save.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

# max(..., 1) avoids a ZeroDivisionError when the dataloader was empty.
print(" global_step = %s, average loss = %s" % (global_step, tr_loss / max(global_step, 1)))

results = evaluate(model, tokenizer, dev_dataset, dev_examples, dev_features)
for key, value in results.items():
    print(key, value)

Iteration: 100%|█████████▉| 4999/5013 [53:43<00:09,  1.55it/s]

 global_step = %s, average loss = %s 5000 1.1454309169983492


Iteration: 100%|██████████| 5013/5013 [53:52<00:00,  1.55it/s]
Iteration:  99%|█████████▉| 4986/5013 [53:49<00:17,  1.51it/s]

 global_step = %s, average loss = %s 10000 0.907736317210528


Iteration: 100%|██████████| 5013/5013 [54:06<00:00,  1.54it/s]
Epoch: 100%|██████████| 2/2 [1:47:59<00:00, 3239.90s/it]
Evaluating:   0%|          | 0/199 [00:00<?, ?it/s]

 global_step = %s, average loss = %s 10027 0.9068552134443153


Evaluating: 100%|██████████| 199/199 [01:41<00:00,  1.97it/s]


exact 73.88059701492537
f1 89.59521308785466
total 2278
HasAns_exact 73.88059701492537
HasAns_f1 89.59521308785466
HasAns_total 2278
best_exact 73.88059701492537
best_exact_thresh 0.0
best_f1 89.59521308785466
best_f1_thresh 0.0


In [None]:
# Second tokenizer instance, loaded from the same "xlm-roberta-large" identifier as `tokenizer`;
# it is the one passed to evaluate() for the test set below.
# NOTE(review): the test features below are still built with `tokenizer`, not `tokenizer_1`.
tokenizer_1 = XLMRobertaTokenizer.from_pretrained("xlm-roberta-large")

In [None]:
# The test split is read with get_dev_examples and converted with the same
# settings as the dev set (is_training=False).
test_examples = processor.get_dev_examples('../input/viquad-v1', 'test_ViQuAD.json')
test_feature_kwargs = dict(
    max_seq_length=384,
    doc_stride=128,
    max_query_length=64,
    is_training=False,
    return_dataset='pt',
    threads=10,
)
test_features, test_dataset = squad_convert_examples_to_features(
    test_examples, tokenizer, **test_feature_kwargs
)

100%|██████████| 18/18 [00:01<00:00, 14.47it/s]
convert squad examples to features: 100%|██████████| 2210/2210 [00:18<00:00, 122.46it/s]
add example index and unique id: 100%|██████████| 2210/2210 [00:00<00:00, 448035.76it/s]


In [None]:
# Score the fine-tuned model on the test split and print every metric returned.
results = evaluate(model, tokenizer_1, test_dataset, test_examples, test_features)
for metric_name in results:
    print(metric_name, results[metric_name])

Evaluating: 100%|██████████| 201/201 [01:41<00:00,  1.98it/s]


exact 71.82971014492753
f1 88.17030533760955
total 2208
HasAns_exact 71.82971014492753
HasAns_f1 88.17030533760955
HasAns_total 2208
best_exact 71.82971014492753
best_exact_thresh 0.0
best_f1 88.17030533760955
best_f1_thresh 0.0
