In [1]:
!pip install transformers



In [3]:
import pickle
import torch

class MLQADataset(torch.utils.data.Dataset):
    """Map-style dataset over a mapping of pre-computed encodings.

    `encodings` maps a field name to an indexable collection holding one
    entry per example. The dataset length is taken from the 'input_ids'
    field, so that key must be present.
    """

    def __init__(self, encodings):
        # Kept by reference; no copy or validation is performed here.
        self.encodings = encodings

    def __len__(self):
        """Return the number of examples (entries under 'input_ids')."""
        return len(self.encodings['input_ids'])

    def __getitem__(self, idx):
        """Return example `idx` as a dict of field name -> torch tensor."""
        example = {}
        for field_name, column in self.encodings.items():
            example[field_name] = torch.tensor(column[idx])
        return example

def _load_pickle(path):
    """Unpickle and return the single object stored in the file at `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    loading finishes (the bare `pickle.load(open(...))` form leaves it open).

    NOTE: pickle.load can execute arbitrary code; only use it on files you
    created yourself or otherwise trust.
    """
    with open(path, "rb") as fh:
        return pickle.load(fh)


train_large_dataset = _load_pickle("train_large_dataset") # Augmented large dataset
train_small_dataset = _load_pickle("train_small_dataset") # Augmented small dataset
train_zh_dataset = _load_pickle("train_zh_dataset") # Large ONLY (without original) translated dataset

train_dataset = _load_pickle("train_dataset")
val_dataset = _load_pickle("val_dataset")
zh_dataset = _load_pickle("zh_dataset")

In [4]:
def compute_f1(predicted, true):
    """Return the F1 overlap between two collections of hashable items.

    F1 is computed as 2 * |overlap| / (|predicted| + |true|), which equals
    the harmonic mean of precision and recall.

    Elements are counted as a multiset, so repeated items contribute
    consistently to both the overlap and the sizes. (Counting the overlap
    with sets but the sizes with len() made identical inputs containing
    duplicates score below 1.) For duplicate-free inputs such as `range`
    spans the result is the same as plain set overlap.

    Args:
        predicted: iterable of hashable items for the prediction.
        true: iterable of hashable items for the reference.

    Returns:
        A float in [0, 1]. Two empty inputs are treated as a perfect match
        and yield 1.0.
    """
    from collections import Counter

    predicted_counts = Counter(predicted)
    true_counts = Counter(true)
    total = sum(predicted_counts.values()) + sum(true_counts.values())
    if total == 0:
        # Nothing predicted and nothing expected: count as full agreement.
        return 1.0
    # Counter & Counter keeps the minimum count per element (multiset intersection).
    overlap = sum((predicted_counts & true_counts).values())
    return 2 * overlap / total
    
def compute_em(predicted, true):
    """Exact-match score: 1 when `predicted == true`, otherwise 0.

    Equality is whatever `==` means for the given objects; the comparison
    result is converted to an int before being returned.
    """
    is_exact = (predicted == true)
    return int(is_exact)

In [14]:
from torch.utils.data import DataLoader
from transformers import AdamW, BertForQuestionAnswering
# input_ids = None
# attention_mask = None
# start_positions = None
# end_positions = None
# model = None
# torch.cuda.empty_cache()

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

model = BertForQuestionAnswering.from_pretrained('bert-base-multilingual-cased')
model.to(device)
model.train()

train_loader = DataLoader(train_small_dataset, batch_size=8, shuffle=True) # MODIFY dataset here !!!!!
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=True)
zh_loader = DataLoader(zh_dataset, batch_size=8, shuffle=True)
print(len(train_loader))
print(len(val_loader))
dummy_index = 0  # global step of the most recent evaluation (epoch * batches_per_epoch + batch_idx)
collect = []     # one (En_F1, En_EM, Zh_F1, Zh_EM, dummy_index, epoch) tuple per evaluation
optim = AdamW(model.parameters(), lr=5e-5)


def evaluate_span_metrics(model, loader, device):
    """Average compute_f1 / compute_em over every sample produced by `loader`.

    The model is switched to eval mode and run under torch.no_grad(), so no
    autograd graph is built during evaluation. The caller is responsible for
    switching back to train mode afterwards.

    Args:
        model: question-answering model called the same way as in training.
        loader: iterable of batches with 'input_ids', 'attention_mask',
            'start_positions' and 'end_positions' entries.
        device: device the batch tensors are moved to before the forward pass.

    Returns:
        (mean_f1, mean_em) as floats; (0.0, 0.0) if the loader yields no samples.
    """
    model.eval()
    f1_total = 0.0
    em_total = 0.0
    sample_count = 0
    with torch.no_grad():
        for eval_batch in loader:
            input_ids = eval_batch['input_ids'].to(device)
            attention_mask = eval_batch['attention_mask'].to(device)
            start_positions = eval_batch['start_positions'].to(device)
            end_positions = eval_batch['end_positions'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
            # With labels supplied, index 0 is the loss and indices 1 / 2 are read
            # as the per-token start / end scores (Hugging Face output ordering).
            # Move everything to the CPU once per batch rather than once per sample.
            start_scores = outputs[1].cpu()
            end_scores = outputs[2].cpu()
            gold_starts = start_positions.cpu()
            gold_ends = end_positions.cpu()
            samples_in_batch = len(input_ids)
            for i in range(samples_in_batch):
                predict_start = int(start_scores[i].argmax())
                predict_end = int(end_scores[i].argmax())
                true_start = int(gold_starts[i])
                true_end = int(gold_ends[i])
                # NOTE(review): spans are half-open ranges [start, end). If the stored
                # end index is inclusive, the last answer token is never scored, and any
                # two empty ranges (start >= end) compare equal, counting as an exact
                # match. Confirm against the dataset preparation before changing.
                predicted_span = range(predict_start, predict_end)
                true_span = range(true_start, true_end)
                f1_total += compute_f1(predicted_span, true_span)
                em_total += compute_em(predicted_span, true_span)
            sample_count += samples_in_batch
    if sample_count == 0:
        # Empty evaluation set: avoid dividing by zero.
        return 0.0, 0.0
    return f1_total / sample_count, em_total / sample_count


val_batch = 300   # evaluate every `val_batch` training batches (and on the last batch of each epoch)
max_epoch = 3
train_batch = len(train_loader)
for epoch in range(max_epoch):
    for batch_idx, batch in enumerate(train_loader):
        # Evaluation leaves the model in eval mode, so re-enable train mode every step.
        model.train()
        optim.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        start_positions = batch['start_positions'].to(device)
        end_positions = batch['end_positions'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
        loss = outputs[0]
        loss.backward()
        optim.step()

        if batch_idx % val_batch == 0 or batch_idx == train_batch - 1:
            print("Epoch: {}/{}, batch: {}/{}, {:%}".format(epoch, max_epoch, batch_idx, train_batch, batch_idx/train_batch))
            dummy_index = epoch * train_batch + batch_idx

            eng_f1, eng_em = evaluate_span_metrics(model, val_loader, device)
            print("English eval score: F1:{}, EM:{}".format(eng_f1, eng_em))

            F1, EM = evaluate_span_metrics(model, zh_loader, device)
            print("Chinese eval score: F1:{}, EM:{}".format(F1, EM))

            print('collect data: ', eng_f1, eng_em, F1, EM, dummy_index, epoch)
            collect.append((eng_f1, eng_em, F1, EM, dummy_index, epoch))
        


cuda


Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForQuestionAnswering: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-bas

1749
144
Epoch: 0/3, batch: 0/1749, 0.000000%
English eval score: F1:0.14156745529837858, EM:0.1210801393728223
Chinese eval score: F1:0.09376655100388047, EM:0.0496031746031746
collect data:  0.14156745529837858 0.1210801393728223 0.09376655100388047 0.0496031746031746 0 0
Epoch: 0/3, batch: 300/1749, 17.152659%
English eval score: F1:0.47069715377397175, EM:0.3797909407665505
Chinese eval score: F1:0.31726979351435697, EM:0.2123015873015873
collect data:  0.47069715377397175 0.3797909407665505 0.31726979351435697 0.2123015873015873 300 0
Epoch: 0/3, batch: 600/1749, 34.305317%
English eval score: F1:0.5520533827108367, EM:0.4416376306620209
Chinese eval score: F1:0.41534611712857705, EM:0.29563492063492064
collect data:  0.5520533827108367 0.4416376306620209 0.41534611712857705 0.29563492063492064 600 0
Epoch: 0/3, batch: 900/1749, 51.457976%
English eval score: F1:0.5814410989397145, EM:0.4712543554006969
Chinese eval score: F1:0.4406542394912614, EM:0.32936507936507936
collect data

In [15]:
# Dump every collected evaluation record to a CSV file, one row per record.
# NOTE(review): the "train_small" tag in the file name is hard-coded; it
# presumably should match the training dataset chosen in the training cell.
output_path = 'MUSE_lr_' + "train_small" + '.csv'
with open(output_path, 'w') as f:
    f.write("En_F1,En_EM,Zh_F1,Zh_EM,dummy_index,epoch\n")
    for record in collect:
        row = ','.join(str(value) for value in record)
        f.write(row + "\n")

In [None]:
# 64 52 42 29

BERT_MLQA.ipynb					 en-zh.txt
BERT_MLQA_MUSE.ipynb				 fast_align
BERT_MLQA_TAR-Copy1.ipynb			 muse_dict
BERT_MLQA_TAR.ipynb				 train_dataset
BERT_MLQA_TEST.ipynb				 train_large_dataset
MLQA_V1						 train_zh_dataset
MLQA_V1.zip					 val_dataset
MUSE_dataset_preparation.ipynb			 zh-en.txt
Question_Answering_with_a_Fine_Tuned_BERT.ipynb  zh_dataset
build
