In [1]:
import torch
import pytorch_lightning as pl

from processors import MnliProcessor
from firebert_fct import FireBERT_FCT
from bert_base_model import LightningBertForSequenceClassification

# GPU selection handed to pl.Trainer(gpus=...): -1 when CUDA is usable, None otherwise
# (per the Lightning Trainer docs, -1 requests every available GPU and None means CPU).
if torch.cuda.is_available():
    num_gpus = -1
else:
    num_gpus = None

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
# place where we will save our model
# (the logger directory, the checkpoint directory and the exported weights/config/tokenizer
# are all built by appending to this root, so it must end with a path separator)

save_root_path ='resources/models/co-tuned_MNLI_on_lightning/'

# forwarded to the model as hparams['use_full_example']
use_full_example = True

In [3]:
# prepare hyperparameters

max_steps = -1 # if -1 then calculate number of training steps based on the length of the train set
len_train_set = 392702  # presumably the number of MNLI training examples -- TODO confirm

gradient_accumulation_steps = 1
learning_rate = 2e-5
weight_decay = 0.00
adam_epsilon = 1e-8
warmup_proportion = 0  # fraction of one epoch's steps spent warming up the learning rate

num_train_epochs = 1
batch_size = 7

# NOTE(review): the step counts below are derived from len_train_set without dividing by
# batch_size. If the scheduler advances once per batch, this overestimates the number of
# steps by a factor of batch_size -- confirm against how FireBERT_FCT uses num_training_steps.
if max_steps > 0:
    # An explicit step budget wins; derive how many epochs are needed to reach it.
    num_train_epochs = max_steps // (len_train_set // gradient_accumulation_steps) + 1
    num_training_steps = max_steps
else:
    num_training_steps = len_train_set // gradient_accumulation_steps * num_train_epochs

# A fractional warmup_proportion (e.g. 0.1) would otherwise yield a float here;
# a step count has to be a whole number, so truncate explicitly.
warmup_steps = int(num_training_steps // num_train_epochs * warmup_proportion)


In [4]:
# display the total number of training steps computed in the hyperparameter cell
num_training_steps

392702

In [5]:
# display the number of warmup steps computed in the hyperparameter cell
warmup_steps

0

In [6]:
# parameters for generating adversarial candidates during co-tuning
# (every value below is copied into the hparams dict that is passed to FireBERT_FCT)

use_USE = True
USE_method = 'filter'
USE_multiplier = 12 # author's trailing note was "3" -- presumably an alternative setting, TODO confirm
stop_words = True
perturb_words = 9 # author's trailing note was "2" -- presumably an alternative setting, TODO confirm
candidates_per_word = 10
total_alternatives = 4 # author's trailing note was "5" -- presumably an alternative setting, TODO confirm
match_pos = True
leave_alone = 0
random_out_of = 0
judge_bert = False

In [7]:
# Settings handed to FireBERT_FCT: learning-rate / step-count / batch-size values
# plus the adversarial-candidate generation options.
hparams = { 'learning_rate': learning_rate,
            'adam_epsilon': adam_epsilon,
            'weight_decay': weight_decay,
            'warmup_steps': warmup_steps,
            'num_training_steps': num_training_steps,
            'batch_size': batch_size,
            'use_USE': use_USE,
            'USE_method': USE_method,
            'USE_multiplier': USE_multiplier,
            'stop_words': stop_words,
            'perturb_words': perturb_words,
            'candidates_per_word': candidates_per_word,
            'total_alternatives': total_alternatives,
            'match_pos': match_pos,
            'use_full_example': use_full_example,
            'leave_alone': leave_alone,
            'random_out_of': random_out_of,
            'judge_bert': judge_bert
           }

# Options for the MnliProcessor that feeds the co-tuned model.
proc_hparams = {}
# Sampling is currently disabled, so the full 100% of the data is used.
# Uncomment the update below to pass sample_percent / randomize to the processor instead.
#proc_hparams.update({'sample_percent': 3,
#                     'randomize': False})

# instantiate the model used for SWITCH
switch_model = LightningBertForSequenceClassification(load_from = 'resources/models/MNLI/pytorch_model.bin',
                                                      processor = MnliProcessor(),
                                                      hparams = {'batch_size': 6 })

# Only move the SWITCH model to the GPU when one exists. The notebook already falls back
# to CPU when choosing num_gpus, and an unconditional .cuda() would crash on such hosts.
if torch.cuda.is_available():
    switch_model.cuda()
#switch_model = None

model = FireBERT_FCT(switch_model=switch_model, processor=MnliProcessor(hparams=proc_hparams), hparams=hparams)

INFO:absl:Using scratch/tf_cache to cache modules.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [8]:
# fetch the processor held by the model; it is used below to load the MNLI splits
processor = model.get_processor()

In [9]:
# Load the MNLI training split. Both returned values are kept because the model
# is later given the dataset together with the examples.
train_dataset, train_examples = processor.load_and_cache_examples(
    "data/MNLI",
    example_set='train',
)

In [10]:
# Load the MNLI 'dev' split for validation; the second return value is not needed here.
val_dataset, _ = processor.load_and_cache_examples("data/MNLI", example_set='dev')

In [11]:
# Load the MNLI 'test' split; the second return value is not needed here.
test_dataset, _ = processor.load_and_cache_examples("data/MNLI", example_set='test')

In [12]:
# Register the three splits on the model; the training split is passed together with its examples.
model.set_train_dataset(train_dataset, train_examples)
model.set_val_dataset(val_dataset)
model.set_test_dataset(test_dataset)

In [13]:
from pytorch_lightning.logging import TensorBoardLogger

# TensorBoard logger rooted at <save_root_path>logs.
# NOTE(review): version is pinned to 10, so reruns presumably reuse the same log version -- confirm.
tensor_logger = TensorBoardLogger(
    save_dir=save_root_path + 'logs',
    version=10,
    name='mnli_finetuning',
)

# Directory used for the Trainer's default save path and for the checkpoint callback.
checkpoint_save_path = save_root_path + 'checkpoints/'

In [14]:
from pytorch_lightning.callbacks import ModelCheckpoint

# Checkpoint callback writing into checkpoint_save_path; it monitors 'val_loss'
# with mode='min', i.e. lower values are treated as better.
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_save_path,
    verbose=True,
    monitor='val_loss',
    mode='min'
)

In [15]:
# passed to pl.Trainer as amp_level
amp_opt_level='O1' # https://nvidia.github.io/apex/amp.html#opt-levels
# passed to pl.Trainer as gradient_clip_val
max_grad_norm = 1.0

In [16]:
# Trainer for the co-tuning run.
# NOTE(review): no precision / use_amp argument is passed, so amp_level may have no
# effect on its own -- confirm against the installed Lightning version.
trainer = pl.Trainer(
    # where things are written and how they are logged
    default_save_path=checkpoint_save_path,
    logger=tensor_logger,
    checkpoint_callback=checkpoint_callback,
    # hardware
    gpus=num_gpus,
    # training length: both an epoch limit and a step limit are supplied
    max_epochs=num_train_epochs,
    max_steps=num_training_steps,
    # optimisation details
    gradient_clip_val=max_grad_norm,
    amp_level=amp_opt_level,
)

In [17]:
# run training; the datasets were registered on the model beforehand
trainer.fit(model)

HBox(children=(FloatProgress(value=0.0, description='Validation sanity check', layout=Layout(flex='2'), max=5.…



HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max=1.0), HTML(value='')), …

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=1403.0, style=P…




1

In [18]:
# display the metrics the trainer currently reports (validation loss / accuracy after fitting)
trainer.tqdm_metrics

{'val_loss': 0.46123167872428894, 'avg_val_acc': 0.8214032053947449}

In [19]:
# Save the Lightning training checkpoint.
trainer.save_checkpoint(save_root_path + 'training_checkpoint')

# Additionally export the weights, the BERT config and the tokenizer files
# side by side under save_root_path.
torch.save(model.state_dict(), save_root_path + 'pytorch_model.bin')

with open(save_root_path + 'bert_config.json', 'w') as config_file:
    config_json = model.bert.config.to_json_string()
    config_file.write(config_json)

# Last expression of the cell: its return value (the written file paths) is displayed.
model.tokenizer.save_pretrained(save_root_path)

('resources/models/co-tuned_MNLI_on_lightning_final_rank/vocab.txt',
 'resources/models/co-tuned_MNLI_on_lightning_final_rank/special_tokens_map.json',
 'resources/models/co-tuned_MNLI_on_lightning_final_rank/added_tokens.json')

In [20]:
# evaluate on the test dataset, presumably the one registered via model.set_test_dataset
trainer.test(model)

HBox(children=(FloatProgress(value=0.0, description='Testing', layout=Layout(flex='2'), max=1405.0, style=Prog…

----------------------------------------------------------------------------------------------------
TEST RESULTS
{'avg_test_acc': tensor(0.8242)}
----------------------------------------------------------------------------------------------------



In [21]:
# display the trainer's metrics again; they now also include the test accuracy
trainer.tqdm_metrics

{'val_loss': 0.46123167872428894,
 'avg_val_acc': 0.8214032053947449,
 'avg_test_acc': 0.824199378490448}

In [22]:
from torch.utils.data import TensorDataset

def load_examples(features_file):
    """Build a TensorDataset from a file of pre-computed features.

    Args:
        features_file: path handed to ``torch.load``. The loaded object must be
            a sequence whose items expose ``input_ids``, ``attention_mask``,
            ``token_type_ids`` and ``label`` attributes.

    Returns:
        A ``TensorDataset`` of five ``torch.long`` tensors, in this order:
        input ids, attention mask, token type ids, labels, and the position
        of each example in the file (0 .. len-1).
    """
    # NOTE(security): torch.load unpickles the file, which can execute arbitrary
    # code -- only point this at feature files you trust.
    features = torch.load(features_file)

    # Convert to Tensors and build dataset
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
    all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
    all_labels = torch.tensor([f.label for f in features], dtype=torch.long)

    # Running example index; arange produces it directly instead of going
    # through an intermediate Python list.
    all_idxs = torch.arange(len(all_input_ids), dtype=torch.long)

    dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_labels, all_idxs)

    return dataset

In [23]:
# compare how well the model does against adversarial samples
# (generated for the dev split, going by the file name); this replaces the
# test dataset registered on the model
test_dataset = load_examples('data/MNLI/generated/mnli_adversarial_samples_for_dev')
model.set_test_dataset(test_dataset)

In [24]:
# evaluation-only run: a fresh Trainer without the logger/checkpoint settings used for training
trainer = pl.Trainer(gpus=num_gpus)
trainer.test(model)
# last expression: display the resulting metrics
trainer.tqdm_metrics

HBox(children=(FloatProgress(value=0.0, description='Testing', layout=Layout(flex='2'), max=1071.0, style=Prog…

----------------------------------------------------------------------------------------------------
TEST RESULTS
{'avg_test_acc': tensor(0.7618)}
----------------------------------------------------------------------------------------------------



{'avg_test_acc': 0.7617712020874023}

In [25]:
# compare how well the model does against test adversarial samples
test_dataset = load_examples('data/MNLI/generated/mnli_adversarial_samples_for_test')
model.set_test_dataset(test_dataset)
# evaluation-only run with a fresh Trainer
trainer = pl.Trainer(gpus=num_gpus)
trainer.test(model)
# last expression: display the resulting metrics
trainer.tqdm_metrics

HBox(children=(FloatProgress(value=0.0, description='Testing', layout=Layout(flex='2'), max=1083.0, style=Prog…

----------------------------------------------------------------------------------------------------
TEST RESULTS
{'avg_test_acc': tensor(0.7736)}
----------------------------------------------------------------------------------------------------



{'avg_test_acc': 0.7736445069313049}

In [26]:
# load up the dev samples again to get eval timings (that we didn't get from training)
# the features come from the cached dev file rather than through the processor
test_dataset = load_examples('data/MNLI/cached_dev_bert-base-uncased_128_mnli')
model.set_test_dataset(test_dataset)
# evaluation-only run with a fresh Trainer
trainer = pl.Trainer(gpus=num_gpus)
trainer.test(model)
# last expression: display the resulting metrics
trainer.tqdm_metrics

HBox(children=(FloatProgress(value=0.0, description='Testing', layout=Layout(flex='2'), max=1403.0, style=Prog…

----------------------------------------------------------------------------------------------------
TEST RESULTS
{'avg_test_acc': tensor(0.8214)}
----------------------------------------------------------------------------------------------------



{'avg_test_acc': 0.8214032053947449}