# Bi-Directional Attention Flow Model

#### Set Checkpoint and Data Folder

In [1]:
!pip install -U PyYAML
!pip install -U h5py
!pip install pytorch-lightning

Requirement already up-to-date: PyYAML in /home/julian/Development/PythonEnv/pytorch/lib/python3.8/site-packages (5.3.1)
Requirement already up-to-date: h5py in /home/julian/Development/PythonEnv/pytorch/lib/python3.8/site-packages (2.10.0)


In [2]:
import sys, os
import torch
pwd = os.getcwd()

class Arguments():
    data = os.path.join(pwd, 'DATA', 'train_v2.1.json')
    exp_folder = os.path.join(pwd, 'Experimente/LightningTest')
    #word_rep = os.path.join(pwd, 'DATA', 'glove.840B.300d.txt')
    word_rep = None
    cuda = torch.cuda.is_available()
    use_covariance = False
    force_restart = False

args = Arguments()

if not os.path.exists(args.exp_folder):
    os.makedirs(args.exp_folder)


#### Global Configurations (instead of config.yaml)

In [3]:
import yaml

config_yaml = """
    bidaf:
        dropout: 0.2
        num_highways: 2
        num_lstm: 2
        hidden_size: 100
        embedding_dim: 300
        embedding_reduce: 100
        characters:
            dim: 16
            num_filters: 100
            filter_sizes:
                - 5
    training:
        lr: 0.001
        betas:
            - 0.9
            - 0.999
        eps: 0.00000001
        weigth_decay: 0
        epochs: 1
        batch_size: 60
        limit: 400
"""
config = yaml.load(config_yaml, Loader=yaml.FullLoader)


#### Importing the MSMARCO Bidaf Model

In [4]:
sys.path.append(os.path.join(pwd,'MsmarcoQuestionAnswering','Baseline'))
sys.path.append(os.path.join(pwd,'MsmarcoQuestionAnswering','Baseline','scripts'))

import MsmarcoQuestionAnswering.Baseline.mrcqa as mrcqa
import MsmarcoQuestionAnswering.Baseline.scripts.dataset as dataset
import MsmarcoQuestionAnswering.Baseline.scripts.checkpointing as checkpointing
import MsmarcoQuestionAnswering.Baseline.scripts.train as train_manager
import MsmarcoQuestionAnswering.Baseline.scripts.predict as predict_manager
from pytorch_lightning import LightningModule
from pytorch_lightning import Trainer
import h5py

#### Pytorch Lightning Wrapper

In [5]:
checkpoint_w, training_state_w, epoch_w = train_manager.try_to_resume(
            args.force_restart, args.exp_folder)

if checkpoint_w:
    print('Resuming training...')
    model_w, id_to_token_w, id_to_char_w, optimizer_w, dataloader_w = train_manager.reload_state(checkpoint_w, training_state_w, config, args,None)
else:
    print('Preparing to train...')
    model_w, id_to_token_w, id_to_char_w, optimizer_w, dataloader_w = train_manager.init_state(config, args,None)
    checkpoint_w = h5py.File(os.path.join(args.exp_folder, 'checkpoint'))
    checkpointing.save_vocab(checkpoint_w, 'vocab', id_to_token_w)
    checkpointing.save_vocab(checkpoint_w, 'c_vocab', id_to_char_w)

if torch.cuda.is_available() and args.cuda:
    dataloader_w.tensor_type = torch.cuda.LongTensor

Preparing to train...
Load Data [1/6]
Start Organizing Data...
Organizing progress: 0.0 x 10⁴
Organizing progress: 1.0 x 10⁴
Organizing progress: 2.0 x 10⁴
Organizing progress: 3.0 x 10⁴
Organizing progress: 4.0 x 10⁴
Organizing progress: 5.0 x 10⁴
Organizing progress: 6.0 x 10⁴
Organizing progress: 7.0 x 10⁴
Organizing progress: 8.0 x 10⁴
Organizing progress: 9.0 x 10⁴
Organizing progress: 10.0 x 10⁴
Organizing progress: 11.0 x 10⁴
Organizing progress: 12.0 x 10⁴
Organizing progress: 13.0 x 10⁴
Organizing progress: 14.0 x 10⁴
Organizing progress: 15.0 x 10⁴
Organizing progress: 16.0 x 10⁴
Organizing progress: 17.0 x 10⁴
Organizing progress: 18.0 x 10⁴
Organizing progress: 19.0 x 10⁴
Organizing progress: 20.0 x 10⁴
Organizing progress: 21.0 x 10⁴
Organizing progress: 22.0 x 10⁴
Organizing progress: 23.0 x 10⁴
Organizing progress: 24.0 x 10⁴
Organizing progress: 25.0 x 10⁴
Organizing progress: 26.0 x 10⁴
Organizing progress: 27.0 x 10⁴
Organizing progress: 28.0 x 10⁴
Organizing progress



No pre-trained embeddings given [5/6]
Done init_state [6/6]


  checkpoint_w = h5py.File(os.path.join(args.exp_folder, 'checkpoint'))


In [6]:
class BidafLightningWrapper(LightningModule):
    def __init__(self):
        super().__init__()
        
    def setup(self,stage):
        pass;
            
    def prepare_data(self):
        pass;

    def configure_optimizers(self):
        return optimizer_w;

    def forward(self, passage, p_lengths, question, q_lengths):
        return model_w(passage, p_lengths, question, q_lengths)

    def train_dataloader(self):
        return dataloader_w;

    def training_step(self, batch, batch_idx):
        qids, passages, queries, answers, _ = batch
        start_log_probs, end_log_probs = self(passages[:2], passages[2], queries[:2], queries[2])
        return {'loss': model_w.get_loss(start_log_probs, end_log_probs, answers[:, 0], answers[:, 1])}

    def training_epoch_end(self, results):
        checkpointing.checkpoint(model_w, epoch_w, optimizer_w, checkpoint_w, args.exp_folder)
        loss = torch.stack([step['loss'] for step in results]).mean()
        model_w.cuda()
        return {'log': {'train_loss': loss}}

    def test_step(self, batch, batch_idx):
        
        return {'test_loss': loss}

    def test_epoch_end(self, results):
        loss = torch.stack([step['test_loss'] for step in results]).mean()
        return {'log': {'test_loss': loss}}
    
    def test_dataloader(self):
        pass


In [7]:
modelLightning = BidafLightningWrapper()
trainer = Trainer(max_epochs=10, gpus=1)
trainer.fit(modelLightning)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params
------------------------------


Epoch 1:  15%|█▍        | 854/5838 [04:51<28:18,  2.93it/s, loss=3.323, v_num=27]




1