# <font color="orange"> Custom Training Question Answering System with BERT </font>

Remember to change the Colab runtime type to GPU before running this notebook.

In [None]:
!pip -q install simpletransformers

In [None]:
# Mount Google Drive at /content/drive so the dataset files stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Folder on the mounted Drive that holds the dataset. Defined once so that
# relocating the data only requires editing this single line.
DATA_DIR = '/content/drive/MyDrive/Colab Notebooks/NLP/Question Answering System with BERT'

# JSON files used for training and for evaluation respectively.
train_path = f'{DATA_DIR}/train.json'
test_path = f'{DATA_DIR}/test.json'

In [None]:
# Read the training examples from the JSON file at `train_path`.
import json

# Pass the encoding explicitly: JSON text is UTF-8, and without it `open`
# falls back to the platform's default locale encoding.
with open(train_path, 'r', encoding='utf-8') as read_file:
    train = json.load(read_file)

The training data has to be in this <font color="yellow">
[format](https://simpletransformers.ai/docs/qa-data-formats/#train-data-format) </font>

In [None]:
# Display the loaded training examples (contexts with their question/answer entries).
train

[{'context': 'Mistborn is a series of epic fantasy novels written by American author Brandon Sanderson.',
  'qas': [{'id': '00001',
    'is_impossible': False,
    'question': 'Who is the author of the Mistborn series?',
    'answers': [{'text': 'Brandon Sanderson', 'answer_start': 71}]}]},
 {'context': 'The first series, published between 2006 and 2008, consists of The Final Empire,The Well of Ascension, and The Hero of Ages.',
  'qas': [{'id': '00002',
    'is_impossible': False,
    'question': 'When was the series published?',
    'answers': [{'text': 'between 2006 and 2008', 'answer_start': 28}]},
   {'id': '00003',
    'is_impossible': False,
    'question': 'What are the three books in the series?',
    'answers': [{'text': 'The Final Empire, The Well of Ascension, and The Hero of Ages',
      'answer_start': 63}]},
   {'id': '00004',
    'is_impossible': True,
    'question': 'Who is the main character in the series?',
    'answers': []}]}]

In [None]:
# Read the evaluation examples from the JSON file at `test_path`.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding (JSON text is UTF-8).
with open(test_path, 'r', encoding='utf-8') as read_file:
    test = json.load(read_file)

# Last expression of the cell: display the loaded examples.
test

[{'context': 'The series primarily takes place in a region called the Final Empire on a world called Scadrial, where the sun and sky are red, vegetation is brown, and the ground is constantly being covered under black volcanic ashfalls.',
  'qas': [{'id': '00001',
    'is_impossible': False,
    'question': 'Where does the series take place?',
    'answers': [{'text': 'region called the Final Empire', 'answer_start': 38},
     {'text': 'world called Scadrial', 'answer_start': 74}]}]},
 {'context': '"Mistings" have only one of the many Allomantic powers, while "Mistborns" have all the powers.',
  'qas': [{'id': '00002',
    'is_impossible': False,
    'question': 'How many powers does a Misting possess?',
    'answers': [{'text': 'one', 'answer_start': 21}]},
   {'id': '00003',
    'is_impossible': True,
    'question': 'What are Allomantic powers?',
    'answers': []}]}]

`QuestionAnsweringModel`: used to train the model<br>
`QuestionAnsweringArgs`: used to pass custom arguments to the model

In [None]:
import logging

from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs

Model takes in [model_type and model_name](https://simpletransformers.ai/docs/qa-model/#questionansweringmodel) <br>
The types of models supported [are](https://simpletransformers.ai/docs/qa-specifics/#supported-model-types) <br>
And the model names [are](https://huggingface.co/transformers/v3.3.1/pretrained_models.html) 

In [None]:
# Choose what to fine-tune. `model_type` may be any key of MODEL_PRESETS;
# after this cell runs, `model_type` and `model_name` hold the values that are
# passed to QuestionAnsweringModel.
model_type = "bert"  # we will be using bert for our training

# Selectable name -> (model type handed to the library, pretrained checkpoint).
# Some entries are shorthands that map onto a different library model type
# (e.g. "distilroberta" is loaded with the "roberta" model type).
MODEL_PRESETS = {
    "bert": ("bert", "bert-base-cased"),
    "roberta": ("roberta", "roberta-base"),
    "distilbert": ("distilbert", "distilbert-base-cased"),
    "distilroberta": ("roberta", "distilroberta-base"),
    "electra-base": ("electra", "google/electra-base-discriminator"),
    "electra-small": ("electra", "google/electra-small-discriminator"),
    "xlnet": ("xlnet", "xlnet-base-cased"),
}

if model_type not in MODEL_PRESETS:
    # Fail early rather than silently pairing an unknown model type with the
    # BERT checkpoint.
    raise ValueError(
        f"Unsupported model_type {model_type!r}; choose one of {sorted(MODEL_PRESETS)}"
    )

model_type, model_name = MODEL_PRESETS[model_type]

We can configure the model in two ways:

In [None]:
# Option 1: describe the configuration with a QuestionAnsweringArgs object.
# Note: the models created further down are given `train_args` (option 2),
# so this object only illustrates the alternative.
model_args = QuestionAnsweringArgs(
    train_batch_size=16,
    evaluate_during_training=True,
    n_best_size=3,
    num_train_epochs=5,
)

In [None]:
#to see the training visualization
!pip -q install wandb

In [None]:
# Option 2 (advanced): give every setting as a key/value pair.
train_args = dict(
    # Where results are written, and what is re-used or overwritten.
    output_dir=f"outputs/{model_type}",
    best_model_dir=f"outputs/{model_type}/best_model",
    overwrite_output_dir=True,
    reprocess_input_data=True,
    use_cached_eval_features=True,
    save_model_every_epoch=False,
    save_eval_checkpoints=False,
    # Training.
    num_train_epochs=5,
    train_batch_size=128,
    max_seq_length=128,
    # Evaluation.
    evaluate_during_training=True,
    evaluate_during_training_steps=1000,
    eval_batch_size=64,
    n_best_size=3,
    # Weights & Biases logging; the run is named after the checkpoint.
    wandb_project="Question Answer Application",
    wandb_kwargs={"name": model_name},
    # Options left disabled for this run:
    #   use_early_stopping=True, early_stopping_metric="mcc", n_gpu=2,
    #   manual_seed=4, use_multiprocessing=False,
    #   config={"output_hidden_states": True}
)

Initialize the model <br>
##### The model downloads the pretrained weights and applies all the arguments we have specified

In [None]:
# Create the question-answering model for the selected type and checkpoint,
# configured with the `train_args` dictionary.
model = QuestionAnsweringModel(model_type, model_name, args=train_args)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForQuestionAnswering: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-cased and a

In [None]:
# Optional clean-up before re-training: delete the local `outputs` folder
# (the directory that `output_dir` / `best_model_dir` in train_args point into).
!rm -rf outputs

Train the model

In [None]:
# Make wandb available and set the notebook name it reads from the
# WANDB_NOTEBOOK_NAME environment variable.
import os

import wandb

os.environ["WANDB_NOTEBOOK_NAME"] = "question-answering-system-with-bert"

We can see the train_loss and eval_loss trending downward over the epochs.

In [None]:
# Fine-tune on `train` while evaluating on `test` during training.
# train_args sets num_train_epochs to 5, so this runs for 5 epochs.
model.train_model(train_data=train, eval_data=test)

convert squad examples to features:   0%|          | 0/4 [00:00<?, ?it/s]Could not find answer: 'The Final Empire,The Well of Ascension, and The Hero of Ages.' vs. 'The Final Empire, The Well of Ascension, and The Hero of Ages'
convert squad examples to features: 100%|██████████| 4/4 [00:00<00:00, 357.24it/s]
add example index and unique id: 100%|██████████| 4/4 [00:00<00:00, 1333.11it/s]


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
correct,▁▁▁▁▁
eval_loss,█▄▂▁▁
global_step,▁▃▅▆█
incorrect,▁▁▁▁▁
similar,▁▁▁▁▁
train_loss,██▅▃▁

0,1
correct,0.0
eval_loss,0.24707
global_step,5.0
incorrect,1.0
similar,2.0
train_loss,3.73828


Running Epoch 0 of 5:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

(5,
 {'global_step': [1, 2, 3, 4, 5],
  'correct': [0, 0, 0, 0, 0],
  'similar': [2, 2, 2, 2, 2],
  'incorrect': [1, 1, 1, 1, 1],
  'train_loss': [3.5094399452209473,
   3.6520183086395264,
   2.8357748985290527,
   2.9710285663604736,
   2.5694987773895264],
  'eval_loss': [0.2470703125,
   0.2244873046875,
   0.2244873046875,
   0.2086181640625,
   0.198974609375]})

Evaluate the model

In [None]:
# Evaluate the trained model on the test examples; `result` holds the summary
# metrics and `texts` the per-question details.
result, texts = model.eval_model(eval_data=test)

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Correct is 0 and incorrect is 1 (the remaining 2 answers are counted as similar). <br>
This suggests we need to train again with more epochs.

In [None]:
# Show the evaluation summary (correct / similar / incorrect counts and eval_loss).
result

{'correct': 0, 'similar': 2, 'incorrect': 1, 'eval_loss': 0.198974609375}

### Same as above, with more epochs

In [None]:
# Configuration for the second run: the same settings as the first `train_args`
# except for the three overrides below. Building it from the earlier dict
# (instead of repeating every key) keeps the two runs in sync and makes the
# differences explicit. Re-running this cell is safe, because the overrides
# simply replace the same keys again.
train_args = {
    **train_args,
    "num_train_epochs": 20,      # first run used 5
    "n_best_size": 2,            # first run used 3
    "use_early_stopping": True,  # not enabled in the first run
}

In [None]:
# Create a new model from the pretrained checkpoint so the second run uses the
# updated `train_args` and does not continue from the first run's weights.
model = QuestionAnsweringModel(model_type, model_name, args=train_args)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForQuestionAnswering: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-cased and a

In [None]:
# Train the new model with the updated settings (num_train_epochs is now 20),
# again evaluating on `test` during training.
model.train_model(train_data=train, eval_data=test)

convert squad examples to features:   0%|          | 0/4 [00:00<?, ?it/s]Could not find answer: 'The Final Empire,The Well of Ascension, and The Hero of Ages.' vs. 'The Final Empire, The Well of Ascension, and The Hero of Ages'
convert squad examples to features: 100%|██████████| 4/4 [00:00<00:00, 280.00it/s]
add example index and unique id: 100%|██████████| 4/4 [00:00<00:00, 13025.79it/s]


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
correct,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval_loss,██▇▇▆▆▅▅▄▄▃▃▂▂▂▂▁▁▁▁
global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
incorrect,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
similar,▁███████████████████
train_loss,███▇▆▅▄▄▃▂▂▂▂▁▁▁▁▁▁▁

0,1
correct,1.0
eval_loss,-0.74707
global_step,20.0
incorrect,0.0
similar,2.0
train_loss,0.54346


Running Epoch 0 of 20:   0%|          | 0/1 [00:00<?, ?it/s]



Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 1 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 2 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 3 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 4 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 5 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 6 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 7 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 8 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 9 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 10 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 11 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 12 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 13 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 14 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 15 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 16 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 17 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 18 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 19 of 20:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

(20,
 {'global_step': [1,
   2,
   3,
   4,
   5,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   14,
   15,
   16,
   17,
   18,
   19,
   20],
  'correct': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  'similar': [1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
  'incorrect': [2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
  'train_loss': [4.95703125,
   4.830078125,
   4.595703125,
   4.052408695220947,
   3.58984375,
   3.0315756797790527,
   2.4521484375,
   2.1144206523895264,
   1.6671550273895264,
   1.4471843242645264,
   1.2705078125,
   1.0342611074447632,
   1.0522868633270264,
   0.9698893427848816,
   0.93902587890625,
   0.7823892831802368,
   0.6603597402572632,
   0.668212890625,
   0.6072998046875,
   0.5709329843521118],
  'eval_loss': [-0.051361083984375,
   -0.0833740234375,
   -0.1551513671875,
   -0.24658203125,
   -0.364013671875,
   -0.48388671875,
   -0.61181640625,
   -0.75146484375,
   -0.89208984375,
   -1

In [None]:
# Evaluate the re-trained model on the test examples; `result` holds the
# summary metrics and `texts` the per-question details.
result, texts = model.eval_model(eval_data=test)

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Show the evaluation summary for the re-trained model.
result

{'correct': 0, 'similar': 2, 'incorrect': 1, 'eval_loss': -1.556640625}

In [None]:
# Show the per-question details from eval_model: truth vs. predicted answer,
# grouped into correct / similar / incorrect.
texts

{'correct_text': {},
 'similar_text': {'00001': {'truth': 'region called the Final Empire',
   'predicted': '',
   'question': 'Where does the series take place?'},
  '00003': {'truth': '',
   'predicted': 'empty',
   'question': 'What are Allomantic powers?'}},
 'incorrect_text': {'00002': {'truth': 'one',
   'predicted': 'empty',
   'question': 'How many powers does a Misting possess?'}}}

Make Predictions with the Model

In [None]:
# Input for prediction: one context paragraph with a single question about it.
# The layout mirrors the training data, minus the answers.
to_predict = [
    dict(
        context="Vin is a Mistborn of great power and skill.",
        qas=[dict(question="What is Vin's speciality?", id="0")],
    )
]

In [None]:
# Ask the trained model the questions in `to_predict`; it returns the candidate
# answers and their probabilities as two separate lists.
answers, probabilities = model.predict(to_predict=to_predict)

convert squad examples to features: 100%|██████████| 1/1 [00:00<00:00, 249.44it/s]
add example index and unique id: 100%|██████████| 1/1 [00:00<00:00, 2805.55it/s]


Running Prediction:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Show the candidate answers returned for each question id.
answers

[{'id': '0', 'answer': ['and skill', 'great power and skill']}]

In [None]:
# Show the probabilities returned alongside the candidate answers, per question id.
probabilities

[{'id': '0', 'probability': [0.4505729848554377, 0.438901474093273]}]