# Training a Custom Question-Answering Model

In [16]:
!pip install simpletransformers



In [38]:
import json

# Load the training examples (a list of {'context', 'qas'} dicts).
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("train.json", "r", encoding="utf-8") as read_file:
  train = json.load(read_file)

In [39]:
# Bare expression: shows the loaded training examples as the cell output.
train

[{'context': 'Mistborn is a series of epic fantasy novels written by American author Brandon Sanderson.',
  'qas': [{'id': '00001',
    'is_impossible': False,
    'question': 'Who is the author of the Mistborn series?',
    'answers': [{'text': 'Brandon Sanderson', 'answer_start': 71}]}]},
 {'context': 'The first series, published between 2006 and 2008, consists of The Final Empire,The Well of Ascension, and The Hero of Ages.',
  'qas': [{'id': '00002',
    'is_impossible': False,
    'question': 'When was the series published?',
    'answers': [{'text': 'between 2006 and 2008', 'answer_start': 28}]},
   {'id': '00003',
    'is_impossible': False,
    'question': 'What are the three books in the series?',
    'answers': [{'text': 'The Final Empire, The Well of Ascension, and The Hero of Ages',
      'answer_start': 63}]},
   {'id': '00004',
    'is_impossible': True,
    'question': 'Who is the main character in the series?',
    'answers': []}]}]

In [40]:
# Load the evaluation examples (same structure as the training file).
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("test.json", "r", encoding="utf-8") as read_file:
  test = json.load(read_file)


In [41]:
# Bare expression: shows the loaded evaluation examples as the cell output.
test

[{'context': 'The series primarily takes place in a region called the Final Empire on a world called Scadrial, where the sun and sky are red, vegetation is brown, and the ground is constantly being covered under black volcanic ashfalls.',
  'qas': [{'id': '00001',
    'is_impossible': False,
    'question': 'Where does the series take place?',
    'answers': [{'text': 'region called the Final Empire', 'answer_start': 38},
     {'text': 'world called Scadrial', 'answer_start': 74}]}]},
 {'context': '"Mistings" have only one of the many Allomantic powers, while "Mistborns" have all the powers.',
  'qas': [{'id': '00002',
    'is_impossible': False,
    'question': 'How many powers does a Misting possess?',
    'answers': [{'text': 'one', 'answer_start': 21}]},
   {'id': '00003',
    'is_impossible': True,
    'question': 'What are Allomantic powers?',
    'answers': []}]}]

In [42]:
import logging
# from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs

In [43]:
# Choose the architecture to fine-tune. `model_type` starts out as one of the
# short aliases in MODEL_CHECKPOINTS and is resolved below into the
# (model_type, model_name) pair used by the rest of the notebook.
model_type = 'bert'
model_name = 'bert-base-cased'  # kept as the fallback for aliases not in the table

# alias -> (architecture family, pretrained checkpoint name)
# Some aliases map onto a different family name (e.g. 'distilroberta' is
# loaded through the 'roberta' family), so both values are stored explicitly.
MODEL_CHECKPOINTS = {
  'bert': ('bert', 'bert-base-cased'),
  'roberta': ('roberta', 'roberta-base'),
  'distilbert': ('distilbert', 'distilbert-base-cased'),
  'distilroberta': ('roberta', 'distilroberta-base'),
  'electra-base': ('electra', 'google/electra-base-discriminator'),
  'xlnet': ('xlnet', 'xlnet-base-cased'),
}

# Unknown aliases leave both variables untouched, matching the behaviour of
# falling through an if/elif chain with no matching branch.
model_type, model_name = MODEL_CHECKPOINTS.get(model_type, (model_type, model_name))

In [44]:
# Configure the model
from simpletransformers.question_answering import QuestionAnsweringArgs

# Attribute names must match the argument names the library reads: assigning to
# a misspelled attribute raises no error, it just stores a value under a name
# nothing looks up. The spellings below are the same keys used in `train_args`.
# NOTE(review): the model cell passes `train_args`, not `model_args`, so this
# object is currently unused there — confirm which configuration is intended.
model_args = QuestionAnsweringArgs()
model_args.train_batch_size = 16
model_args.evaluate_during_training = True
model_args.n_best_size = 3
model_args.num_train_epochs = 5

In [45]:
### Advanced Methodology
# Training options passed to the model through its `args=` parameter.
train_args = {
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "use_cached_eval_features": True,
    # Both directories are f-strings so they resolve to the same per-model
    # folder (e.g. outputs/bert and outputs/bert/best_model).
    "output_dir": f"outputs/{model_type}",
    "best_model_dir": f"outputs/{model_type}/best_model",
    "evaluate_during_training": True,
    "max_seq_length": 128,
    "num_train_epochs": 5,
    "evaluate_during_training_steps": 1000,
    "wandb_project": "Question Answer Application",
    "wandb_kwargs": {"name": model_name},
    "save_model_every_epoch": False,
    "save_eval_checkpoints": False,
    "n_best_size": 3,
    "train_batch_size": 128,
    "eval_batch_size": 64,
    # Optional settings, left disabled:
    # "use_early_stopping": True,
    # "early_stopping_metric": "mcc",
    # "n_gpu": 2,
    # "manual_seed": 4,
    # "use_multiprocessing": False,
    # "config": {"output_hidden_states": True},
}

In [46]:
import os

# Set the WANDB_DISABLED flag in this process's environment before the model
# is constructed in the following cell.
os.environ.update({"WANDB_DISABLED": "true"})

In [47]:
from simpletransformers.question_answering import QuestionAnsweringModel

# Build the question-answering model from the pretrained "bert-base-cased"
# checkpoint, configured by `train_args` and with CUDA switched off.
# NOTE(review): the type and checkpoint are hard-coded here rather than taken
# from `model_type` / `model_name` — confirm that is intended.
model = QuestionAnsweringModel("bert", "bert-base-cased", args=train_args, use_cuda=False)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForQuestionAnswering: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['qa_outputs

In [49]:
# Train model
# torch and SquadFeatures are imported here only for the registration call below.
import torch
from transformers.data.processors.squad import SquadFeatures
# Register SquadFeatures with torch's serialization allowlist before training.
# NOTE(review): presumably required so cached feature files can be loaded by
# newer torch versions that restrict unpickling — confirm against the torch version in use.
torch.serialization.add_safe_globals([SquadFeatures])

# Fine-tune on `train`, passing `test` as eval_data. The call is the cell's
# last expression, so its return value is shown as the cell output.
model.train_model(train, eval_data=test)





convert squad examples to features:   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[ACould not find answer: 'The Final Empire,The Well of Ascension, and The Hero of Ages.' vs. 'The Final Empire, The Well of Ascension, and The Hero of Ages'
convert squad examples to features: 100%|██████████| 4/4 [00:00<00:00, 90.58it/s]




add example index and unique id: 100%|██████████| 4/4 [00:00<00:00, 13729.31it/s]


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

0,1
correct,▁
eval_loss,▁
global_step,▁
incorrect,▁
similar,▁
train_loss,▁

0,1
correct,1.0
eval_loss,0.1216
global_step,1.0
incorrect,0.0
similar,2.0
train_loss,5.00603


Running Epoch 1 of 5:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 5 of 5:   0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1 [00:00<?, ?it/s]

(5,
 {'global_step': [1, 2, 3, 4, 5],
  'correct': [1, 1, 1, 1, 1],
  'similar': [2, 2, 1, 2, 2],
  'incorrect': [0, 0, 1, 0, 0],
  'train_loss': [4.446612358093262,
   4.467581748962402,
   3.9537525177001953,
   3.370347023010254,
   3.084319591522217],
  'eval_loss': [0.06271106004714966,
   -0.028162792325019836,
   -0.1029127836227417,
   -0.1565721482038498,
   -0.18261469900608063]})





convert squad examples to features:   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[ACould not find answer: 'The Final Empire,The Well of Ascension, and The Hero of Ages.' vs. 'The Final Empire, The Well of Ascension, and The Hero of Ages'
convert squad examples to features: 100%|██████████| 4/4 [00:00<00:00, 237.70it/s]




add example index and unique id: 100%|██████████| 4/4 [00:00<00:00, 5751.53it/s]


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

0,1
correct,▁▁▁▁▁
eval_loss,█▅▃▂▁
global_step,▁▃▅▆█
incorrect,▁▁█▁▁
similar,██▁██
train_loss,██▅▂▁

0,1
correct,1.0
eval_loss,-0.18261
global_step,5.0
incorrect,0.0
similar,2.0
train_loss,3.08432


In [55]:
# Input for model.predict: a list of contexts, each carrying the questions
# ("qas") to answer against it. Every question needs its own "id".
to_predict = [
    dict(
        context="Python is a widely used programming language created by Guido van Rossum in the late 1980s.",
        qas=[dict(question="Who created Python?", id="0")],
    )
]

In [59]:
# Run inference on `to_predict`, then print the candidate answers followed by
# their probabilities, one object per line.
answers, probabilities = model.predict(to_predict)
print(answers, probabilities, sep="\n")





convert squad examples to features: 100%|██████████| 1/1 [00:00<00:00, 72.80it/s]




add example index and unique id: 100%|██████████| 1/1 [00:00<00:00, 4739.33it/s]


Running Prediction:   0%|          | 0/1 [00:00<?, ?it/s]

[{'id': '0', 'answer': ['Guido van Rossum in the late 1980s', 'Guido van Rossum', 'in the late 1980s']}]
[{'id': '0', 'probability': [0.37596232596670853, 0.30885699533929745, 0.2458324679472951]}]
