## Train Transformers on a Dataset of 70 Army Act Sections

In [1]:
## Get Data

In [2]:
from cdqa.utils.converters import pdf_converter

In [33]:
# Convert the PDFs found under ./Split_Acts/Separated/ into the corpus DataFrame
# (columns `title` and `paragraphs`) used by the rest of the notebook.
df = pdf_converter(directory_path='./Split_Acts/Separated/')

In [34]:
# Preview the first rows to sanity-check the converted titles and paragraphs.
df.head()

Unnamed: 0,title,paragraphs
0,Army_Act_Section_27,[27. Remedy of aggrieved officers— Any office...
1,Army_Act_Section_64,[64. Miscellaneous offences.— Any person subj...
2,Army_Act_Section_38,[38. Desertion and aiding desertion.— (1) An...
3,Army_Act_Section_50,[50. Irregularity in connection With arrest o...
4,Army_Act_Section_17,[17. Mode of attestation.— (1) When a person ...


## Downloading pre-trained models

In [17]:
from cdqa.utils.download import download_squad, download_model, download_bnpp_data

# All assets are downloaded relative to the parent of the working directory.
directory = '../'

# Downloading data (SQuAD and the BNP Paribas newsroom corpus)
download_squad(dir=directory)
download_bnpp_data(dir=directory)

# Downloading pre-trained DistilBERT fine-tuned on SQuAD 1.1.
# This is the only reader the notebook uses, so it is requested exactly once;
# the larger BERT reader is intentionally not downloaded.
download_model('distilbert-squad_1.1', dir=directory)

Downloading SQuAD v1.1 data...
train-v1.1.json already downloaded
dev-v1.1.json already downloaded

Downloading SQuAD v2.0 data...
train-v2.0.json already downloaded
dev-v2.0.json already downloaded

Downloading BNP data...
bnpp_newsroom-v1.1.csv already downloaded

Downloading trained model...
distilbert_qa.joblib already downloaded

Downloading trained model...
distilbert_qa.joblib already downloaded


In [15]:
ls ../

[0m[01;34marmy_act_pdf[0m/  [01;34mcdQA[0m/  [01;34mcontracts_pdf[0m/  [01;34mdata[0m/  [01;34m__MACOSX[0m/  [01;34mmodels[0m/  [01;31mpdf.zip[0m


## Fit the pipeline on my corpus

In [35]:
import pandas as pd
from ast import literal_eval
from cdqa.pipeline import QAPipeline

In [36]:
# Number of converted documents (rows) and columns in the corpus.
# NOTE(review): the notebook title mentions 70 documents but the recorded output
# shows 69 rows — confirm whether one PDF was dropped during conversion.
df.shape

(69, 2)

In [37]:
# Check the column dtypes of the corpus before handing it to the pipeline.
df.dtypes

title         object
paragraphs    object
dtype: object

In [38]:
# Build the QA pipeline around the DistilBERT reader stored in a joblib file,
# then fit the pipeline's retriever on the corpus DataFrame.
# NOTE(review): the reader path is relative to the working directory, whereas the
# download cell targets '../' — confirm the file is actually present here.
reader_path = 'distilbert_qa.joblib'
cdqa_pipeline = QAPipeline(reader=reader_path)
# Keep this call last: its return value is what the cell displays.
cdqa_pipeline.fit_retriever(df=df)

QAPipeline(reader=BertQA(adam_epsilon=1e-08,
                         bert_model='distilbert-base-uncased',
                         do_lower_case=True, fp16=False,
                         gradient_accumulation_steps=1, learning_rate=5e-05,
                         local_rank=-1, loss_scale=0, max_answer_length=30,
                         n_best_size=20, no_cuda=False,
                         null_score_diff_threshold=0.0, num_train_epochs=3.0,
                         output_dir=None, predict_batch_size=8, seed=42,
                         server_ip='', ser...size=8,
                         verbose_logging=False, version_2_with_negative=False,
                         warmup_proportion=0.1, warmup_steps=0),
           retrieve_by_doc=False,
           retriever=BM25Retriever(b=0.75, floor=None, k1=2.0, lowercase=True,
                                   max_df=0.85, min_df=2, ngram_range=(1, 2),
                                   preprocessor=None, stop_words='english',
           

In [39]:
# Save the pipeline's reader under a corpus-specific file name.
# NOTE(review): only fit_retriever is called in the visible cells (no reader
# fine-tuning), so this presumably writes the pre-trained reader unchanged — confirm.
# NOTE(review): the recorded output below this cell is a directory path, which
# looks like a stale output from another command — re-run to refresh.
cdqa_pipeline.dump_reader('distilbert_army_act_reader.joblib')

'/home/ec2-user/SageMaker/Closed_Domain_QA/army_act_pdf'

In [40]:
# Ask one question; the recorded result is a tuple of
# (answer text, document title, paragraph containing the answer).
cdqa_pipeline.predict(query='What is disobedience to superior officer?')

('any lawful command',
 'Army_Act_Section_41',
 '1.  Offences under this section, when on active service, should not be dealt with summarily under AA.s.80, 83 or 84. 2.  An offence under this section cannot be made the subject of a joint charge. 3.  Lawful Command.—The command must be a specific command to an individual i.e., it must be capable of individual execution by the person to whom it is addressed and justified by military, as well as by civil, law and usage, e.g., a command addressed by a superior officer to four persons to "dismiss" is for the purposes of this section a lawful military command to each of the four persons so addressed. The command must relate to military duty that is to say disobedience to it must tend to impede, delay or prevent a military proceeding. The disobedience must have reference to the time at which the command is to be obeyed. If the command be a lawful command, and demands a prompt and immediate compliance, hesitation or unnecessary delay in obeyin

In [31]:
# Ask a question and request every candidate prediction instead of only the best
# one; the recorded result is a list of dicts (text, probability, logits, title,
# paragraph).
# NOTE(review): retriever_score_weight=0.35 is a hard-coded tuning value —
# presumably it balances the retriever score against the reader score; confirm
# against the cdQA documentation.
cdqa_pipeline.predict(
    query='Who is superior officer?',
    return_all_preds=True,
    retriever_score_weight=0.35,
)

[{'text': 'AA.s.40(b)',
  'probability': 0.554266506160069,
  'start_logit': 7.050589084625244,
  'end_logit': 5.054758548736572,
  'qas_id': 'a3bf127c-b5f6-48c9-9664-3f990559b11a',
  'title': 'Army_Act_Section_48',
  'paragraph': '48.  Intoxication. —  (1) Any person subject to this Act who is found in a state of intoxication, whether on duty or not, shall, on conviction by court-martial, if he is an officer, be liable to be cashiered or to suffer such less punishment as is in this Act mentioned; and, if he is not an officer, be liable, subject to the provisions of sub-section (2), to suffer imprisonment for a term which may extend to two years or such less punishment as is in this Act mentioned.  (2) Where an offence of being intoxicated is committed by a person other than an officer when not on active service or not on duty, the period of imprisonment awarded shall not exceed six months.  NOTES  1.  Intoxication may be induced by opium or any similar drug, as well as by liquor. This