## Train Transformers on Indexed Dataset

In [2]:
## Get Data

In [3]:
from cdqa.utils.converters import pdf_converter

In [4]:
import pandas as pd

In [5]:
# Load the indexed Act 40/41 table (tag, questions, paragraph, source, answer).
df = pd.read_csv(
    filepath_or_buffer='../../SDD_POC/data_preprocessing/IndexedAct40_41.csv',
)

In [6]:
# Preview the first five rows of the raw table.
df.head(n=5)

Unnamed: 0,tag,questions,paragraph,source,answer
0,offence type ct 40 main text,"['what all offences comes under act 40', 'who ...",a) \tuses criminal force to or assaults his su...,../index used/aa41and40.json,
1,imprisonment act 40 main text,"['imprisonment duration according to act 40', ...",if such officer is at the time in the executio...,../index used/aa41and40.json,
2,criminal force example 40 note 2 a i,"['criminal force example', 'which instance com...","A throws a stone at B. If the stone hits B, A ...",../index used/aa41and40.json,
3,assault example act 40 note 2 a ii,"['assault example', 'which instance comes unde...","A, during an altercation with B, picks up a st...",../index used/aa41and40.json,
4,assult and criminal force similarity act 40 no...,['difference between criminal force and assual...,An 'assault' is something less than the use of...,../index used/aa41and40.json,


In [7]:
# Keep only the tag and paragraph columns; every other column is dropped.
df = df.loc[:, ['tag', 'paragraph']]

In [8]:
# Rename the columns to 'title' / 'paragraphs', the names the rest of the
# notebook works with. The result is reassigned rather than using
# inplace=True, so the frame produced by the column selection is never
# mutated and the statement reads as a plain "new frame from old frame" step.
df = df.rename(columns={'tag': 'title', 'paragraph': 'paragraphs'})

In [9]:
# Confirm that only the title / paragraphs columns remain.
df.head(n=5)

Unnamed: 0,title,paragraphs
0,offence type ct 40 main text,a) \tuses criminal force to or assaults his su...
1,imprisonment act 40 main text,if such officer is at the time in the executio...
2,criminal force example 40 note 2 a i,"A throws a stone at B. If the stone hits B, A ..."
3,assault example act 40 note 2 a ii,"A, during an altercation with B, picks up a st..."
4,assult and criminal force similarity act 40 no...,An 'assault' is something less than the use of...


In [10]:
# Count missing vs. present values in the title column.
df['title'].isna().value_counts()

False    115
Name: title, dtype: int64

In [11]:
# Count missing vs. present values in the paragraphs column.
df['paragraphs'].isna().value_counts()

False    113
True       2
Name: paragraphs, dtype: int64

In [12]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis='index', how='any')

In [13]:
# Wrap each paragraph in a one-element list of str.
# Values that are already lists are passed through unchanged, so re-running
# this cell no longer turns ['text'] into ["['text']"].
# Bracket assignment is used instead of attribute assignment so the write is
# unambiguously a column update.
df['paragraphs'] = df['paragraphs'].apply(
    lambda text: text if isinstance(text, list) else [str(text)]
)

In [14]:
# Check that each paragraphs entry is now a single-element list.
df.head(n=5)

Unnamed: 0,title,paragraphs
0,offence type ct 40 main text,[a) \tuses criminal force to or assaults his s...
1,imprisonment act 40 main text,[if such officer is at the time in the executi...
2,criminal force example 40 note 2 a i,"[A throws a stone at B. If the stone hits B, A..."
3,assault example act 40 note 2 a ii,"[A, during an altercation with B, picks up a s..."
4,assult and criminal force similarity act 40 no...,[An 'assault' is something less than the use o...


In [29]:
# Write the corpus frame to disk in pandas' 'records' JSON orientation.
df.to_json(path_or_buf='trans_qa_data.json', orient='records')

## Fit the pipeline on my corpus

In [15]:
import pandas as pd
from ast import literal_eval
from cdqa.pipeline import QAPipeline



In [16]:
# Size of the corpus frame as (rows, columns) after the missing rows were dropped.
df.shape

(113, 2)

In [17]:
# Column dtypes of the corpus frame (both columns hold Python objects: str titles, list paragraphs).
df.dtypes

title         object
paragraphs    object
dtype: object

In [18]:
# Build the QA pipeline around the reader stored in 'distilbert_qa.joblib'.
cdqa_pipeline = QAPipeline(reader='distilbert_qa.joblib')
# Fit the pipeline's retriever on the title / paragraphs frame prepared above.
# This call is the cell's last expression, so its return value is what the
# notebook displays underneath.
cdqa_pipeline.fit_retriever(df=df)

QAPipeline(reader=BertQA(adam_epsilon=1e-08,
                         bert_model='distilbert-base-uncased',
                         do_lower_case=True, fp16=False,
                         gradient_accumulation_steps=1, learning_rate=5e-05,
                         local_rank=-1, loss_scale=0, max_answer_length=30,
                         n_best_size=20, no_cuda=False,
                         null_score_diff_threshold=0.0, num_train_epochs=3.0,
                         output_dir=None, predict_batch_size=8, seed=42,
                         server_ip='', ser...size=8,
                         verbose_logging=False, version_2_with_negative=False,
                         warmup_proportion=0.1, warmup_steps=0),
           retrieve_by_doc=False,
           retriever=BM25Retriever(b=0.75, floor=None, k1=2.0, lowercase=True,
                                   max_df=0.85, min_df=2, ngram_range=(1, 2),
                                   preprocessor=None, stop_words='english',
           

In [20]:
# Save the pipeline's reader under a new file name.
# NOTE(review): no reader training/fine-tuning call is visible in this
# notebook, so this presumably re-serialises the reader that was loaded from
# 'distilbert_qa.joblib' — confirm that is the intent.
cdqa_pipeline.dump_reader('distilbert_indexed40_41_reader.joblib')

In [24]:
# Inspect what kind of object the fitted pipeline exposes as `metadata`.
type(cdqa_pipeline.metadata)

pandas.core.frame.DataFrame

In [19]:
# Display the pipeline's parameters, including the nested `reader__*` entries.
# The saved cell ended at `cdqa_pipeline.`, which is a syntax error on re-run;
# the recorded output (double-underscore keys plus the 'reader' estimator
# itself) has the shape produced by scikit-learn's get_params().
cdqa_pipeline.get_params()

{'reader__adam_epsilon': 1e-08,
 'reader__bert_model': 'distilbert-base-uncased',
 'reader__do_lower_case': True,
 'reader__fp16': False,
 'reader__gradient_accumulation_steps': 1,
 'reader__learning_rate': 5e-05,
 'reader__local_rank': -1,
 'reader__loss_scale': 0,
 'reader__max_answer_length': 30,
 'reader__n_best_size': 20,
 'reader__no_cuda': False,
 'reader__null_score_diff_threshold': 0.0,
 'reader__num_train_epochs': 3.0,
 'reader__output_dir': None,
 'reader__predict_batch_size': 8,
 'reader__seed': 42,
 'reader__server_ip': '',
 'reader__server_port': '',
 'reader__train_batch_size': 8,
 'reader__verbose_logging': False,
 'reader__version_2_with_negative': False,
 'reader__warmup_proportion': 0.1,
 'reader__warmup_steps': 0,
 'reader': BertQA(adam_epsilon=1e-08, bert_model='distilbert-base-uncased',
        do_lower_case=True, fp16=False, gradient_accumulation_steps=1,
        learning_rate=5e-05, local_rank=-1, loss_scale=0, max_answer_length=30,
        n_best_size=20, no_cu

In [35]:
# Ask a single question with default settings. The recorded result is a
# 4-tuple: answer span, title of the source row, full paragraph text, and a
# numeric score (NOTE(review): score scale/meaning is not shown here — confirm
# against the cdQA documentation).
cdqa_pipeline.predict(query='What is disobedience to superior officer?')

('any lawful command',
 'act 41 note 8',
 'Sub sec. (2).—The offence under this sub sec is a less grave offence when not committed on active service and consists of disobedience of any lawful command given by a superior officer but not accompanied by the essential elements of the graver offence under sub sec (1).',
 12.758791940130445)

In [43]:
# Same kind of query, but requesting several candidates with a custom
# retriever score weight; the recorded result is a list of
# (answer, title, paragraph, score) tuples.
cdqa_pipeline.predict(
    query='What is the definition of superior officer?',
    n_predictions=5,
    return_all_preds=True,
    retriever_score_weight=0.95,
)

[('cannot give a lawful command to a person who is, by the terms of such restrictions, placed outside his control',
  'act 41 note 5 a',
  "(a) Superior officer; see AA.s.3(xxiii).—A 'superior officer' whose command has been restricted, either by the terms of his commission or by regulations, cannot give a lawful command to a person who is, by the terms of such restrictions, placed outside his control",
  5.946681161326095),
 ('Sec 3(xxiii) of Army Act',
  'act 40 note 6 a',
  "The definition of Superior Officer is given in Sec 3(xxiii) of Army Act. Further, The expression 'superior officer' in this section and in AA.s.41 means not only a superior in rank but also a senior in the same grade where that seniority gives power of command according to the usages of the service, but one sepoy can never be the 'Superior officer' of another. The court should be satisfied, before conviction, that the accused knew the person, with respect to whom the offence was committed, to be a superior offic