# Extractive Model Pipeline

In [1]:
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import DensePassageRetriever
from haystack.pipelines import Pipeline
from haystack.nodes import TransformersReader
from haystack.utils import print_answers

# Re-open the FAISS-backed document store from the index file on disk.
document_store = FAISSDocumentStore.load("document_store.faiss")

# Dense passage retriever over that store: one encoder for queries, one for
# passages; GPU use is requested.
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
    use_gpu=True,
)

# Extractive reader loaded from a local checkpoint directory.
# The relative path is written with forward slashes so it resolves on every
# OS (Windows included). Backslashes in a normal string literal form invalid
# escape sequences such as "\i" and "\d", and are a path separator on Windows
# only.
reader = TransformersReader(
    model_name_or_path="../initial_moddeling/distilbert-qa/distilbert-nlb-qa",
    use_gpu=True,
)

  return self.fget.__get__(instance, owner)()
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.


In [2]:
# Extractive QA pipeline: Query -> Retriever -> Reader.
extractive_query = "When were the anticorruption policies and procedures revised?"
extractive_params = {"Reader": {"top_k": 5}}  # top_k=5 is passed to the Reader node

p_extractive = Pipeline()
p_extractive.add_node(
    component=retriever,
    name="Retriever",
    inputs=["Query"],
)
p_extractive.add_node(
    component=reader,
    name="Reader",
    inputs=["Retriever"],
)

# Run the question through the pipeline and print every answer field.
res = p_extractive.run(query=extractive_query, params=extractive_params)
print_answers(res, details="all")

'Query: When were the anticorruption policies and procedures revised?'
'Answers:'
[   <Answer {'answer': '2020', 'type': 'extractive', 'score': 0.8741440176963806, 'context': 'ules, instructions, or\nThere were no confirmed cases of corruption in 2020. ', 'offsets_in_document': [{'start': 600, 'end': 604}], 'offsets_in_context': [{'start': 70, 'end': 74}], 'document_ids': ['9d62c736460f4723707cd101c87cf668'], 'meta': {'_split_id': 152, 'vector_id': '146'}}>,
    <Answer {'answer': '2020', 'type': 'extractive', 'score': 0.7560802698135376, 'context': 'and Bribery\nand on Management of Conflicts of Interest was renewed in 2020. The\nsystem of risk management of corruption and bribery was suppleme', 'offsets_in_document': [{'start': 313, 'end': 317}], 'offsets_in_context': [{'start': 70, 'end': 74}], 'document_ids': ['6940e459fb997a495706c275bdd852a2'], 'meta': {'_split_id': 153, 'vector_id': '94'}}>,
    <Answer {'answer': 'second\nhalf of 2020', 'type': 'extractive', 'score': 0.73577910

# Generative Model Pipeline

In [3]:
from haystack.nodes import Seq2SeqGenerator
from haystack.nodes.answer_generator.transformers import _BartEli5Converter

# Local seq2seq checkpoint directory used by the generative pipeline.
generator_model_path = "../initial_moddeling/t5-qa/t5-small-finetuned-squadv2-finetuned-NLB-QA/"

# NOTE(review): _BartEli5Converter is an underscore-prefixed (private) haystack
# helper, presumably written for the BART ELI5 model -- confirm that the input
# format it produces suits this checkpoint.
generator = Seq2SeqGenerator(
    model_name_or_path=generator_model_path,
    input_converter=_BartEli5Converter(),
)

In [4]:
# Generative QA pipeline: Query -> Retriever -> Generator.
generative_query = "When were the anticorruption policies and procedures revised?"
generative_params = {"Generator": {"top_k": 5}}  # top_k=5 is passed to the Generator node

p_generative = Pipeline()
p_generative.add_node(
    component=retriever,
    name="Retriever",
    inputs=["Query"],
)
p_generative.add_node(
    component=generator,
    name="Generator",
    inputs=["Retriever"],
)

# Run the same question through the generative pipeline and print every answer field.
res = p_generative.run(query=generative_query, params=generative_params)
print_answers(res, details="all")

'Query: When were the anticorruption policies and procedures revised?'
'Answers:'
[   <Answer {'answer': 'second half of 2020', 'type': 'generative', 'score': None, 'context': None, 'offsets_in_document': None, 'offsets_in_context': None, 'document_ids': ['baeb9fdd6531b33d53a369cd89476249', 'ea844cf4d61d912aac1b6d45638f2f60', '68fa2971e62700ad6557cb7d9795059d', '6940e459fb997a495706c275bdd852a2', '54801d5afb011fafac218eea52041d3e', '9880a9be6286139a933f6d3fe107fd3c', '9d62c736460f4723707cd101c87cf668', 'dab3e16587e77fc186b1187eefd512c1', '3679ea062311b473c2d99fe756138744', '705a756196c89ca20e9cd59b60353e1d'], 'meta': {'doc_scores': [0.6590033859811371, 0.6571665722874188, 0.6564186538274209, 0.6516233219710503, 0.6504398652880188, 0.6496594735609219, 0.6479736160215593, 0.6468528726713799, 0.6463240189833409, 0.646236936319553], 'content': ['Table 1: % of women in leadership positions in 2020\nNo corruption\nThe anticorruption policy and\nprocedures were revised in second\nhalf of 2020