In [4]:
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.utils import clean_wiki_text, convert_files_to_docs, print_answers
from haystack.nodes import FARMReader
import requests

# Elastic Search Setup and Document Storing

In [5]:
print(requests.get("http://localhost:9200/_cat/indices").text)

yellow open label      TOmAl2WNQdCPjhDww-_9Vw 1 1  0 0    208b    208b
yellow open sampledata ObtWn_pcRl6RXZI2jO0M4Q 1 1 15 0 304.6kb 304.6kb



In [6]:
# requests.delete("http://localhost:9200/document")

In [7]:
# Connect to Elasticsearch
document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="sampledata")

In [8]:
doc_dir = "../data/sampleData"
docs = convert_files_to_docs(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)
document_store.write_documents(docs)

INFO - haystack.utils.preprocessing -  Converting ..\data\sampleData\0_Toilet_Training_issues_1.txt
INFO - haystack.utils.preprocessing -  Converting ..\data\sampleData\1_Toilet_Training_issues_2.txt
INFO - haystack.utils.preprocessing -  Converting ..\data\sampleData\2_Toilet_Training_issues_3.txt
INFO - haystack.utils.preprocessing -  Converting ..\data\sampleData\3_Toilet_Training_issues_4.txt
INFO - haystack.utils.preprocessing -  Converting ..\data\sampleData\4_Picky_Eaters_1.txt
INFO - haystack.utils.preprocessing -  Converting ..\data\sampleData\5_Picky_Eaters_2.txt
INFO - haystack.utils.preprocessing -  Converting ..\data\sampleData\6_Picky_Eaters_3.txt
INFO - haystack.utils.preprocessing -  Converting ..\data\sampleData\7_Behavior_1.txt


In [9]:
requests.get('http://localhost:9200/sampledata/_count').json()

{'count': 15,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}}

In [10]:
from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
from haystack.retriever.dense import DensePassageRetriever

In [11]:
doc_store = ElasticsearchDocumentStore(
    host='localhost',
    username='', password='',
    index='sampledata'
)

# Retriever

In [12]:
retriever = DensePassageRetriever(
    document_store=doc_store,
    query_embedding_model='facebook/dpr-question_encoder-single-nq-base',
    passage_embedding_model='facebook/dpr-ctx_encoder-single-nq-base',
    use_gpu=True,
    embed_title=True
)

INFO - haystack.modeling.utils -  Using devices: CPU
INFO - haystack.modeling.utils -  Number of GPUs: 0
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find facebook/dpr-question_encoder-single-nq-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Loaded facebook/dpr-question_encoder-single-nq-base
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find facebook/dpr-ctx_encoder-single-nq-base locally.
INFO 

In [23]:
# DensePassageRetriever.save(retriever,"../models/retriever")

INFO - haystack.modeling.model.biadaptive_model -  prediction_head saving


In [14]:
doc_store.update_embeddings(retriever=retriever)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 15 docs ...
Updating embeddings: 10000 Docs [00:08, 1141.14 Docs/s]       


# Qusetion Answering - get context about question

In [15]:
prediction=retriever.retrieve("How do I toilet train my child as he keeps wetting himself?")

In [16]:
from pprint import pprint
pprint(prediction)

[<Document: {'content': 'According to the Cleveland Clinic, about 15% of children ages 5 or older actually\nstop wetting the bed each year. So its still perfectly normal for a child to wet the\nbed up until the age of 7. It is often just a stage in their development. And while\ngirls can and do wet the bed, it is 2 times for likely for a boy to wet the bed.\nThere are a few things that can cause bedwetting. Dr. Charles Kwon is a pediatric\nnephrologist, says that the underlying issue is usually a bladder thatâ€™s not\nmatured yet. Other possible causes can be a hormone imbalance, constipation,\nthe child is having a lot of stress and anxiety, they have ADHD, they have a small\nbladder, or there is a family history of childhood bedwetting. Bed wetting in some\ncases could also be a sign of sleep apnea if your child snores a lot or shows other\nsigns of sleep apnea, a UTI (urinary tract infections), or even diabetes. Talking\nwith your childs doctor is always suggested, just in case.\nTh

# Reader

In [17]:
from haystack.reader import FARMReader
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)


INFO - haystack.modeling.utils -  Using devices: CPU
INFO - haystack.modeling.utils -  Number of GPUs: 0
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find deepset/roberta-base-squad2 locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Loaded deepset/roberta-base-squad2
INFO - haystack.modeling.utils -  Using devices: CPU
INFO - haystack.modeling.utils -  Number of GPUs: 0
INFO - haystack.modeling.infer -  Got ya 7 parallel workers to do inference ...
INFO - haystack.modeling.infer -   0     0     0     0     0     0     0  
INFO - haystack.modeling.infer -  /w\   /w\   /w\   /w\   /w\   /w\   /w\ 
INFO - haystack.modeling.infer -  /'\   / \   /'\   /'\   / \   / \   /'\ 


In [24]:
# FARMReader.save(reader,"../models/reader")

INFO - haystack.nodes.reader.farm -  Saving reader model to ../models/reader


# Pipeline

In [19]:
from haystack.pipelines import ExtractiveQAPipeline

pipe = ExtractiveQAPipeline(reader, retriever)

In [20]:
prediction = pipe.run(
    query="My child has a hard time drinking from the bottle, what can I do?", params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
)

  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|██████████| 1/1 [00:03<00:00,  3.96s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.32s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.13 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.96 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.82s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:02<00:00,  2.58s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:02<00:00,  2.47s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.46s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.96s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:02<00:00,  2.26s/ Batches]


# Question Answering

In [21]:
from pprint import pprint

pprint(prediction)

{'answers': [<Answer {'answer': 'try and give the your child the bottle as soon as\nthey wake up', 'type': 'extractive', 'score': 0.27849215269088745, 'context': 'inking from\na bottle, one thing would be to try and give the your child the bottle as soon as\nthey wake up, wither that be when they wake up in the mo', 'offsets_in_document': [{'start': 115, 'end': 177}], 'offsets_in_context': [{'start': 44, 'end': 106}], 'document_id': '8b16f49e9942a635ee7c3637949f501b', 'meta': {'name': '6_Picky_Eaters_3.txt'}}>,
             <Answer {'answer': 'stop their fluid intake', 'type': 'extractive', 'score': 0.26187076419591904, 'context': 'crease their liquid intake earlier in the\nday and after dinner, stop their fluid intake. Another thing you can do is schedule\nbathroom breaks for your', 'offsets_in_document': [{'start': 1145, 'end': 1168}], 'offsets_in_context': [{'start': 64, 'end': 87}], 'document_id': 'ce8b05d655380a32d9b5bd9c71ad8b4e', 'meta': {'name': '3_Toilet_Training_issues_4.txt'

In [22]:
print_answers(prediction, details="minimum")


Query: My child has a hard time drinking from the bottle, what can I do?
Answers:
[   {   'answer': 'try and give the your child the bottle as soon as\n'
                  'they wake up',
        'context': 'inking from\n'
                   'a bottle, one thing would be to try and give the your '
                   'child the bottle as soon as\n'
                   'they wake up, wither that be when they wake up in the mo'},
    {   'answer': 'stop their fluid intake',
        'context': 'crease their liquid intake earlier in the\n'
                   'day and after dinner, stop their fluid intake. Another '
                   'thing you can do is schedule\n'
                   'bathroom breaks for your'},
    {   'answer': 'cradling them at a 45-degree angle',
        'context': ' while sitting or lying down a\n'
                   'certain way. You could try cradling them at a 45-degree '
                   'angle while keeping their\n'
                   'head and neck aligned, si