## IMPORTS & INSTALLATION

In [None]:
# install haystack directly
!pip install git+https://github.com/deepset-ai/haystack.git

In [53]:
import requests

from haystack.nodes import FARMReader, TransformersReader
from haystack.utils import print_answers

In [2]:
# initialization for colab
! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.2-linux-x86_64.tar.gz -q
! tar -xzf elasticsearch-7.6.2-linux-x86_64.tar.gz
! chown -R daemon:daemon elasticsearch-7.6.2

import os
from subprocess import Popen, PIPE, STDOUT
es_server = Popen(['elasticsearch-7.6.2/bin/elasticsearch'],
                   stdout=PIPE, stderr=STDOUT,
                   preexec_fn=lambda: os.setuid(1)
                  )
# wait - avoids errors
! sleep 30

In [None]:
# Query the cluster-health endpoint of the local node to confirm it is reachable;
# the parsed JSON body is shown as the cell output.
es_health = requests.get("http://localhost:9200/_cluster/health")
es_health.json()

## WIKI API & DATA RETRIEVAL

In [None]:
# TODO: fetch the article text through the Wikipedia API. References:
# https://pypi.org/project/Wikipedia-API/
# https://www.thepythoncode.com/article/access-wikipedia-python

## DATA PREPARATION

In [6]:
from haystack.document_stores import ElasticsearchDocumentStore

# Connect to the Elasticsearch node on localhost (empty credentials);
# documents are stored in the "wiki" index.
document_store = ElasticsearchDocumentStore(
    host="localhost",
    username="",
    password="",
    index="wiki",
)

In [94]:
# Toy input text; each line becomes one entry of `wiki_doc`.
# The raw string keeps its own name so `wiki_doc` is only ever a list.
raw_text = "Hello my name is Petra Bajac\nI like cats"
wiki_doc = raw_text.split("\n")

In [95]:
# inspect the list of lines produced by the split
wiki_doc

['Hello my name is Petra Bajac', 'I like cats']

In [100]:
# Wrap every line in a dict with a single "content" key; these dicts are
# what gets passed to the document store.
dics = [dict(content=passage) for passage in wiki_doc]

In [102]:
# preview the first two document dicts
dics[:2]

[{'content': 'Hello my name is Petra Bajac'}, {'content': 'I like cats'}]

In [103]:
# number of document dicts that will be uploaded
len(dics)

2

In [104]:
# upload the data to Elasticsearch: hand the list of {"content": ...} dicts
# to the document store (created with index="wiki")
document_store.write_documents(dics)

In [None]:
# Verify the upload: ask Elasticsearch for the document count of the "wiki"
# index and show the parsed JSON body as the cell output.
wiki_count = requests.get("http://localhost:9200/wiki/_count")
wiki_count.json()

## ELASTICSEARCH RETRIEVER, READER & QA PIPELINE

In [106]:
from haystack.nodes import ElasticsearchRetriever

# Retriever node that looks documents up in the Elasticsearch document store.
retriever = ElasticsearchRetriever(document_store=document_store)

In [None]:
# Model identifier handed to the reader; kept in a named constant so it is easy to swap.
READER_MODEL = "deepset/bert-base-cased-squad2"
reader = FARMReader(model_name_or_path=READER_MODEL)

In [110]:
from haystack.pipelines import ExtractiveQAPipeline

# Combine the reader and the retriever into one extractive question-answering pipeline.
QA = ExtractiveQAPipeline(reader=reader, retriever=retriever)

## ASKING QUESTIONS

In [111]:
def get_ans(question):
  """Run the QA pipeline on ``question`` and print the first answer.

  Args:
    question: Query string passed to ``QA.run``.

  Returns:
    None. The answer text, or a fallback notice when the pipeline returns
    no answers, is written to stdout.
  """
  es_response = QA.run(question)
  answers = es_response.get("answers") or []
  if not answers:
    # Without this guard an empty result raises IndexError on answers[0].
    print("\n\n\nNo answer found.")
    return
  ans = answers[0].answer

  # The leading blank lines set the answer apart from whatever the pipeline
  # itself printed while running.
  print(f"\n\n\n{ans}")

In [112]:
# ask a sample question; get_ans prints the extracted answer
get_ans("What is your name?")

  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.13 Batches/s]




Petra Bajac



