## IMPORTS

In [None]:
# install haystack directly
!pip install git+https://github.com/deepset-ai/haystack.git

In [None]:
!pip install wikipedia-api

In [3]:
# imports for ES initialization
import os
from subprocess import Popen, PIPE, STDOUT

In [4]:
import wikipediaapi

In [None]:
from haystack.nodes import FARMReader, TransformersReader, ElasticsearchRetriever
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.pipelines import ExtractiveQAPipeline

In [6]:
# Initialization of Elasticsearch for Colab.
# Download the Elasticsearch 7.6.2 Linux tarball (-q keeps wget quiet) and unpack
# it into the current working directory.
! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.2-linux-x86_64.tar.gz -q
! tar -xzf elasticsearch-7.6.2-linux-x86_64.tar.gz
# Give ownership of the unpacked tree to the `daemon` user and group.
! chown -R daemon:daemon elasticsearch-7.6.2

# Launch the Elasticsearch binary as a background child process. stderr is merged
# into stdout, and stdout is captured through a pipe. `preexec_fn` switches the
# child to uid 1 before it executes -- presumably the `daemon` account that was
# given ownership above (NOTE(review): confirm the uid mapping on the target image).
# NOTE(review): nothing in this cell reads `es_server.stdout`; verify that an
# unread pipe cannot stall the server once its buffer fills.
es_server = Popen(["elasticsearch-7.6.2/bin/elasticsearch"],
                   stdout=PIPE, stderr=STDOUT,
                   preexec_fn=lambda: os.setuid(1)
                  )
# Fixed 30-second pause so the server has time to start before later cells
# connect to it. NOTE(review): a readiness check would be more reliable than a
# fixed delay.
! sleep 30

## WIKI

In [7]:
# Shared Wikipedia client used by the helper functions below: English-language
# pages, extracted with `ExtractFormat.WIKI`.
# NOTE(review): newer wikipedia-api releases reportedly require a `user_agent`
# argument here -- confirm against the installed version.
wiki_wiki = wikipediaapi.Wikipedia(language="en", extract_format=wikipediaapi.ExtractFormat.WIKI)

## ELASTICSEARCH

In [8]:
# Document store on the Elasticsearch instance at localhost, using the index
# named "wiki"; username and password are passed as empty strings.
document_store = ElasticsearchDocumentStore(
    host="localhost",
    username="",
    password="",
    index="wiki",
)

In [9]:
# Retriever node built on top of the Elasticsearch document store created above.
retriever = ElasticsearchRetriever(document_store)

In [None]:
# Reader node: loads the "deepset/bert-base-cased-squad2" model and requests GPU use.
reader = FARMReader(model_name_or_path="deepset/bert-base-cased-squad2", use_gpu=True) 
# Alternative model to try: "deepset/roberta-base-squad2"

In [11]:
# The entire QA model: an extractive QA pipeline built from the reader and the
# retriever defined above. `get_ans` below queries it through `QA.run(...)`.
QA = ExtractiveQAPipeline(reader, retriever)

## ASKING QUESTIONS

In [12]:
# tells the caller whether Wikipedia knows the requested page
def check_topic(topic):
  """Look up `topic` with the shared `wiki_wiki` client and report if it exists.

  Args:
    topic: title of the Wikipedia page to look for.

  Returns:
    The result of the page's `exists()` call (a boolean).
  """
  return wiki_wiki.page(topic).exists()

# gets the contents of the page and uploads it to ElasticSearch
def get_wiki(topic):
  """Fetch the Wikipedia page for `topic` and index it line by line.

  The page text is split on newlines. Every non-blank line is stripped of
  surrounding whitespace and written to the module-level `document_store` as
  one document of the form `{"content": <text>}`. Blank lines are skipped so
  that no empty documents end up in the index, and nothing is written at all
  when the page has no text.

  Args:
    topic: title of the Wikipedia page to fetch.
  """
  page_text = wiki_wiki.page(topic).text
  # strip first, then filter: whitespace-only lines are dropped as well
  stripped_lines = (line.strip() for line in page_text.split("\n"))
  # converts the data into json-like format
  docs = [{"content": text} for text in stripped_lines if text]
  # upload (skipped when there is nothing to index)
  if docs:
    document_store.write_documents(docs)

# performs the query and prints the top answer
def get_ans(question):
  """Run `question` through the `QA` pipeline and print the top answer.

  The module-level `QA` pipeline is queried, and the `answer` attribute of the
  first entry in the response's "answers" list is printed after three blank
  lines. When that list is empty, a short notice is printed instead of
  raising an IndexError.

  Args:
    question: the natural-language question to ask.
  """
  # query
  es_response = QA.run(question)
  answers = es_response["answers"]

  # guard: the pipeline may come back with no candidate answers
  if not answers:
    print("\n\n\nNo answer was found for this question.")
    return

  # fetching the top answer
  print(f"\n\n\n{answers[0].answer}")

In [13]:
# Ask for a topic first; the follow-up question is only requested when
# Wikipedia actually has a page for that topic.
topic = input("What topic would you like to discuss? ")

if not check_topic(topic):
  print("The topic you selected doesn't exist in our database.")
else:
  question = input(f"What would you like to know about {topic.lower()}? ")
  # index the page before asking the pipeline about it
  get_wiki(topic)
  get_ans(question)

What topic would you like to discuss? Cats
What would you like to know about cats? What is the lifespan of a cat?


  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  3.20 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  6.81 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 16.61 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 17.06 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 18.45 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.40 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  5.01 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 18.46 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 14.79 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 20.88 Batches/s]




seven years



