In [1]:
!pip3 install elasticsearch



In [2]:
!pip3 install elasticsearch-dsl



In [3]:
import sys
sys.path.insert(0, '../../../BERT-FAQ/')

# import required libraries
from elasticsearch_dsl import Index, Document, Integer, Text, analyzer, Keyword, Double
from elasticsearch_dsl.connections import connections
from elasticsearch import Elasticsearch, helpers

from evaluation import get_relevance_label_df
from shared.utils import load_from_json
from shared.utils import dump_to_json
from shared.utils import make_dirs
from indexer import ingest_data
from indexer import QA
import logging

In [4]:
# Build the FAQ question/answer records that are indexed later in the notebook.
#
# get_relevance_label_df is given the path to the query/answer-pairs JSON file
# and returns a DataFrame; only the rows whose query_type is 'faq' are kept.

query_answer_pairs_filepath = '../../../BERT-FAQ/data/CovidFAQ/query_answer_pairs.json'
relevance_label_df = get_relevance_label_df(query_answer_pairs_filepath)
faq_qa_pair_df = relevance_label_df[relevance_label_df['query_type'] == 'faq']

# One dict per row, keyed by column name. orient='records' returns a list
# directly, avoids transposing the whole frame, and cannot silently drop rows
# when index labels repeat (a dict keyed by index label would keep only the
# last row for each label).
faq_qa_pairs = faq_qa_pair_df.to_dict(orient='records')

In [5]:
# Inspect the complete frame returned by get_relevance_label_df, before any
# filtering on query_type.
relevance_label_df

Unnamed: 0,label,query_type,question,answer,id
0,1,faq,What is COVID-19?,COVID-19 is a new coronavirus that we have not...,1
1,1,faq,What is a coronavirus?,Coronavirus are a type of virus - there are ma...,2
2,1,faq,Any advice about how to minimize risk during g...,"Currently, there is no evidence of food or foo...",3
3,1,faq,Why do I have to stay at home?,Socially distancing is a practice that aims to...,4
4,1,faq,What is social distancing and how does it help...,Social distancing is one of the most effective...,5
...,...,...,...,...,...
1450,1,user_query,will coronavirus stop in summer,"We do not know. Some viruses, like the common ...",1451
1451,1,user_query,will coronavirus stop in the summer,"We do not know. Some viruses, like the common ...",1452
1452,1,user_query,will coronavirus survive in the summer,"We do not know. Some viruses, like the common ...",1453
1453,1,user_query,will coronavirus survive on surfaces,A recent study shows that the virus can live i...,1454


In [6]:
# Inspect the FAQ-only subset (rows where query_type == 'faq'); these are the
# rows that get converted to records and indexed.
faq_qa_pair_df

Unnamed: 0,label,query_type,question,answer,id
0,1,faq,What is COVID-19?,COVID-19 is a new coronavirus that we have not...,1
1,1,faq,What is a coronavirus?,Coronavirus are a type of virus - there are ma...,2
2,1,faq,Any advice about how to minimize risk during g...,"Currently, there is no evidence of food or foo...",3
3,1,faq,Why do I have to stay at home?,Socially distancing is a practice that aims to...,4
4,1,faq,What is social distancing and how does it help...,Social distancing is one of the most effective...,5
...,...,...,...,...,...
63,1,faq,Can COVID-19 be spread through surface-touching?,"It can be possible, as people can touch a surf...",64
64,1,faq,How do I prepare my child for a COVID-19 outbr...,It is important to talk with your child about ...,65
65,1,faq,What steps should parents take to protect chil...,"Remember - this is a new virus, and there is a...",66
66,1,faq,Why are schools closing down becasue of COVID-19?,School dismissals are a method that can help s...,67


In [7]:
# Make sure faq_qa_pairs is a concrete list so that it can be sliced in the
# next cell.
faq_qa_pairs = list(faq_qa_pairs)

In [8]:
# Peek at the first five records; each one is a dict keyed by column name.
faq_qa_pairs[:5]

[{'label': 1,
  'query_type': 'faq',
  'question': 'What is COVID-19? ',
  'answer': 'COVID-19 is a new coronavirus that we have not seen previously; it is not the same as the flu or common cold. Coronaviruses are types of viruses that cause illnesses. These include the common cold, flu, to more severe diseases like Middle East Respiratory Syndrom (MERs-CoV) and Severe Acute Respiratory Syndrome (SARS-CoV).   [[Would you like to know more about COVID-19 symptoms?]]',
  'id': '1'},
 {'label': 1,
  'query_type': 'faq',
  'question': 'What is a coronavirus?',
  'answer': 'Coronavirus are a type of virus - there are many kinds, and some of them can cause disease. Examples of coronaviruses include the common cold, flu, to more severe diseases like Middle East Respiratory Syndrome (MERS-CoV) and Severe Acute Respiratory Syndrome (SARS-CoV).   COVID19 is a novel coronavirus that has been recently identified.   [[Would you like more information on COVID19?]] [[Would you like to know the sympto

In [9]:
# Rebuild the "covidfaq" Elasticsearch index from scratch and load the FAQ
# question/answer records into it. Re-running this cell drops and recreates
# the index.
try:

    # Register the default connection; `es` is also handed to ingest_data below.
    es = connections.create_connection(hosts=['localhost'])

    # Name of the index that receives the FAQ records
    index_name = "covidfaq"

    # Client-side index definition (nothing is sent to the server yet)
    index = Index(index_name)

    # Custom settings: a single shard and no replicas
    index.settings(
        number_of_shards=1,
        number_of_replicas=0
    )

    # Register the QA document *before* creating the index. Index.create()
    # only sends the mappings of documents registered at that point, so
    # registering afterwards would create the index without QA's mappings.
    index.document(QA)

    # Start from a clean slate: delete the index, ignore if it doesn't exist
    index.delete(ignore=404)

    # Create the index in Elasticsearch (settings + QA mappings)
    index.create()

    # Ingest data to Elasticsearch
    ingest_data(faq_qa_pairs, es=es, index=index_name)

    print("Finished indexing {} records to {} index".format(len(faq_qa_pairs), index_name))

except Exception:
    # Log with the traceback, then re-raise so that a failed indexing run
    # stops "Run All" instead of looking like a cell that completed normally.
    logging.exception('exception occured')
    raise


100%|██████████| 68/68 [00:00<00:00, 19422.04it/s]

Finished indexing 68 records to covidfaq index



