In [3]:
import os

from elasticsearch import Elasticsearch
from transformers import pipeline

# Name of the Elasticsearch index queried by context_gen_from_elasticsearch.
index_name = "qa_dataset"

# Connection settings come from the environment so that real credentials do not
# have to be committed with the notebook. The fallbacks are the values that were
# previously hard-coded, so a local default setup keeps working unchanged.
#   ES_URL       - node URL        (default: http://localhost:9200)
#   ES_USERNAME  - basic-auth user (default: elastic)
#   ES_PASSWORD  - basic-auth pass (default: elastic)
es = Elasticsearch(
  [os.environ.get("ES_URL", "http://localhost:9200")],
  basic_auth=(
    os.environ.get("ES_USERNAME", "elastic"),
    os.environ.get("ES_PASSWORD", "elastic"),
  ),
)

# The checkpoint path is assembled with os.path.join instead of the literal
# "app\models\checkpoint-7971": that literal only worked because "\m" and "\c"
# are not recognised escape sequences (Python warns about those), and it is not
# a valid directory path outside Windows. On Windows the joined path is the same
# string as before.
question_answerer = pipeline(
  "question-answering",
  model=os.path.join("app", "models", "checkpoint-7971"),
)

def context_gen_from_elasticsearch(question):
  """Fetch candidate context passages for ``question`` from Elasticsearch.

  Runs a fuzzy ``multi_match`` query through the module-level ``es`` client
  against the index named by ``index_name``. The ``title`` field is boosted x3
  and the ``question`` field x2 relative to ``context``.

  Args:
    question: Text to search for.

  Returns:
    A list holding the ``_source["context"]`` value of every hit in the
    response, in the order the hits were returned (empty if there are no hits).
  """
  search_body = {
    "query": {
      "multi_match": {
        "query": question,
        "fields": ["title^3", "question^2", "context"],
        "fuzziness": "AUTO",
      }
    }
  }

  hits = es.search(index=index_name, body=search_body)["hits"]["hits"]
  return [hit["_source"]["context"] for hit in hits]

def answer_question(question, k=3, min_score=0.0):
  """Answer ``question`` from retrieved contexts and format the top answers.

  Each context returned by ``context_gen_from_elasticsearch`` is passed to the
  ``question_answerer`` pipeline. Answers are de-duplicated on their displayed
  form (periods removed), keeping the highest-scoring occurrence of each, then
  ranked by score in descending order.

  Args:
    question: The question text.
    k: Maximum number of distinct answers to include.
    min_score: Answers whose model score is below this value are dropped.
      The default of 0.0 applies no filtering for non-negative scores.

  Returns:
    A string with up to ``k`` answers joined by ``", "`` and terminated by a
    single ``"."``. When no answer is available the result is just ``"."``.
  """
  # Maps displayed answer text -> best result seen for it. Keeping the best
  # score (rather than the first occurrence) stops a low-confidence hit from
  # shadowing a later, more confident extraction of the same answer.
  best_by_answer = {}
  for c in context_gen_from_elasticsearch(question):
    if not c:
      # The question-answering pipeline rejects empty/None contexts, so skip
      # documents with no usable text instead of failing the whole request.
      continue

    model_out = question_answerer(question=question, context=c)
    score = model_out["score"]
    if score < min_score:
      continue

    # Periods are stripped so the joined sentence ends with exactly one ".";
    # de-duplicating on this cleaned text keeps the emitted answers unique.
    shown = model_out["answer"].replace('.', '')
    best = best_by_answer.get(shown)
    if best is None or score > best["score"]:
      best_by_answer[shown] = {"answer": shown, "score": score, "context": c}

  # sorted() is stable, so equal scores keep first-seen order.
  ranked = sorted(best_by_answer.values(), key=lambda r: r["score"], reverse=True)
  return ", ".join(r["answer"] for r in ranked[:k]) + "."

In [16]:
# Example: run the retrieve-then-answer flow end to end for one question.
# The cell's value (the formatted answer string) is shown as its output.
question = "What is the capital of USA"
answer_question(question)

'Washington, USB, micro-USB receptacles.'

In [9]:
# Inspect the raw question_answerer output for every retrieved context, so the
# per-context scores can be compared before answer_question filters and ranks
# them. Each printed dict carries 'score', 'start', 'end' and 'answer'.
question = "when was the first moon landing"

conts = context_gen_from_elasticsearch(question)
for c in conts:
  print(question_answerer(question=question, context=c))

{'score': 0.5759463906288147, 'start': 77, 'end': 90, 'answer': 'November 1969'}
{'score': 0.5759463906288147, 'start': 77, 'end': 90, 'answer': 'November 1969'}
{'score': 0.1969580203294754, 'start': 1518, 'end': 1522, 'answer': '1895'}
{'score': 0.13519753515720367, 'start': 740, 'end': 753, 'answer': 'July 31, 1964'}
{'score': 0.6445551514625549, 'start': 145, 'end': 149, 'answer': '1994'}
{'score': 0.14042796194553375, 'start': 386, 'end': 404, 'answer': 'September 14, 1959'}
{'score': 0.6524192094802856, 'start': 534, 'end': 547, 'answer': 'July 20, 1969'}
{'score': 0.03875073418021202, 'start': 5, 'end': 27, 'answer': 'July to September 1989'}
{'score': 0.42686399817466736, 'start': 283, 'end': 293, 'answer': 'March 1969'}
{'score': 0.9269189834594727, 'start': 1144, 'end': 1157, 'answer': 'July 20, 1969'}
