In [1]:
from openai import OpenAI
import os
from dotenv import load_dotenv

# Load environment variables through python-dotenv before the key is looked up.
load_dotenv()

# Shared OpenAI client; the API key is taken from the OPEN_AI_API_KEY variable
# (None is passed through when the variable is not set).
client_open_ai = OpenAI(api_key=os.getenv("OPEN_AI_API_KEY"))

In [2]:
from opensearchpy import OpenSearch

def get_open_search_client(host='localhost', port=9200, auth=('admin', 'admin')):
    """Build an OpenSearch client for a single node.

    Args:
        host: Hostname of the OpenSearch node. Defaults to 'localhost'.
        port: Port of the OpenSearch node. Defaults to 9200.
        auth: (username, password) tuple passed as ``http_auth``. The default
            ('admin', 'admin') is only suitable for a local development
            cluster; pass real credentials (for example read from the
            environment) for anything else.

    Returns:
        An ``OpenSearch`` client configured with SSL enabled and certificate
        verification disabled.
    """
    # verify_certs=False turns off TLS certificate validation for this client.
    return OpenSearch(
        hosts=[{'host': host, 'port': port}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=False,
    )

In [3]:
def open_search_quary(search_body, index_name, client_open_search, size, from_where: int=None):
    """Run a search against an index and return the matching documents.

    Args:
        search_body: Query body forwarded to the client's ``search`` call.
        index_name: Name of the index to search.
        client_open_search: Client object exposing
            ``search(index=..., body=..., size=..., from_=...)``.
        size: Value forwarded as the ``size`` argument.
        from_where: Value forwarded as the ``from_`` argument (None by default).

    Returns:
        The list stored at ``response['hits']['hits']``, or None when that
        list is empty (a message is printed in that case).
    """
    response = client_open_search.search(index=index_name, body=search_body, size=size, from_=from_where)
    # response['hits'] is the mapping that wraps the result list, so its own
    # length does not say whether anything matched; inspect the inner list.
    hits = response['hits']['hits']
    if not hits:
        print('There is no hits in open search DB.')
        return None
    return hits

In [4]:
def query_with_open_AI(paper_detail):
    """Ask the chat model for four QA pairs about one paper.

    Args:
        paper_detail: Mapping providing the keys 'Title', 'Authors',
            'Abstract' and 'ArticleDate'.

    Returns:
        The text content of the first choice in the completion. The same text
        is also printed.
    """
    # The four paper fields are joined with newlines to form the user message.
    paper_text = f"{paper_detail['Title']}\n{paper_detail['Authors']}\n{paper_detail['Abstract']}\n{paper_detail['ArticleDate']}"

    conversation = [
        {"role": "system", "content": 'Generate 4 QA pairs for these information, inforamtion is seperated with \\n'},
        {"role": "user", "content": paper_text},
        {"role": "system", "content": 'Output question and answer as dictionary of list'},
    ]

    response = client_open_ai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )

    reply_text = response.choices[0].message.content
    print(f"{reply_text}\n")
    return reply_text



In [5]:
import time
import ast
def _parse_qa_response(raw_response):
    """Parse the model's reply text, trying JSON first and a Python literal second."""
    import json  # local import so this block does not depend on another cell

    try:
        return json.loads(raw_response)
    except ValueError:
        # Not valid JSON (e.g. single-quoted keys): fall back to literal
        # parsing with newlines stripped.
        return ast.literal_eval(raw_response.replace('\n', ''))


def generate_qa_pairs(paper_detail, sleep_seconds=10):
    """Generate question/answer rows for every paper in a search result.

    Args:
        paper_detail: Iterable of search hits; each hit must have a '_source'
            mapping with the keys 'Title', 'PMID', 'Authors', 'ArticleDate'
            and 'Abstract'.
        sleep_seconds: Pause after each model call, in seconds. Defaults to
            10; pass 0 to disable the pause.

    Returns:
        A list of rows of the form
        [question, answer, title, PMID, authors, abstract, article date],
        one row per question/answer pair.

    Raises:
        ValueError / SyntaxError: if a reply is neither valid JSON nor a
            valid Python literal.
        KeyError: if a reply lacks 'questions' or 'answers', or a paper lacks
            one of the required fields.
    """
    data = []
    for hit in paper_detail:
        paper = hit['_source']
        qa_dict = _parse_qa_response(query_with_open_AI(paper))

        questions_list = qa_dict['questions']
        # An answer wrapped in a non-empty list is unwrapped to its first
        # element. The type is checked first so scalar answers (numbers,
        # booleans) pass through instead of failing on len().
        answers_list = [
            answer[0] if isinstance(answer, list) and answer else answer
            for answer in qa_dict['answers']
        ]

        article_title = paper['Title']
        PMID = paper['PMID']
        author = paper['Authors']
        article_date = paper['ArticleDate']
        context = paper['Abstract']

        data.extend(
            [qu, an, article_title, PMID, author, context, article_date]
            for qu, an in zip(questions_list, answers_list)
        )

        # Avoid OpenAI blocking
        if sleep_seconds:
            time.sleep(sleep_seconds)
    return data

In [6]:
import csv
import urllib3

# Silence the warning urllib3 raises for unverified HTTPS requests
# (the OpenSearch client is created with verify_certs=False).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Index to read papers from and CSV file to append the QA pairs to.
index_name = 'pubmed_intelligence'
saving_file_name = 'reformatting_testing_qa.csv'

# Column order matches the rows produced by generate_qa_pairs.
header = [
    'question',
    'answer',
    'article_title',
    'PMID',
    'author',
    'context',
    'article_date',
]

client_open_search = get_open_search_client()

# Match documents by their index name.
search_body = {"query": {"match": {"_index": index_name}}}

# Fetch 5 documents starting at offset 2.
query_result = open_search_quary(
    search_body,
    index_name,
    client_open_search,
    size=5,
    from_where=2,
)




In [9]:
if query_result is not None:

    data = generate_qa_pairs(query_result)

    # The file is opened in append mode, so the header belongs only in a new
    # or empty file; writing it on every run would inject header rows into
    # the middle of the data.
    needs_header = (
        not os.path.exists(saving_file_name)
        or os.path.getsize(saving_file_name) == 0
    )

    # newline='' lets the csv module control line endings (otherwise blank
    # rows can appear on Windows); explicit UTF-8 keeps non-ASCII text in the
    # abstracts from depending on the platform default encoding.
    with open(saving_file_name, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if needs_header:
            # write the header
            writer.writerow(header)
        # write the data
        writer.writerows(data)

{
  "questions": [
    "What was the purpose of the study on new-onset benign epilepsy with centrotemporal spikes (BECTS)?",
    "How many children with BECTS and healthy children were enrolled in the study?",
    "Which domains showed significant differences between patients with BECTS and healthy controls?",
    "What implications did the study's results suggest for the assessment and intervention of children with new-onset BECTS?"
  ],
  "answers": [
    "The purpose of the study was to define the very early neuropsychological profile of children with benign epilepsy with centrotemporal spikes (BECTS), focusing on executive functions.",
    "Fifteen school-aged children with BECTS and fifteen healthy children were enrolled in the study.",
    "Significant differences between patients and controls emerged in the domains of language, executive functions, and academic skills.",
    "The study results suggested that children with new-onset BECTS may demonstrate neuropsychological dysfun