In [69]:
from openai import OpenAI
import os
from dotenv import load_dotenv

# Run python-dotenv's loader first so the environment lookup below happens
# after it has had a chance to populate the process environment.
load_dotenv()

# Module-level OpenAI client shared by the cells below. The key is taken from
# the OPEN_AI_API_KEY environment variable (None is passed when it is unset).
client_open_ai = OpenAI(api_key=os.getenv("OPEN_AI_API_KEY"))

In [70]:
from opensearchpy import OpenSearch

def get_open_search_client(host='localhost', port=9200, auth=('admin', 'admin')):
    """Create an OpenSearch client for a single node.

    Called without arguments this behaves exactly as before (localhost:9200
    with the admin/admin account); the parameters only exist so the same
    helper can be pointed at another node or account.

    Args:
        host: Host name of the OpenSearch node.
        port: Port of the OpenSearch node.
        auth: ``(user, password)`` tuple passed as ``http_auth``.

    Returns:
        The constructed ``OpenSearch`` client instance.
    """
    # NOTE(review): the connection uses SSL but certificate verification is
    # switched off, and the default credentials are hard-coded. That is only
    # acceptable for a local development cluster -- confirm before reusing
    # this helper against a shared or production deployment.
    client_open_search = OpenSearch(
        hosts=[{'host': host, 'port': port}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=False,
    )
    return client_open_search

In [81]:
def open_search_quary(search_body, index_name, client_open_search, size, from_where: int=None):
    """Run a search against an index and return the matching documents.

    Args:
        search_body: Query body forwarded as ``body`` to ``search``.
        index_name: Name of the index to search.
        client_open_search: Client object exposing a ``search`` method.
        size: Maximum number of hits to request.
        from_where: Offset of the first hit to return; forwarded as ``from_``.

    Returns:
        The list found under ``response['hits']['hits']`` when it is
        non-empty, otherwise ``None`` (after printing a notice).
    """
    response = client_open_search.search(index=index_name, body=search_body, size=size, from_=from_where)
    # The emptiness check has to look at the inner list of documents:
    # response['hits'] is the surrounding wrapper object, so measuring its
    # length would report "non-empty" even when nothing matched.
    hits = response['hits']['hits']
    if hits:
        return hits
    print('There is no hits in open search DB.')
    return None

In [84]:
def query_with_open_AI(paper_detail):
    """Ask the chat model for question/answer pairs about one paper.

    The paper's 'Title', 'Authors', 'Abstract' and 'ArticleDate' values are
    joined with newlines into the user message, framed by two system
    messages. The reply text is printed (followed by a blank line) and
    returned.

    Args:
        paper_detail: Mapping providing the four keys listed above.

    Returns:
        The content of the first choice's message in the completion.
    """
    # Same field order and formatting as a single f-string with "\n" between
    # the four values.
    paper_text = "\n".join(
        f"{paper_detail[field]}"
        for field in ('Title', 'Authors', 'Abstract', 'ArticleDate')
    )

    prompt_messages = [
        {"role": "system", "content": 'Generate 4 QA pairs for these information, inforamtion is seperated with \\n'},
        {"role": "user", "content": paper_text},
        {"role": "system", "content": 'Output question and answer as dictionary of list'},
    ]

    completion = client_open_ai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=prompt_messages,
    )

    answer_text = completion.choices[0].message.content
    print(f"{answer_text}\n")
    return answer_text



In [89]:
import time
def generate_qa_pairs(paper_detail, delay_seconds=10):
    """Build CSV-ready question/answer rows for a list of search hits.

    For every hit, the paper stored under ``'_source'`` is sent to
    ``query_with_open_AI``. The reply is read line by line: each line is
    expected to look like ``<label>: <text>``, and the texts are taken to
    alternate question, answer, question, answer, ...

    Args:
        paper_detail: Iterable of hits, each a mapping with a ``'_source'``
            entry that provides 'Title', 'PMID', 'Authors' and 'ArticleDate'
            (plus whatever ``query_with_open_AI`` reads).
        delay_seconds: Pause after each model call. Defaults to the
            previously hard-coded 10 seconds; pass 0 to disable.

    Returns:
        A list of rows
        ``[question, answer, article_title, PMID, author, article_date]``.
    """
    data = []
    for hit in paper_detail:
        paper = hit['_source']
        result = query_with_open_AI(paper)

        qa_texts = []
        for line in (result or '').split('\n'):
            # Split on the FIRST colon only, so text that itself contains a
            # colon (e.g. "a ratio of 3:1") is kept whole instead of being
            # cut off at the second colon.
            _label, separator, text = line.partition(':')
            text = text.strip()
            # Lines without a colon, or with nothing after it, carry no
            # question/answer text; skipping them avoids an IndexError and
            # keeps the question/answer alternation below aligned.
            if separator and text:
                qa_texts.append(text)

        questions = qa_texts[0::2]
        answers = qa_texts[1::2]
        article_title = paper['Title']
        PMID = paper['PMID']
        author = paper['Authors']
        article_date = paper['ArticleDate']
        data += [
            [question, answer, article_title, PMID, author, article_date]
            for question, answer in zip(questions, answers)
        ]

        # Avoid OpenAI blocking
        if delay_seconds:
            time.sleep(delay_seconds)
    return data

In [90]:
import csv
import urllib3

# The OpenSearch client is created with verify_certs=False, so suppress the
# InsecureRequestWarning urllib3 would otherwise emit for the requests.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

index_name = 'pubmed_intelligence'

saving_file_name = 'reformatting_testing_qa.csv'

header = ['question', 'answer', 'article_title', 'PMID', 'author', 'article_date']

# Paging window for the search: how many hits to fetch and the offset of the
# first hit.
page_size = 5
start_offset = 2

client_open_search = get_open_search_client()

search_body = {
    "query": {
        "match": {
            "_index": index_name
        }
    },
}

query_result = open_search_quary(search_body, index_name, client_open_search, page_size, start_offset)

if query_result is not None:

    data = generate_qa_pairs(query_result)

    # newline='' is what the csv module expects from the file object (it does
    # its own line-ending handling); an explicit encoding keeps non-ASCII
    # titles and author names from depending on the platform default.
    with open(saving_file_name, 'a', newline='', encoding='utf-8') as f:
        # The file is opened for appending, so only write the header when the
        # file is still empty; otherwise every re-run would insert another
        # header row between the data rows.
        f.seek(0, 2)  # 2 == io.SEEK_END
        needs_header = f.tell() == 0

        writer = csv.writer(f)
        if needs_header:
            # write the header
            writer.writerow(header)
        # write the data
        writer.writerows(data)




1. Q: What is the purpose of the study?
   A: The purpose of the study is to define the neuropsychological profile of children with benign epilepsy with centrotemporal spikes (BECTS), with a focus on executive functions.

2. Q: How many children with BECTS and healthy children were enrolled in the study?
   A: Fifteen school-aged children with BECTS and fifteen healthy children were enrolled in the study.

3. Q: What domains showed significant differences between patients with BECTS and healthy controls?
   A: Significant differences were observed in the domains of language, executive functions, and academic skills.

4. Q: What are the implications of the study findings?
   A: The study findings suggest that children with new-onset BECTS may experience difficulties in executive attention, language, and academic skills, despite having a normal IQ, low frequency of sleep discharges, and the absence of medication. Early assessment and intervention targeting executive functions are recomme