In [None]:
import re
import requests
import urllib.request
from bs4 import BeautifulSoup
import json

# PubMed search term. May be a single word or several space-separated words;
# it is normalised by format_query() and then appended to the esearch URL as `term`.
# Alternative example query (disabled):
# query = 'P2RY8'
query = 'Intelligence'

# Formats query in correct format
def format_query(search_query):
    """Return ``search_query`` formatted for use as the esearch ``term`` value.

    A query without any whitespace is returned unchanged. A query that
    contains whitespace is split into words, the words are joined with ``+``
    and the result is wrapped in double quotes (e.g. ``'machine learning'``
    becomes ``'"machine+learning"'``).

    Whitespace is detected the same way ``str.split()`` splits it, so tabs
    and newlines are treated like spaces instead of being passed through raw
    into the request URL.

    Args:
        search_query: The search text as typed by the user (a ``str``).

    Returns:
        The formatted query string.
    """
    # Checking only for ' ' would miss tabs/newlines that split() still splits on.
    if not any(ch.isspace() for ch in search_query):
        return search_query
    return '"' + '+'.join(search_query.split()) + '"'

query = format_query(query)
print("Query: " + query)

# common settings between esearch and efetch
base_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
db = 'db=pubmed'

# esearch specific settings
search_eutil = 'esearch.fcgi?'
search_term = '&term=' + query
search_usehistory = '&usehistory=y'
search_rettype = '&rettype=json'

search_url = base_url + search_eutil + db + search_term + search_usehistory + search_rettype
print(search_url)

f = urllib.request.urlopen(search_url)
search_data = f.read().decode('utf-8')

# obtain total abstract count
total_abstract_count = int(re.findall("<Count>(\d+?)</Count>", search_data)[0])

# obtain webenv and querykey settings for efetch command
fetch_webenv = "&WebEnv=" + re.findall("<WebEnv>(\S+)<\/WebEnv>", search_data)[0]
fetch_querykey = "&query_key=" + re.findall("<QueryKey>(\d+?)</QueryKey>", search_data)[0]

for i in range(100):
    # other efetch settings
    fetch_eutil = 'efetch.fcgi?'
    retmax = 1
    retstart = i+1
    fetch_retstart = "&retstart=" + str(retstart)
    fetch_retmax = "&retmax=" + str(retmax)
    fetch_retmode = "&retmode=xml"  # Use XML for structured data
    fetch_rettype = "&rettype=abstract"

    fetch_url = base_url + fetch_eutil + db + fetch_querykey + fetch_webenv + fetch_retstart + fetch_retmax + fetch_retmode + fetch_rettype
    print(fetch_url)

    # Make the request to fetch the XML response
    response = requests.get(fetch_url)

    # Parse the XML response
    soup = BeautifulSoup(response.text, 'xml')

    # Extract PMID, title, and abstract
    pmid = soup.find("PMID").text if soup.find("PMID") else None
    title = soup.find("ArticleTitle").text if soup.find("ArticleTitle") else None
    abstract_tag = soup.find("AbstractText")
    abstract = abstract_tag.text if abstract_tag else None

    # Creating JSON structure with only PMID, title, and abstract
    data = {
        "id": pmid,
        "title": title,
        "text": abstract
    }

    print(data)

    # Save JSON data to a file
    output_file = "pubmed_data.json"
    with open(output_file, 'w') as json_file:
        json.dump(data, json_file, indent=2)

    !curl -X POST "https://localhost:9200/pub_med/_doc" -u admin:admin --insecure -H 'Content-Type: application/json' -d @pubmed_data.json  



Query: Intelligence
http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=Intelligence&usehistory=y&rettype=json
http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=1&retmax=1&retmode=xml&rettype=abstract
{'id': '38217783', 'title': 'Towards a dynamic, comprehensive conceptualization of dyslexia.', 'text': 'Here we build from the central strength of the existing definition of dyslexia-its emphasis on neurobiological origins-and proffer a set of seven core principles for a new, more comprehensive conceptualization of dyslexia. These principles derive from two major research directions: (1) the still evolving history of attempts to explain dyslexia, including in varied writing systems; and (2) the study of the reading brain circuit, its development, and its genetic and environmental influences. What emerges from connecting these two directions is a dynamic conceptualization of dyslexia that incorp

{"_index":"pub_med","_id":"BRwABY0BZKko4otCVawF","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":116,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=8&retmax=1&retmode=xml&rettype=abstract
{'id': '38217478', 'title': 'Evaluating insomnia queries from an artificial intelligence chatbot for patient education.', 'text': "To evaluate the accuracy of ChatGPT in addressing insomnia-related queries for patient education. To further assess ChatGPT's ability to provide varies responses based on differinf prompting scenarios."}
{"_index":"pub_med","_id":"BhwABY0BZKko4otCW6zP","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":117,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=9&retmax=1&retmode=xml&rettype=abstract
{'id': '38217

{"_index":"pub_med","_id":"DBwABY0BZKko4otCdqw8","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":123,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=15&retmax=1&retmode=xml&rettype=abstract
{'id': '38217155', 'title': 'Voice-controlled quantum chemistry.', 'text': 'Over the past decade, artificial intelligence has been propelled forward by advances in machine learning algorithms and computational hardware, opening up myriads of new avenues for scientific research. Nevertheless, virtual assistants and voice control have yet to be widely used in the natural sciences. Here, we present ChemVox, an interactive Amazon Alexa skill that uses speech recognition to perform quantum chemistry calculations. This new application interfaces Alexa with cloud computing and returns the results through a capable device. ChemVox paves the way to making computational chem

{"_index":"pub_med","_id":"ExwABY0BZKko4otClawU","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":130,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=22&retmax=1&retmode=xml&rettype=abstract
{'id': '38216937', 'title': 'In vivo therapy of osteosarcoma using anion transporters-based supramolecular drugs.', 'text': 'Osteosarcoma represents a serious clinical challenge due to its widespread genomic alterations, tendency for drug resistance and distant metastasis. New treatment methods are urgently needed to address those treatment difficulties in osteosarcoma to improve patient prognoses. In recent years, small-molecule based anion transporter have emerged as innovative and promising therapeutic compound with various biomedical applications. However, due to a lack of efficient delivery methods, using ion transporters as therapeutic drugs in vivo remains a

{"_index":"pub_med","_id":"GRwABY0BZKko4otCsKyZ","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":136,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=28&retmax=1&retmode=xml&rettype=abstract
{'id': '38216725', 'title': 'Highly anisotropic spin transport in ultrathin black phosphorus.', 'text': 'In anisotropic crystals, the direction-dependent effective mass of carriers can have a profound impact on spin transport dynamics. The puckered crystal structure of black phosphorus leads to direction-dependent charge transport and optical response, suggesting that it is an ideal system for studying anisotropic spin transport. To this end, we fabricate and characterize high-mobility encapsulated ultrathin black-phosphorus-based spin valves in a four-terminal geometry. Our measurements show that in-plane spin lifetimes are strongly gate tunable and exceed one nan

{"_index":"pub_med","_id":"HhwABY0BZKko4otCx6zZ","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":141,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=33&retmax=1&retmode=xml&rettype=abstract
{'id': '38216635', 'title': 'Single-cell profiling of the microenvironment in human bone metastatic renal cell carcinoma.', 'text': 'Bone metastasis is of common occurrence in renal cell carcinoma with poor prognosis, but no optimal treatment approach has been established for bone metastatic renal cell carcinoma. To explore the potential therapeutic targets for bone metastatic renal cell carcinoma, we profile single cell transcriptomes of 6 primary renal cell carcinoma and 9 bone metastatic renal cell carcinoma. We also include scRNA-seq data of early-stage renal cell carcinoma, late-stage renal cell carcinoma, normal kidneys and healthy bone marrow samples in the 

{"_index":"pub_med","_id":"IxwABY0BZKko4otC5qwy","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":146,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=38&retmax=1&retmode=xml&rettype=abstract
{'id': '38216413', 'title': 'Artificial Intelligence for Breast Cancer Detection on Mammography: Factors Related to Cancer Detection.', 'text': 'Little is known about the factors affecting the Artificial Intelligence (AI) software performance on mammography for breast cancer detection. This study was to identify factors associated with abnormality scores assigned by the AI software.'}
{"_index":"pub_med","_id":"JBwABY0BZKko4otC66xm","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":147,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart

{"_index":"pub_med","_id":"LRwBBY0BZKko4otCG6yA","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":156,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=48&retmax=1&retmode=xml&rettype=abstract
{'id': '38216132', 'title': 'Correction: Deep learning radiomics of multimodal ultrasound for classifying metastatic\xa0cervical\xa0lymphadenopathy into primary cancer sites: a feasibility study.', 'text': None}
{"_index":"pub_med","_id":"LhwBBY0BZKko4otCIqw2","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":157,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=49&retmax=1&retmode=xml&rettype=abstract
{'id': '38216119', 'title': 'Data standards in drug discovery: a long way to go.', 'text': 'Each year, millions to trillions of data p

{"_index":"pub_med","_id":"NBwBBY0BZKko4otCO6yL","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":163,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=55&retmax=1&retmode=xml&rettype=abstract
{'id': '38215859', 'title': 'A novel artificial intelligence-assisted "vascular-healing" diagnosis for prediction of future clinical relapse in patients with ulcerative colitis: a prospective cohort study.', 'text': 'Image-enhanced endoscopy (IEE) has attracted attention as a method for detecting inflammation and predicting outcomes in patients with ulcerative colitis (UC); however, the procedure requires specialist endoscopists. Artificial intelligence (AI)-assisted IEE may help non-experts to provide objective accurate predictions using optical imaging. We aimed to develop a novel AI-based system using 8853 images from 167 patients with UC to diagnose "vascular-h

{"_index":"pub_med","_id":"OhwBBY0BZKko4otCVaxk","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":169,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=61&retmax=1&retmode=xml&rettype=abstract
{'id': '38215708', 'title': 'Perioperative Fluid and Vasopressor Therapy in 2050: From Experimental Medicine to Personalization Through Automation.', 'text': 'Intravenous (IV) fluids and vasopressor agents are key components of hemodynamic management. Since their introduction, their use in the perioperative setting has continued to evolve, and we are now on the brink of automated administration. IV fluid therapy was first described in Scotland during the 1832 cholera epidemic, when pioneers in medicine saved critically ill patients dying from hypovolemic shock. However, widespread use of IV fluids only began in the 20th century. Epinephrine was discovered and purif

{"_index":"pub_med","_id":"PxwBBY0BZKko4otCb6wZ","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":174,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=66&retmax=1&retmode=xml&rettype=abstract
{'id': '38215555', 'title': 'Artificial intelligence-based prediction models for acute myeloid leukemia using real-life data: A DATAML registry study.', 'text': 'We designed artificial intelligence-based prediction models (AIPM) using 52 diagnostic variables from 3687 patients included in the DATAML registry treated with intensive chemotherapy (IC, N\xa0=\xa03030) or azacitidine (AZA, N\xa0=\xa0657) for an acute myeloid leukemia (AML). A neural network called multilayer perceptron (MLP) achieved a prediction accuracy for overall survival (OS) of 68.5% and 62.1% in the IC and AZA cohorts, respectively. The Boruta algorithm could select the most important variables f

{"_index":"pub_med","_id":"RBwBBY0BZKko4otCh6wN","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":179,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=71&retmax=1&retmode=xml&rettype=abstract
{'id': '38215452', 'title': 'Evaluating the Accuracy of ChatGPT and Google BARD in Fielding Oculoplastic Patient Queries: A Comparative Study on Artificial versus Human Intelligence.', 'text': "This study evaluates and compares the accuracy of responses from 2 artificial intelligence platforms to patients' oculoplastics-related questions."}
{"_index":"pub_med","_id":"RRwBBY0BZKko4otCi6wv","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":180,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=72&retmax=1&retmode=xml&rettype=abstract
{'i

{"_index":"pub_med","_id":"SxwBBY0BZKko4otCpqyx","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":186,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=78&retmax=1&retmode=xml&rettype=abstract
{'id': '38215214', 'title': 'Approximating Projections of Conformational Boltzmann Distributions with AlphaFold2 Predictions: Opportunities and Limitations.', 'text': 'Protein thermodynamics is intimately tied to biological function and can enable processes such as signal transduction, enzyme catalysis, and molecular recognition. The relative free energies of conformations that contribute to these functional equilibria evolved for the physiology of the organism. Despite the importance of these equilibria for understanding biological function and developing treatments for disease, computational and experimental methods capable of quantifying the energetic determinan

{"_index":"pub_med","_id":"TxwBBY0BZKko4otCu6xl","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":190,"_primary_term":1}http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key=1&WebEnv=MCID_65a3127b0d9ecb4b5d1ef741&retstart=82&retmax=1&retmode=xml&rettype=abstract
{'id': '38214975', 'title': "Mannose-Integrated Nanoparticle Hitchhike Glucose Transporter 1 Recycling to Overcome Various Barriers of Oral Delivery for Alzheimer's Disease Therapy.", 'text': "A brain-targeting nanodelivery system has been a hot topic and has undergone rapid progression. However, due to various obstacles such as the intestinal epithelial barrier (IEB) and the blood-brain barrier (BBB), few nanocarriers can achieve brain-targeting through oral administration. Herein, an intelligent oral brain-targeting nanoparticle (FTY@Man NP) constructed from a PLGA-PEG skeleton loaded with fingolimod (FTY) and externally modified with mannose was designed in comb

In [30]:
!curl -X DELETE "https://localhost:9200/pub_med" -u admin:admin --insecure -H 'Content-Type: application/json' -d @pubmed_data.json  


{"acknowledged":true}

In [40]:
# Run a match_all search against the pub_med index with "_source": false, so each
# hit in the response carries only its metadata (_index, _id, _score), not the document body.
# -k skips TLS certificate verification; -u supplies basic-auth credentials.
!curl -X GET "https://localhost:9200/pub_med/_search" -ku admin:admin -H 'Content-Type: application/json' -d '{"_source": false,"query":{"match_all":{}}}'

{"took":641,"timed_out":false,"_shards":{"total":1,"successful":1,"skipped":0,"failed":0},"hits":{"total":{"value":110,"relation":"eq"},"max_score":1.0,"hits":[{"_index":"pub_med","_id":"kBz1BI0BZKko4otChatj","_score":1.0},{"_index":"pub_med","_id":"khz1BI0BZKko4otCiauq","_score":1.0},{"_index":"pub_med","_id":"kxz1BI0BZKko4otCsKuV","_score":1.0},{"_index":"pub_med","_id":"lBz1BI0BZKko4otCtKuF","_score":1.0},{"_index":"pub_med","_id":"lRz1BI0BZKko4otCuKuN","_score":1.0},{"_index":"pub_med","_id":"lhz1BI0BZKko4otCvKtq","_score":1.0},{"_index":"pub_med","_id":"lxz1BI0BZKko4otCwKum","_score":1.0},{"_index":"pub_med","_id":"mBz1BI0BZKko4otCxKs0","_score":1.0},{"_index":"pub_med","_id":"mRz1BI0BZKko4otCx6vo","_score":1.0},{"_index":"pub_med","_id":"mhz1BI0BZKko4otCy6v2","_score":1.0}]}}

In [33]:
# Create the pub_med index; no request body is sent, so no explicit settings or mappings are given.
# NOTE(review): the recorded execution counts of the cells in this notebook are not
# sequential -- confirm the intended run order relative to the delete/search cells.
!curl -XPUT "https://localhost:9200/pub_med" -H 'Content-Type: application/json' -u admin:admin --insecure

{"acknowledged":true,"shards_acknowledged":true,"index":"pub_med"}

In [41]:
# Search pub_med with a term query for "intelligence" on the `text` field and redirect
# the JSON response into filtered_pub_med.json (only curl's progress meter shows in the cell).
# NOTE(review): a term query matches exact indexed tokens; if `text` is an analyzed
# text field a match query may be what is intended -- confirm against the index mapping.
!curl -X GET "https://localhost:9200/pub_med/_search" -ku admin:admin -d '{"query":{"term":{"text": "intelligence"}}}' -H 'Content-Type: application/json' > filtered_pub_med.json

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  5950  100  5907  100    43   456k   3403 --:--:-- --:--:-- --:--:--  484k
