# Example Code to test the Pub_worm API

In [None]:
import sys
import inspect
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import PatternFill
from openpyxl.utils import get_column_letter
import os
import json

# Add pub_worm directory to the Python path
sys.path.append("/Users/dan/Code/Python/pub_worm")

from pub_worm.ncbi.entreze_api import EntrezAPI

# Report which file the EntrezAPI class was loaded from, so it is obvious
# whether the local checkout or an installed copy is in use.
module = inspect.getmodule(EntrezAPI)
if not hasattr(module, "__file__"):
    print("Could not determine the file path.")
else:
    file_path = module.__file__
    print("EntrezAPI imported from:", file_path)

In [None]:
ncbi_api = EntrezAPI()

# Each call below consumes the result of the previous one:
# esearch -> elink (pmid to pmcid) -> epost (db=pmc) -> efetch.
search_term = "Marnett L[au] AND (2019/01/01:2024/04/16[pdat])AND (vanderbilt[affil])"
esearch_params = {'term': search_term }
esearch_result = ncbi_api.entreze_esearch(esearch_params)

elink_result = ncbi_api.entreze_elink_pmid_to_pmcid(esearch_result)

params= {'db': 'pmc'}
epost_result = ncbi_api.entreze_epost(elink_result, params)

efetch_result = ncbi_api.entreze_efetch(epost_result)


efetch_result_df = pd.DataFrame(efetch_result)

# The output directory may not exist yet (e.g. on a fresh checkout); without
# this the first open() below fails with FileNotFoundError.
os.makedirs("output", exist_ok=True)

for index, row in efetch_result_df.iterrows():
    pmcid = row['pmcid']
    body = row['body']
    filename = f"output/{pmcid}.txt"

    # Write body content to file. The encoding is explicit so that non-ASCII
    # article text is written the same way on every platform instead of
    # depending on the locale's default encoding.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(body)

# The bodies now live in their own files; keep only the remaining columns
# in the CSV summary.
efetch_result_df = efetch_result_df.drop('body', axis=1)
efetch_result_df.to_csv("output/pmc_articles.csv",index=False)
efetch_result_df

In [None]:
# Example: post a fixed list of ids with ePost, then request their summaries
data = [
    "10021351", "10022905", "10022914", "10022975",
    "10048487", "10048958", "10049162", "10049362",
    "10049567", "10049576", "10051671", "10051850",
    "10064800", "10066248",
]

ncbi_api = EntrezAPI()
entreze_epost_result = ncbi_api.entreze_epost(data)

# Summaries are only requested when the ePost result contains a 'WebEnv' entry
if 'WebEnv' in entreze_epost_result:
    entreze_summaries_result = ncbi_api.entreze_pmid_summaries(
        entreze_epost_result
    )
    pretty_data = json.dumps(entreze_summaries_result, indent=4)
    print(pretty_data)

In [None]:
# Example code to get full articles from entrez/eutils
import requests
import time

def fetch_full_article(pmid, timeout=30, delay=2):
    """Fetch one article from the E-utilities ``efetch`` endpoint as XML text.

    The request is sent with ``db=pmc`` and ``retmode=xml``; ``pmid`` is
    passed through unchanged as the ``id`` query parameter.
    NOTE(review): because the database is ``pmc``, the id is presumably
    expected to be a PMC id rather than a PubMed id -- confirm with callers.

    Args:
        pmid: Identifier of the article to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.
        delay: Seconds to sleep after the request, to space out successive
            calls to the service.

    Returns:
        The response body as a string when the server answers with HTTP 200,
        otherwise ``None`` (a message is printed describing the failure).
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    params = {
        "db": "pmc",
        "id": pmid,
        "retmode": "xml"
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failures (timeout, DNS, connection reset) are reported
        # the same way as a bad HTTP status: print and return None.
        print(f"Failed to fetch article for PMID {pmid}. Error: {exc}")
        return None
    finally:
        # Pause after every attempt, successful or not, before the caller
        # issues the next request.
        time.sleep(delay)

    if response.status_code == 200:
        return response.text

    print(f"Failed to fetch article for PMID {pmid}. Status code: {response.status_code}")
    return None

# Example usage
import os  # local to this cell so it runs without the first cell's imports

pmids = ["8253488"]  # Replace with the PMID of the article you want to fetch
#pmids = ["10021351", "10022905", "10022914", "10022975", "10048487", "10048958", "10049162", "10049362", "10049567", "10049576", "10051671", "10051850", "10064800", "10066248"]

# The output directory may not exist yet; create it once before writing.
os.makedirs("./output", exist_ok=True)

for pmid in pmids:
    article_xml = fetch_full_article(pmid)
    # fetch_full_article returns None on failure; skip those ids.
    if article_xml:
        # Explicit UTF-8 so the XML text is written identically on every
        # platform instead of depending on the locale's default encoding.
        with open(f"./output/article_{pmid}.xml", "w", encoding="utf-8") as file:
            file.write(article_xml)


In [None]:
import os

# Handle to the live process environment mapping
env_vars = os.environ

# Dump every environment variable as "NAME: value", one per line
for key, value in env_vars.items():
    print(key, value, sep=": ")

In [None]:
import requests
import json
from urllib.parse import urlencode
import xml.etree.ElementTree as ET

# Step 1: ESearch
# Run the query with usehistory=y and read back the WebEnv / query_key pair
# from the JSON response so the next step can refer to the stored result set.
url_esearch = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?"
params_esearch = {
    "db": "pubmed",
    "term": "Marnett L[au] AND (2019/01/01:2024/04/16[pdat]) AND (vanderbilt[affil])",
    "usehistory": "y",
    "retmode": "json"
}
# A timeout keeps the cell from hanging forever on a stalled connection.
response_esearch = requests.get(url_esearch + urlencode(params_esearch), timeout=30)
# Fail here with a clear HTTPError instead of a confusing JSON/KeyError below.
response_esearch.raise_for_status()
data_esearch = response_esearch.json()
webenv = data_esearch["esearchresult"]["webenv"]
query_key = data_esearch["esearchresult"]["querykey"]

# Step 2: ELink
# Follow the pubmed_pmc link for the stored result set; the response is
# requested as XML and printed as raw text.
url_elink = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?"
params_elink = {
    "dbfrom": "pubmed",
    "linkname": "pubmed_pmc",
    "query_key": query_key,
    "WebEnv": webenv,
    "retmode": "xml"
}
response_elink = requests.get(url_elink + urlencode(params_elink), timeout=30)
response_elink.raise_for_status()
#data_elink = response_elink.json()
data_elink = response_elink.text

print(data_elink)

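# # Extracting PMC IDs (only works if ELink is requested with retmode=json
# # and data_elink = response_elink.json(); the active code above uses XML text)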
# linksets = data_elink.get("linksets", [])
# pmc_ids = []
# for linkset in linksets:
#     links = linkset.get("linksetdbs", [])
#     for link in links:
#         ids = link.get("links", [])
#         for id in ids:
#             pmc_ids.append(id["id"])

# print("PMC IDs:")
# for pmc_id in pmc_ids:
#     print(pmc_id)
