In [6]:
import pandas as pd
import requests
from tqdm import tqdm

# Load the PMC-Patients table and key it by patient id in a single chained step.
df = pd.read_csv("PMC-Patients.csv").set_index('patient_id')

def fetch_article_info(pmid, timeout=10):
    """Look up one article on Europe PMC and report its open-access status.

    Sends a search request for ``EXT_ID:<pmid>`` to the Europe PMC REST
    search endpoint and inspects the first hit of the JSON response.

    Args:
        pmid: Identifier placed after ``EXT_ID:`` in the search query.
        timeout: Seconds to wait for the server before giving up on the
            request. Defaults to 10.

    Returns:
        A tuple ``(is_open_access, abstract_text)``. ``is_open_access`` is
        True only when the first hit has ``isOpenAccess == "Y"``;
        ``abstract_text`` is that hit's ``abstractText`` value, or None when
        the field is absent. ``(False, None)`` is returned when the request
        fails, the HTTP status is not 200, the body is not valid JSON, or
        the search produced no hits.
    """
    url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
    # Let requests build and encode the query string instead of formatting it by hand.
    # NOTE(review): the query matches on EXT_ID alone; confirm whether it should
    # also be restricted to a single source (e.g. "AND SRC:MED").
    params = {"query": f"EXT_ID:{pmid}", "resultType": "core", "format": "json"}

    try:
        # Without a timeout a single stalled connection would block the caller forever.
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        # A failed lookup is reported the same way as a non-200 response so
        # that one bad request does not abort a long batch run.
        return False, None

    if response.status_code != 200:
        return False, None

    try:
        data = response.json()
    except ValueError:
        # Body was not valid JSON (e.g. an HTML error page served with status 200).
        return False, None

    if not isinstance(data, dict):
        return False, None

    # Either level may be missing or empty when the search has no hits.
    results = (data.get("resultList") or {}).get("result") or []
    if not results:
        return False, None

    result = results[0]
    is_open_access = result.get("isOpenAccess", "N") == "Y"
    abstract_text = result.get("abstractText", None)
    return is_open_access, abstract_text

valid_rows = []
limit = 100  # maximum number of rows to query; a value <= 0 processes every row

# Slice up front so the progress-bar total always equals the work actually done.
rows_to_check = df.head(limit) if limit > 0 else df

for patient_id, row in tqdm(rows_to_check.iterrows(), total=len(rows_to_check), desc="Processing PMIDs"):
    is_open, abstract_text = fetch_article_info(row['PMID'])
    if is_open:
        # Work on a copy: rows yielded by iterrows() should not be modified in place.
        record = row.copy()
        record['abstract_text'] = abstract_text
        valid_rows.append(record)

# A frame built from a list of Series has an unnamed index, so a plain
# reset_index() would call the id column "index". Restore the source index
# name first so the column keeps its original label.
filtered_df = (
    pd.DataFrame(valid_rows)
    .rename_axis(df.index.name)
    .reset_index()
)


Processing PMIDs: 100%|██████████| 100/100 [00:15<00:00,  6.49it/s]


Check the DataFrame

In [11]:
# Preview the first rows only; rendering the whole frame bloats the notebook output.
filtered_df.head()

'Coordination of care for patients with neuro-ophthalmic disorders can be very challenging in the community emergency department (ED) setting. Unlike university- or tertiary hospital-based EDs, the general ophthalmologist is often not as familiar with neuro-ophthalmology and the examination of neuro-ophthalmology patients in the acute ED setting. Embracing image capturing of the fundus, using a non-mydriatic camera, may be a game-changer for communication between ED physicians, ophthalmologists, and tele-neurologists. Patient care decisions can now be made with photographic documentation that is then conveyed through HIPAA-compliant messaging with accurate and useful information with both ease and convenience. Likewise, external photos of the anterior segment and motility are also helpful. Finally, establishing clinical and imaging guidelines for common neuro-ophthalmic disorders can help facilitate complete and appropriate evaluation and treatment.'

Run this cell to save the DataFrame as a CSV file.

Naming convention for the output file:\
oa: Open Access\
num_rows: The number of rows included in the saved dataset

In [None]:
# The row count is embedded in the file name so differently sized exports do not overwrite each other.
num_rows = filtered_df.shape[0]
output_csv_file = f"PMC-Patients-oa-with-abstract-{num_rows}.csv"

# Write the frame without the positional index column.
filtered_df.to_csv(path_or_buf=output_csv_file, index=False)