In [4]:
import pandas as pd
import importlib
import functions.pubmed_api
importlib.reload(functions.pubmed_api)
from functions.pubmed_api import get_pmids, translate_pmid_to_pmcid, get_full_xml, extract_article_data

In [5]:
# PubMed query restricted to MeSH terms: avian influenza AND disease outbreaks.
# MeSH reference: https://www.ncbi.nlm.nih.gov/mesh?term=Disease+Outbreaks
search_term = '(avian influenza[MeSH Terms]) AND (disease outbreak[MeSH Terms])'

# TODO: extend the search to other diseases such as EHEC and Q fever

# Second positional argument of get_pmids.
# NOTE(review): presumably the number of ids to request — confirm in functions.pubmed_api
requested_count = 3

pubmed_ids = get_pmids(search_term, requested_count)
pubmed_ids

['28915920', '39535188', '40145745']

In [6]:
# Translate every PubMed id into its PMC id, keeping the order of pubmed_ids.
# NOTE(review): what translate_pmid_to_pmcid returns for an article without a
# PMC entry is not visible here — confirm before relying on every element.
pmc_ids = [translate_pmid_to_pmcid(pmid) for pmid in pubmed_ids]

pmc_ids

['PMC5603032', 'PMC11633217', 'PMC11998540']

In [7]:
# Columns the article table is expected to carry, in display order.
full_text_columns = ['pmid', 'doi', 'title', 'abstract', 'full_text', 'authors']

# Download and parse each article, collecting the per-article frames in a list.
# Concatenating once afterwards avoids re-copying the accumulated table on every
# iteration and avoids seeding pd.concat with an empty frame.
# assumes extract_article_data returns a DataFrame (it is fed to pd.concat) — TODO confirm
article_frames = [extract_article_data(get_full_xml(pmc_id)) for pmc_id in pmc_ids]

if article_frames:
    df_full_texts = pd.concat(article_frames, ignore_index=True)
    # Keep the expected columns first (added empty if the parser did not
    # provide them), followed by any additional columns the parser produced.
    extra_columns = [col for col in df_full_texts.columns if col not in full_text_columns]
    df_full_texts = df_full_texts.reindex(columns=full_text_columns + extra_columns)
else:
    # No articles requested: still expose an empty table with the expected schema.
    df_full_texts = pd.DataFrame(columns=full_text_columns)

# df_full_texts['pmcid'] = pmc_ids

# Drop articles that contain neither an abstract nor a full text
# NOTE(review): with thresh=2 on a two-column subset, rows missing EITHER field
# would be dropped; how='all' looks closer to the intent stated above — confirm.
# df_full_texts.dropna(subset=['abstract', 'full_text'], inplace=True, thresh=2, ignore_index=True)
df_full_texts

Unnamed: 0,pmid,doi,title,abstract,full_text,authors
0,28915920,10.1186/s13567-017-0453-2,Current situation of H9N2 subtype avian influe...,"In China, H9N2 subtype avian influenza outbrea...",Introduction Avian influenza (AI) is initially...,"Gu Min gumin@yzu.edu.cn 1 2 3, Xu Lijun xuliju..."
1,39535188,10.1128/mbio.02542-24,Avian influenza A (H5N1) virus in dairy cattle...,ABSTRACTSince the emergence of highly pathogen...,INTRODUCTION Influenza viruses are classified ...,https://orcid.org/0000-0002-2878-5714 Mostafa ...
2,40145745,10.1128/jvi.02209-24,Highly pathogenic avian influenza H5N1: histor...,ABSTRACTThe H5N1 avian panzootic has resulted ...,INTRODUCTION Influenza virus pandemics are cau...,https://orcid.org/0000-0003-4121-776X Krammer ...


In [8]:
# Write the collected article table to a tab-separated file, overwriting any
# previous export. The row index is written too (pandas default), so the file
# starts with an unnamed index column.
output_path = 'full_text.tsv'
df_full_texts.to_csv(output_path, sep='\t', mode='w')