In [8]:
import pandas as pd
import importlib
import functions.pubmed_api
importlib.reload(functions.pubmed_api)
from functions.pubmed_api import get_pmcids, translate_pmid_to_pmcid, get_full_xml, extract_article_data

In [9]:
# PubMed query combining two MeSH headings.
search_term = '(avian influenza[MeSH Terms]) AND (disease outbreak[MeSH Terms])'

# Second argument passed to the search helper (presumably the maximum number
# of hits to return -- the cell output lists 25 IDs; confirm against
# functions.pubmed_api).
max_results = 25

# NOTE(review): despite the helper's name, the values returned here are fed to
# translate_pmid_to_pmcid in the next cell, i.e. they are treated as PubMed IDs.
pubmed_ids = get_pmcids(search_term, max_results)
pubmed_ids

['39535188',
 '28915920',
 '39053575',
 '18455796',
 '16566867',
 '39695812',
 '39126117',
 '37166760',
 '38932174',
 '38543823',
 '37632036',
 '39339001',
 '38413243',
 '37651182',
 '39054655',
 '30135686',
 '34835082',
 '28859267',
 '40145745',
 '38781927',
 '17913533',
 '37853425',
 '32135031',
 '29368637',
 '14751692']

In [10]:
# Translate every PubMed ID into its PMC ID, one lookup per ID, keeping the
# same order as pubmed_ids so the two lists stay positionally aligned.
pmc_ids = [translate_pmid_to_pmcid(pubmed_id) for pubmed_id in pubmed_ids]

pmc_ids

['PMC11633217',
 'PMC5603032',
 'PMC11485258',
 'PMC7133821',
 'PMC7089356',
 'PMC11658083',
 'PMC11312817',
 'PMC10176828',
 'PMC11209369',
 'PMC10975894',
 'PMC10459121',
 'PMC11434679',
 'PMC10977829',
 'PMC10502594',
 'PMC11285265',
 'PMC6092596',
 'PMC8622263',
 'PMC5583414',
 'PMC11998540',
 'PMC11138997',
 'PMC7110774',
 'PMC10585835',
 'PMC9285678',
 'PMC5784696',
 'PMC7135713']

In [14]:
# Column layout of the final table. Any additional columns returned by
# extract_article_data are kept and placed after these.
full_text_columns = ['pmid', 'pmcid', 'title', 'abstract', 'full_text', 'authors']

# Fetch and parse each article, collecting the per-article frames in a list.
# Concatenating once at the end avoids re-copying the accumulated frame on
# every iteration and avoids concatenating onto an empty DataFrame.
article_frames = []
for pmc_id in pmc_ids:
    df_temp = extract_article_data(get_full_xml(pmc_id))
    # Tag the rows with the PMC ID they were fetched with *before* combining,
    # so the pmcid column stays aligned even if an article yields zero or
    # several rows (a positional assignment after concatenation would raise
    # on a length mismatch or silently misalign).
    article_frames.append(df_temp.assign(pmcid=pmc_id))

if article_frames:
    df_all_articles = pd.concat(article_frames, ignore_index=True)
    extra_columns = [col for col in df_all_articles.columns if col not in full_text_columns]
    # Restore the documented column order; reindex also adds any expected
    # column that no article provided (filled with NaN).
    df_all_articles = df_all_articles.reindex(columns=full_text_columns + extra_columns)
else:
    # pd.concat raises on an empty list, so build the empty table directly.
    df_all_articles = pd.DataFrame(columns=full_text_columns)

# Keep only articles that have BOTH an abstract and a full text: a row missing
# either one is dropped. (This is what the previous `thresh=2` over these two
# columns did; use how='all' instead if only articles lacking both should go.)
# The original row labels are kept, so gaps in the index mark dropped articles.
df_full_texts = df_all_articles.dropna(subset=['abstract', 'full_text'], how='any')
df_full_texts

Unnamed: 0,pmid,pmcid,title,abstract,full_text,authors
0,39535188,PMC11633217,Avian influenza A (H5N1) virus in dairy cattle...,ABSTRACTSince the emergence of highly pathogen...,INTRODUCTION Influenza viruses are classified ...,https://orcid.org/0000-0002-2878-5714 Mostafa ...
1,28915920,PMC5603032,Current situation of H9N2 subtype avian influe...,"In China, H9N2 subtype avian influenza outbrea...",Introduction Avian influenza (AI) is initially...,"Gu Min gumin@yzu.edu.cn 1 2 3, Xu Lijun xuliju..."
2,39053575,PMC11485258,Spillover of highly pathogenic avian influenza...,The highly pathogenic avian influenza (HPAI) H...,Main The HPAI virus H5Nx goose/Guangdong linea...,http://orcid.org/0000-0003-1643-8560 Caserta L...
5,39695812,PMC11658083,Are we cultivating the perfect storm for a hum...,The emergence of highly pathogenic avian influ...,Introduction Dairy cattle infection with H5N1 ...,https://orcid.org/0000-0002-3769-390X Perez-Ac...
6,39126117,PMC11312817,Avian Influenza Virus A(H5Nx) and Prepandemic ...,Avian influenza virus has been long considered...,1. A Brief History of Influenza Virus A(H 5 N ...,https://orcid.org/0000-0001-8811-195X Focosi D...
8,38932174,PMC11209369,Molecular Markers and Mechanisms of Influenza ...,Influenza A viruses continue to be a serious h...,1. Introduction Influenza viruses are segmente...,"Guo Xinyi 1 †, Zhou Yang 2 †, Yan Huijun 3, An..."
9,38543823,PMC10975894,Insights from Avian Influenza: A Review of Its...,Avian influenza viruses (AIVs) have posed a si...,1. Introduction Influenza is a common illness ...,https://orcid.org/0009-0003-6258-5059 He Jiann...
10,37632036,PMC10459121,Multiple Vaccines and Strategies for Pandemic ...,Avian influenza viruses (AIV) are a continuous...,1. Introduction Avian influenza virus (AIV) ca...,"Xu Hai 1 2, Zhu Shanyuan 1 *, Govinden Roshini..."
11,39339001,PMC11434679,Review of the Highly Pathogenic Avian Influenz...,Highly pathogenic avian influenza (HPAI) is a ...,1. Introduction Avian influenza (AI) is a high...,https://orcid.org/0000-0002-5575-6214 Vagnozzi...
13,37651182,PMC10502594,Reported Global Avian Influenza Detections Amo...,BackgroundAvian influenza (AI) virus detection...,Introduction Avian influenza (AI) viruses are ...,"Szablewski Christine M MPH, DVM https://orcid...."


In [15]:
# Write the filtered table as tab-separated text, overwriting any existing
# file (write mode is the to_csv default). The DataFrame index is written as
# the first, unnamed column.
df_full_texts.to_csv('full_text.tsv', sep='\t')