In [1]:
from Bio import Entrez
from Bio import Medline
from datetime import datetime
from dateutil import parser
import pandas as pd

In [2]:
# PubMed queries, one per author, limited to publication dates from
# 2012/01/01 onward (the far-future upper bound keeps the range open-ended).
searches = [
    '"{0}"[AUTH] AND ("2012/01/01"[PDAT] : "3000/12/31"[PDAT])'.format(author)
    for author in ('Laird AR', 'Sutherland MT')
]

# Contact e-mail that Bio.Entrez passes along with its requests.
Entrez.email = 'tsalo006@fiu.edu'

In [3]:
# Medline publication types ('PT' field) to keep; compared case-insensitively.
acceptable_formats = ['journal article', 'comparative study', 'editorial',
                      'introductory journal article']

# Supplies whatever the 'DP' string leaves out. Without an explicit default,
# dateutil borrows the missing month/day from today's date, which makes the
# output depend on when the notebook is run and can raise (e.g. "2017 Feb"
# parsed on the 31st of a month).
_DATE_DEFAULT = datetime(1900, 1, 1)


def _parse_pub_date(date_str):
    """Split a Medline 'DP' string into ``(year, month, day)``.

    Components missing from the string (e.g. "2017 Jul") default to 1.
    Strings dateutil cannot parse (e.g. "2015 Spring") fall back to the
    leading four-digit year, with month and day set to 1.  A missing or
    unusable value yields ``(None, None, None)``.
    """
    if not date_str:
        return None, None, None
    try:
        parsed = parser.parse(date_str, default=_DATE_DEFAULT)
    except (ValueError, OverflowError):
        leading = date_str.strip()[:4]
        if len(leading) == 4 and leading.isdigit():
            return int(leading), 1, 1
        return None, None, None
    return parsed.year, parsed.month, parsed.day


def _extract_doi(record):
    """Return the first DOI listed in the record's 'AID' field, or ''."""
    for aid in record.get('AID', []):
        if aid.endswith(' [doi]'):
            return aid.replace(' [doi]', '')
    return ''


rows = []
# An article co-authored by several of the searched authors matches several
# queries; remember PMIDs already exported so each article is written once.
seen_pmids = set()

for TERM in searches:
    # The first query is only used for its hit count, so that the second
    # query can ask for every matching ID at once.
    h = Entrez.esearch(db='pubmed', retmax='2', term=TERM)
    try:
        result = Entrez.read(h)
    finally:
        h.close()
    print('Total number of publications containing {0}: {1}'.format(TERM, result['Count']))

    h_all = Entrez.esearch(db='pubmed', term=TERM, retmax=result['Count'])
    try:
        result_all = Entrez.read(h_all)
    finally:
        h_all.close()
    ids_all = result_all['IdList']
    if not ids_all:
        # Nothing matched: skip the fetch rather than requesting zero IDs.
        continue

    h = Entrez.efetch(db='pubmed', id=ids_all, rettype='medline', retmode='text')
    try:
        # Records are read from the handle while iterating, so it is closed
        # only after the loop has finished.
        for record in Medline.parse(h):
            pub_types = record.get('PT', [])
            if not any(type_.lower() in acceptable_formats for type_ in pub_types):
                continue

            pmid = record.get('PMID')
            if pmid is not None:
                if pmid in seen_pmids:
                    continue
                seen_pmids.add(pmid)

            year, month, day = _parse_pub_date(record.get('DP'))

            rows.append([
                pmid,
                record.get('PMC', ''),
                _extract_doi(record),
                record.get('TI'),
                record.get('AU'),
                year,
                month,
                day,
                record.get('TA'),
                record.get('VI', ''),
                record.get('IP', ''),
                record.get('PG', ''),
                record.get('AB', ''),
            ])
    finally:
        h.close()

df = pd.DataFrame(columns=['pmid', 'pmcid', 'doi', 'title', 'authors',
                           'year', 'month', 'day',
                           'journal', 'volume', 'issue', 'pages',
                           'abstract'],
                  data=rows)
df.to_csv('articles.csv', index=False)

Total number of publications containing "Laird AR"[AUTH] AND ("2012/01/01"[PDAT] : "3000/12/31"[PDAT]): 85
Total number of publications containing "Sutherland MT"[AUTH] AND ("2012/01/01"[PDAT] : "3000/12/31"[PDAT]): 13


In [4]:
# Preview the first five exported articles.
df.head(n=5)

Unnamed: 0,pmid,pmcid,doi,title,authors,year,month,day,journal,volume,issue,pages,abstract
0,29180258,,10.1016/j.neubiorev.2017.11.012,Ten simple rules for neuroimaging meta-analysis.,"[Muller VI, Cieslik EC, Laird AR, Fox PT, Radu...",2017,11,24,Neurosci Biobehav Rev,,,,Neuroimaging has evolved into a widely used me...
1,29030105,,10.1016/j.neuroimage.2017.10.020,Definition and characterization of an extended...,"[Camilleri JA, Muller VI, Fox P, Laird AR, Hof...",2017,10,13,Neuroimage,165.0,,138-147,Neuroimaging evidence suggests that executive ...
2,28521007,,10.1093/cercor/bhx121,Computing the Social Brain Connectome Across S...,"[Alcala-Lopez D, Smallwood J, Jefferies E, Van...",2017,5,18,Cereb Cortex,,,1-26,Social skills probably emerge from the interac...
3,28467917,,10.1016/j.cortex.2017.03.016,Resting-state functional reorganization in Par...,"[Tahmasian M, Eickhoff SB, Giehl K, Schwartz F...",2017,7,1,Cortex,92.0,,119-138,Parkinson's disease (PD) is a common progressi...
4,28222386,PMC5408583,10.1016/j.neuroimage.2016.12.037,Heterogeneous fractionation profiles of meta-a...,"[Laird AR, Riedel MC, Okoe M, Jianu R, Ray KL,...",2017,4,1,Neuroimage,149.0,,424-435,Computational cognitive neuroimaging approache...
