# First title

In [63]:
import pandas as pd
from Bio import Entrez

In [64]:
def search(query):
    Entrez.email = 'corentin.gosling@gmail.com'
    handle = Entrez.esearch(db='pubmed',
                            sort='date',
                            retmax='250000',
                            retmode='xml',
                            term=query)
    results = Entrez.read(handle)
    return results

def fetch_details(id_list):
    ids = ','.join(id_list)
    Entrez.email = 'corentin.gosling@gmail.com'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    results = Entrez.read(handle)
    return results

def extract_doi(list_id):
   res = list('no doi')
   for i in range(0, len(list_id)):
      if "10." in list_id[i]:
         res = list_id[i]
   return res

def extract_email(list_id):
   res = list('no contact author')
   for i in range(0, len(list_id)):
      if "@" in list_id[i]['AffiliationInfo'][0]['Affiliation']:  # and "Electronic" in list_id[i]['AffiliationInfo'][0]['Affiliation']:
         res = list_id[i]['AffiliationInfo'][0]['Affiliation']
   return res


In [65]:
chunk_size = 10000  

studies = search('(autism[tiab] OR ASD[tiab]) AND (intervention*[tiab]) AND (meta-analysis[pt])')
studiesIdList = studies['IdList']


In [66]:
title_list= []
doi_list= []
pmid_list= []
author1_list= []
authorN_list= []
author_cor_list= []
email_list= []
abstract_list=[]
journal_list = []
pubdate_year_list = []

studies = fetch_details(studiesIdList)


In [None]:
for chunk_i in range(0, len(studiesIdList), chunk_size):
  print(chunk_i)
  chunk = studiesIdList[chunk_i:chunk_i + chunk_size]
  papers = fetch_details(chunk)
  for i, paper in enumerate (papers['PubmedArticle']):
    # i = 0
    # print(i)
    # paper = papers['PubmedArticle'][0]
    title_list.append(paper['MedlineCitation']['Article']['ArticleTitle'])
    pmid_list.append(paper['MedlineCitation']['PMID'])
    doi_list.append(extract_doi(paper['PubmedData']['ArticleIdList']))
    journal_list.append(paper['MedlineCitation']['Article']['Journal']['Title'])

    try:
     paper['MedlineCitation']['Article']['AuthorList'][0]['AffiliationInfo'][0]['Affiliation']
     author1_list.append(paper['MedlineCitation']['Article']['AuthorList'][0]['ForeName'] + ' ' + paper['MedlineCitation']['Article']['AuthorList'][0]['LastName'] + ' [' + paper['MedlineCitation']['Article']['AuthorList'][0]['AffiliationInfo'][0]['Affiliation'] + ']')
    except:
     author1_list.append("No 1st author recorded")

    last = len(paper['MedlineCitation']['Article']['AuthorList']) - 1
    try:
     paper['MedlineCitation']['Article']['AuthorList'][last]['AffiliationInfo'][0]['Affiliation']
     authorN_list.append(paper['MedlineCitation']['Article']['AuthorList'][last]['ForeName'] + ' ' + paper['MedlineCitation']['Article']['AuthorList'][last]['LastName'] + ' [' + paper['MedlineCitation']['Article']['AuthorList'][last]['AffiliationInfo'][0]['Affiliation'] + ']')
    except:
     authorN_list.append("No last author recorded")
    
    try:
     author_cor_list.append(paper['MedlineCitation']['Article']['AuthorList'])
    except:
     author_cor_list.append("no")
    try:
     email_list.append(extract_email(paper['MedlineCitation']['Article']['AuthorList']))
    except:
     email_list.append("no")


    try:
      abstract_list.append(' '.join(list(map(str, paper['MedlineCitation']['Article']['Abstract']['AbstractText']))))
    except:
      abstract_list.append('No Abstract')

    try:
      pubdate_year_list.append(paper['MedlineCitation']['Article']['Journal']['JournalIssue']['PubDate']['Year'])
    except:
      pubdate_year_list.append('No Data')

In [68]:
df = pd.DataFrame(list(zip(
    title_list, doi_list, pmid_list, author1_list, authorN_list, abstract_list, journal_list, pubdate_year_list #author_cor_list, email_list, 
    )), 
    columns=[
             'Title', 'doi', 'PMID', 'First author', 'Last author', 'Abstract', 'Journal', 'Year' #'Corresponding author', 'email', 
             ])
df.shape
 
df.to_excel('scrapped.xlsx')
df.head(10)


Unnamed: 0,Title,doi,PMID,First author,Last author,Corresponding author,email,Abstract,Journal,Year
0,Comprehensive ABA-based interventions in the t...,10.1186/s12888-022-04412-1,36864429,"Theresa Eckes [Institute of Psychology, Univer...","Anne Möllmann [Institute of Psychology, Univer...","[{'AffiliationInfo': [{'Identifier': [], 'Affi...","Institute of Psychology, University of Münster...",Many studies display promising results for int...,BMC psychiatry,2023
1,Effects of Equine-Assisted Activities and Ther...,10.3390/ijerph20032630,36767996,"Ningkun Xiao [Department of Psychology, Instit...",Jingjing Qi [Engineering School of Information...,"[{'AffiliationInfo': [{'Identifier': [], 'Affi...","[n, o, , c, o, n, t, a, c, t, , a, u, t, h, ...",Autism spectrum disorder (ASD) has become a cr...,International journal of environmental researc...,2023
2,Melatonin for sleep disorders in people with a...,10.1016/j.pnpbp.2022.110695,36584862,Hellen Araujo Nogueira [Center for Biological ...,Marcos Pereira [Institute of Collective Health...,"[{'AffiliationInfo': [{'Identifier': [], 'Affi...","Institute of Collective Health, Federal Univer...",Melatonin is a potential therapeutic intervent...,Progress in neuro-psychopharmacology & biologi...,2023
3,In Search of Biomarkers to Guide Interventions...,10.1176/appi.ajp.21100992,36475375,Mara Parellada [Department of Child and Adoles...,Stephan J Sanders [Department of Child and Ado...,"[{'AffiliationInfo': [{'Identifier': [], 'Affi...","[n, o, , c, o, n, t, a, c, t, , a, u, t, h, ...",The aim of this study was to catalog and evalu...,The American journal of psychiatry,2023
4,Auditory Pitch Perception in Autism Spectrum D...,10.1044/2022_JSLHR-22-00254,36450443,"Yu Chen [Speech-Language-Hearing Center, Schoo...",Yang Zhang [Department of Speech-Language-Hear...,"[{'AffiliationInfo': [{'Identifier': [], 'Affi...","[n, o, , c, o, n, t, a, c, t, , a, u, t, h, ...",Pitch plays an important role in auditory perc...,"Journal of speech, language, and hearing resea...",2022
5,Effects of Therapeutic Horseback-Riding Progra...,10.3390/ijerph192114449,36361327,"Shihui Chen [Department of Kinesiology, Texas ...",Xiaolei Liu [Chinese Traditional Regimen Exerc...,"[{'AffiliationInfo': [{'Identifier': [], 'Affi...","[n, o, , c, o, n, t, a, c, t, , a, u, t, h, ...",Animal-assisted therapy has become a fast-grow...,International journal of environmental researc...,2022
6,Psychological interventions targeting mental h...,10.1111/dmcn.15432,36208472,Kavindri Kulasinghe [Queensland Cerebral Palsy...,Roslyn N Boyd [Queensland Cerebral Palsy and R...,"[{'AffiliationInfo': [{'Identifier': [], 'Affi...","[n, o, , c, o, n, t, a, c, t, , a, u, t, h, ...",To investigate the efficacy of psychological i...,Developmental medicine and child neurology,2023
7,Examining the relationship between cognitive i...,10.1002/aur.2826,36196666,"Jiedi Lei [Institute of Psychiatry, Psychology...","Matthew J Hollocks [Institute of Psychiatry, P...","[{'AffiliationInfo': [{'Identifier': [], 'Affi...","[n, o, , c, o, n, t, a, c, t, , a, u, t, h, ...","Compared to neurotypical peers, autistic adole...",Autism research : official journal of the Inte...,2022
8,Clinical risk factors associated with the deve...,10.1016/j.spinee.2022.08.011,36031098,Samuel E Broida [Department of Orthopaedic Sur...,no,"[{'AffiliationInfo': [{'Identifier': [], 'Affi...",no,Cervical fusion for degenerative disorders car...,The spine journal : official journal of the No...,2023
9,Intensive behavioural interventions based on a...,10.1371/journal.pone.0270833,35972929,Robert Hodgson [Centre for Reviews and Dissemi...,Ann Le Couteur [Population Health Sciences Ins...,"[{'AffiliationInfo': [{'Identifier': [], 'Affi...","[n, o, , c, o, n, t, a, c, t, , a, u, t, h, ...",The economic and social costs of autism are si...,PloS one,2022
