### Import

In [77]:
import pandas as pd
import datetime
import time
import requests
import os
import json

In [3]:
from pymed import PubMed

### Settings

In [79]:
# Identification values passed to the `PubMed` client constructor.
tool = 'my_tool1'
email = 'tishankulov@nsi.ru'

# Upper bound handed to `PubMed.query` as `max_results` for each author search.
max_results = 500

# Authors to look up, each written as "Lastname Firstname"
# (the first token is used as the last name, the second as the first name).
authors_list = ['Danilov Gleb', 'Shifrin Michael', 'Potapov Alexander', 'Shimansky Vadim']

### Functions

In [119]:
def _join_nonempty(values, sep='; '):
    """Join the truthy entries of ``values`` with ``sep``, skipping None/empty ones."""
    return sep.join(value for value in values if value)


def get_items(fullname):
    """Query PubMed for one author and return the articles as a DataFrame.

    Parameters
    ----------
    fullname : str
        Author name written as "Lastname Firstname" (whitespace separated).
        Only the first two tokens are used to build the query
        ``'<lastname>, <firstname>[Author]'``.

    Returns
    -------
    pandas.DataFrame
        One row per article yielded by the query. The standard columns
        (``fullname`` ... ``doi``) always come first, in a fixed order; any
        additional keys present in an article dict are appended after them.
        List-like fields (authors, affiliations, keywords) are flattened to
        ``'; '``-separated strings. An empty result gives an empty frame that
        still has the standard columns.

    Raises
    ------
    IndexError
        If ``fullname`` contains fewer than two whitespace-separated tokens.
    """
    columns = ['fullname', 'pubmed_id', 'title', 'abstract', 'keywords', 'journal', 'publication_date',
               'authors', 'affiliations', 'methods', 'conclusions', 'results', 'copyrights', 'doi']

    name_parts = fullname.split()
    query = '{0}, {1}[Author]'.format(name_parts[0], name_parts[1])

    pubmed = PubMed(tool=tool, email=email)

    # Collect plain dicts and build the frame once at the end: growing a
    # DataFrame row by row is quadratic, and `DataFrame.append` no longer
    # exists in pandas >= 2.0.
    records = []
    for result in pubmed.query(query, max_results=max_results):
        record = result.toDict()
        # The raw XML element is not useful in a table; tolerate its absence.
        record.pop('xml', None)

        authors = record.get('authors') or []
        record['fullname'] = fullname
        # Individual author fields may be None; skip those instead of letting
        # str.join raise a TypeError.
        record['affiliations'] = _join_nonempty(author.get('affiliation') for author in authors)
        record['authors'] = _join_nonempty(
            _join_nonempty((author.get('lastname'), author.get('firstname')), sep=' ')
            for author in authors
        )
        # Not every record type is guaranteed to carry a 'keywords' entry.
        record['keywords'] = _join_nonempty(record.get('keywords') or [])

        # The ID field may hold several newline-separated IDs; flatten them
        # onto one line.
        pubmed_id = record.get('pubmed_id')
        if pubmed_id:
            record['pubmed_id'] = pubmed_id.replace('\n', '; ')

        records.append(record)

    # Keep any unexpected keys as trailing columns, in first-seen order.
    extra_columns = []
    for record in records:
        for key in record:
            if key not in columns and key not in extra_columns:
                extra_columns.append(key)

    return pd.DataFrame(records, columns=columns + extra_columns)

### Main

In [120]:
# Fetch the articles for the first author in `authors_list` only; the remaining
# entries of the list are not queried in this cell.
items = get_items(authors_list[0])
# Preview the first rows of the result.
items.head()

Unnamed: 0,fullname,pubmed_id,title,abstract,keywords,journal,publication_date,authors,affiliations,methods,conclusions,results,copyrights,doi
0,Danilov Gleb,31349300,An Information Extraction Algorithm for Detect...,"Rich-in-morphology language, such as Russian, ...",Adverse Events; Electronic Health Records; Nat...,Studies in health technology and informatics,2019-07-28,Danilov Gleb; Shifrin Michael; Strunina Uliya;...,National Medical Research Center for Neurosurg...,,,,,10.3233/SHTI190051
1,Danilov Gleb,31121374,A Single-Center Retrospective Descriptive Coho...,Cerebrospinal fluid (CSF) leak remains a signi...,Cerebrospinal fluid leakage; Children; Craniot...,World neurosurgery,2019-05-24,Kushel Yury; Danilov Gleb; Tekoev Aslan; Cheld...,"2nd Neurosurgical Department, Burdenko Neurosu...",,,Postoperative wound CSF leakage was observed i...,Copyright © 2019 Elsevier Inc. All rights rese...,10.1016/j.wneu.2019.05.091
2,Danilov Gleb,30942728,Prediction of Postoperative Hospital Stay with...,Electronic Health Records (EHRs) conceal a hid...,Deep Learning; Electronic Health Records; Neur...,Studies in health technology and informatics,2019-04-04,Danilov Gleb; Kotik Konstantin; Shifrin Michae...,National Medical Research Center for Neurosurg...,,,,,
3,Danilov Gleb,30468924,Pineal Cyst-Related Aqueductal Stenosis as Cau...,Pineal cysts (PCs) are histologically benign l...,Cerebral aqueduct; Headache; Magnetic resonanc...,World neurosurgery,2018-11-24,Pitskhelauri David I; Konovalov Alexander N; A...,"Department of Neurooncology, Burdenko National...",,The results indicate a statistically significa...,In 25 patients (82%) we observed clinical impr...,Copyright © 2018 Elsevier Inc. All rights rese...,10.1016/j.wneu.2018.11.096
4,Danilov Gleb,30096505,Great Hospitals of the Russian Federation: Nat...,In 1929 the surgeon N. N. Burdenko and the neu...,Education; N. N. Burdenko Neurosurgery Center;...,World neurosurgery,2018-08-11,Potapov Alexander; Likhterman Leonid; Danilov ...,Federal State Autonomous Institution N. N. Bur...,,,,Copyright © 2018 The Authors. Published by Els...,10.1016/j.wneu.2018.07.280


In [121]:
# Create the target folder first: `to_excel` does not create missing
# directories, so on a fresh checkout the export would otherwise fail.
os.makedirs('output', exist_ok=True)
items.to_excel('output/pubmed-pymed.xlsx')

### Debug

In [104]:
# Run the same kind of author query that `get_items` builds, but keep the raw
# result object so individual articles can be inspected in the cells below.
# NOTE(review): the limit 500 is hardcoded here rather than taken from `max_results`.
pubmed = PubMed(tool=tool, email=email)
results = pubmed.query("Danilov, Gleb[Author]", max_results=500)

In [106]:
# Check what kind of object `query` hands back.
type(results)

itertools.chain

In [117]:
# Pull the next article from the result iterator; every run of this cell
# consumes one more item from `results`.
test = next(results)

In [55]:
# List the fields available on an article once it is converted to a dict.
test.toDict().keys()

dict_keys(['pubmed_id', 'title', 'abstract', 'keywords', 'journal', 'publication_date', 'authors', 'methods', 'conclusions', 'results', 'copyrights', 'doi', 'xml'])

In [86]:
# Display the title of the article held in `test`.
test.title

'A Single-Center Retrospective Descriptive Cohort Study of 211 Pediatric Patients: Cerebrospinal Fluid Leakage After Fourth Ventricle Tumor Resection.'