Pubmed Query Builder: https://pubmed.ncbi.nlm.nih.gov/advanced/

### Notes
* Dates are stored in `__repr__` format in the JSON output
* "000" = placeholder code for a PubMed ID (PMID) that could not be obtained

### MeSH Terms
* Biomedical Engineering 
* Biomedical Technology
* Equipment Safety
* Equipment Design
* Prostheses and Implants

### Queries
(("Equipment and Supplies"[MeSH Terms] OR "Equipment Design"[MeSH Terms] OR "Equipment Safety"[MeSH Terms] OR "Biomedical Technology"[MeSH Terms] OR "Biomedical Engineering"[MeSH Terms]) AND 1850/01/01:1997/12/31[Date - Publication]) AND (English[Language])

(((medical device[Title/Abstract]) OR (medical devices[Title/Abstract])) AND (english[Language])) AND (("1850"[Date - Publication] : "1997"[Date - Publication]))

In [1]:
import pandas as pd
import json
import datetime
from pymed import PubMed

# Retrieve Data

In [8]:
# Thin wrapper that forwards a query to Pymed and hands back whatever it returns
def search(query_string, pubmed_module, max_results=2000):
    """Run ``query_string`` through ``pubmed_module.query``.

    Parameters
    ----------
    query_string : str
        Query text passed to ``pubmed_module.query`` as its first argument.
    pubmed_module : object
        Any object exposing ``query(query, max_results=...)``.
    max_results : int, optional
        Forwarded as the ``max_results`` keyword (default 2000).

    Returns
    -------
    The value returned by ``pubmed_module.query``, unchanged.
    """
    return pubmed_module.query(query_string, max_results=max_results)

In [4]:
# Gets relevant data from Pymed iterable and creates a dictionary
def pubmed_todict(pubmed_iterable):
    """Collect selected fields from Pymed results into a column-oriented dict.

    Parameters
    ----------
    pubmed_iterable : iterable
        Result objects exposing ``pubmed_id``, ``title``, ``authors`` and
        ``publication_date`` attributes, and optionally ``journal``.

    Returns
    -------
    dict
        Maps each of "Result Number", "Pubmed ID", "Title", "Journal",
        "Authors" and "Date" to a list holding one entry per result, in
        iteration order. All lists are empty when the iterable is empty.
    """
    new_dict = {"Result Number": [], "Pubmed ID": [], "Title": [], "Journal": [], "Authors": [], "Date": []}

    for i, article in enumerate(pubmed_iterable):
        new_dict["Result Number"].append(i)

        pubmed_id = article.pubmed_id
        # "000" is the placeholder for an ID that cannot be used: either it is
        # missing altogether (None, which previously crashed the membership
        # test with a TypeError) or it contains a newline (parsing issue).
        if pubmed_id is None or "\n" in pubmed_id:
            pubmed_id = "000"
        new_dict["Pubmed ID"].append(pubmed_id)

        new_dict["Title"].append(article.title)
        # Results without a `journal` attribute (books rather than articles)
        # are labelled "Book".
        new_dict["Journal"].append(getattr(article, "journal", "Book"))
        new_dict["Authors"].append(article.authors)
        new_dict["Date"].append(article.publication_date)

    return new_dict

In [5]:
# Avoids serialization error with date/datetime values in JSON dump
# See https://stackoverflow.com/questions/54557568/typeerror-object-of-type-date-is-not-json-serializable
def datetime_converter(object):
    """``default`` hook for ``json.dump``: encode dates as their ``repr`` string.

    Parameters
    ----------
    object : Any
        A value the JSON encoder could not serialize on its own.

    Returns
    -------
    str
        ``repr(object)`` when ``object`` is a ``datetime.date`` or
        ``datetime.datetime``.

    Raises
    ------
    TypeError
        For any other type, so unsupported values fail loudly instead of
        being silently written as ``null``.
    """
    # datetime.datetime subclasses datetime.date, so this single check covers
    # both; testing only for datetime.datetime let plain dates fall through
    # and be written as null.
    if isinstance(object, datetime.date):
        return repr(object)
    raise TypeError(
        "Object of type {} is not JSON serializable".format(type(object).__name__)
    )

In [10]:
def search_and_dump(query_string, max_results, file_name):
    """Query PubMed and write the extracted fields to a JSON file.

    Builds a ``PubMed`` client, runs ``query_string`` through ``search`` with
    the given ``max_results``, converts the results with ``pubmed_todict`` and
    dumps that dictionary to ``file_name`` as indented JSON. A confirmation
    message is printed once the dump call has returned.

    Parameters
    ----------
    query_string : str
        Query text handed to ``search``.
    max_results : int
        Result cap handed to ``search``.
    file_name : str
        Path of the JSON file to (over)write.
    """
    client = PubMed(tool="Medical Device Author Network Analysis", email="bowrey@umd.edu")
    articles = search(query_string, client, max_results)
    records = pubmed_todict(articles)
    with open(file_name, "w") as out_file:
        # `default` names the function json calls when it encounters an object it can't convert
        json.dump(records, out_file, indent=4, default=datetime_converter)
        print("Search Completed. File dumped to JSON")

In [11]:
# Searches for English-language items dated 1850-1997 with "medical device(s)"
# in the title or abstract, capped at 2000 results, and writes them to
# device_pubmed_data.json.
# Note: the triple-quoted literal spans two lines, so the line break and the
# indentation before "AND" are part of the query string that is passed on.
search_and_dump(query_string='''(((medical device[Title/Abstract]) OR (medical devices[Title/Abstract])) AND (english[Language])) 
                                AND (("1850"[Date - Publication] : "1997"[Date - Publication]))''',
               max_results=2000, file_name="device_pubmed_data.json")

Search Completed. File dumped to JSON


# Network Analysis

In [29]:
# Returns a clean list of lists of authors on each paper
def clean_author_data(file_path):
    """Load dumped search results and reduce every author to name + affiliation.

    Parameters
    ----------
    file_path : str or path-like
        JSON file with an "Authors" entry: one list per paper, each holding
        author dicts with "lastname" and "initials" keys and, optionally,
        an "affiliation" key.

    Returns
    -------
    list of list of dict
        One inner list per paper, in file order. Each author becomes
        ``{"name": "<lastname> <initials>", "affiliation": <value or None>}``.

    Raises
    ------
    KeyError
        If the file has no "Authors" entry, or an author dict lacks
        "lastname" or "initials".
    """
    with open(file_path) as f:
        authors_by_paper = json.load(f)["Authors"]

    # dict.get supplies None when "affiliation" is absent, which replaces the
    # former try/except KeyError that rebuilt the whole dict for that one key.
    # NOTE(review): a null lastname/initials in the file is formatted as the
    # text "None" by str.format -- confirm whether such authors occur.
    return [
        [
            {
                "name": "{} {}".format(author["lastname"], author["initials"]),
                "affiliation": author.get("affiliation"),
            }
            for author in paper_authors
        ]
        for paper_authors in authors_by_paper
    ]

In [33]:
# Build the per-paper author lists from the JSON file named in the
# search_and_dump call earlier in this notebook.
author_list_by_paper = clean_author_data(file_path="device_pubmed_data.json")

In [34]:
# Spot-check: show the cleaned author records of the paper at index 1
author_list_by_paper[1]

[{'name': 'Bhardwaj RS', 'affiliation': 'IZKF-BIOMAT, Aachen, Germany.'},
 {'name': 'Henze U', 'affiliation': None},
 {'name': 'Klein B', 'affiliation': None},
 {'name': 'Zwadlo-Klarwasser G', 'affiliation': None},
 {'name': 'Klinge U', 'affiliation': None},
 {'name': 'Mittermayer C', 'affiliation': None},
 {'name': 'Klosterhalfen B', 'affiliation': None}]

In [28]:
# NOTE(review): `test_dict` is not defined in any cell visible in this notebook,
# and this cell's execution count (28) precedes the cells above it -- it looks
# like leftover scratch work that would raise NameError on a fresh
# Restart & Run All. Confirm and remove, or define `test_dict` first.
print(test_dict["2"])

None
