In [28]:
import requests
import json
import xmltodict
import urllib.parse
import opencitingpy
import datetime

# PubMed
## Teil 1
### Query mit Stichworten --> Liste der relevanten Dokumente
queryPubMed: Stichwortartige Suche <br>
idList: Liste mit PMIDs <br>
countResult: Anzahl (maximal) zurückgegebener Dokumente <br>

### URL 
db: welche Datenbank wird durchsucht (pubmed) <br>
term: queryPubMed <br>
RetMax: countResult <br>
sort: Art der Sortierung (hier: relevance) <br>
retmode: Format der zurückgelieferten Daten (hier: json) <br>



In [18]:
## List of "relevant" documents
# Keyword search against PubMed through the NCBI E-utilities ESearch endpoint.
# The result is the list of PMIDs (as strings) of the best-matching documents.
queryPubMed = "head injury"  # free-text search term
countResult = 10             # maximum number of PMIDs to return
# Let requests build and percent-encode the query string instead of
# concatenating and quoting the URL by hand.
responsePubMedID = requests.get(
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
    params={
        "db": "pubmed",
        "term": queryPubMed,
        "RetMax": countResult,
        "sort": "relevance",
        "retmode": "json",
    },
    timeout=30,  # without a timeout a stalled connection blocks the kernel indefinitely
)
# Fail loudly on HTTP errors rather than with a confusing KeyError further down.
responsePubMedID.raise_for_status()
responsePubMedID_json = responsePubMedID.json()
idList = responsePubMedID_json["esearchresult"]["idlist"]
print(idList)

['25335757', '36637470', '22942365', '9279162', '9425562', '9339463', '3330962', '3931543', '27509653', '23431655']


## Teil 2
### Query mit PMID aus Teil 1 --> Liste der MeSH zu dem jeweiligen Dokument
Für jedes Dokument (Identifier: PMID) wird eine Detailsuche durchgeführt. <br>
Extrahierte Infos: <br>
Titel <br>
Liste der MeSH: [[PMID, [MeSH UI, MeSH Name], [MeSH UI, MeSH Name], ...], [PMID, [MeSH UI, MeSH Name], [MeSH UI, MeSH Name], ...], ...] (beliebig viele MeSH pro Dokument)<br>
Datum Veröffentlichung: Wann wurde das Dokument veröffentlicht <br>

In [67]:
## List of MeSH headings (for all documents)
# Resulting structure: [[PMID, [MeSH UI, MeSH name], [MeSH UI, MeSH name], ...], ...]
# NOTE(review): one request is sent per PMID; NCBI asks clients to throttle
# requests to E-utilities — confirm this stays within the limit for larger idList.
meshList = []
for pmid in idList:  # renamed from `id`, which shadowed the builtin
    # Temporary per-document list that is appended to meshList at the end
    meshList_temp = [pmid]
    # Detail query for a single PMID
    response_PubMedMesh = requests.get(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
        params={"db": "pubmed", "id": str(pmid)},
        timeout=30,  # never wait forever on a stalled connection
    )
    response_PubMedMesh.raise_for_status()
    # xmltodict already returns nested dicts; no JSON round trip is needed
    json_data = xmltodict.parse(response_PubMedMesh.text)
    citation = json_data["PubmedArticleSet"]["PubmedArticle"]["MedlineCitation"]
    article = citation["Article"]
    title = article["ArticleTitle"]
    # Not every document carries its own publication date; in that case fall
    # back to PubMed's DateCompleted (TODO: research what exactly that date is)
    pubDateAll = article.get("ArticleDate") or citation.get("DateCompleted")
    if isinstance(pubDateAll, list):
        # xmltodict yields a list when the element occurs more than once
        pubDateAll = pubDateAll[0]
    if pubDateAll:
        pubDate = datetime.datetime(int(pubDateAll["Year"]), int(pubDateAll["Month"]), int(pubDateAll["Day"]))
    else:
        # Neither date is present in the record
        pubDate = None
    # Records without MeSH indexing have no MeshHeadingList at all
    meshAll = (citation.get("MeshHeadingList") or {}).get("MeshHeading") or []
    if isinstance(meshAll, dict):
        # A single MeshHeading is parsed as one dict, not a one-element list;
        # iterating it directly would walk over its keys.
        meshAll = [meshAll]
    for mesh in meshAll:
        meshUI = mesh["DescriptorName"]["@UI"]
        meshName = mesh["DescriptorName"]["#text"]
        meshList_temp.append([meshUI, meshName])
    meshList.append(meshList_temp)

# MeSH

In [4]:
## MeSH
# Fetch the JSON record of a single MeSH descriptor (D001241) from id.nlm.nih.gov.
response = requests.get('https://id.nlm.nih.gov/mesh/D001241.json', timeout=30)
# Stop here on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()
data = json.loads(response.text)
print(response)

<Response [200]>


In [5]:
# Debugging aid (disabled): dump the raw body of the response.
# print(response.text)

In [6]:
# Show the "dateCreated" field of the parsed descriptor record.
dateCreated = data["dateCreated"]
print(dateCreated)

1999-01-01


In [7]:
# Dump the complete parsed descriptor record for inspection.
descriptorText = str(data)
print(descriptorText)

{'@id': 'http://id.nlm.nih.gov/mesh/D001241', '@type': 'http://id.nlm.nih.gov/mesh/vocab#TopicalDescriptor', 'http://id.nlm.nih.gov/mesh/vocab#active': True, 'allowableQualifier': ['http://id.nlm.nih.gov/mesh/Q000652', 'http://id.nlm.nih.gov/mesh/Q000138', 'http://id.nlm.nih.gov/mesh/Q000528', 'http://id.nlm.nih.gov/mesh/Q000008', 'http://id.nlm.nih.gov/mesh/Q000266', 'http://id.nlm.nih.gov/mesh/Q000493', 'http://id.nlm.nih.gov/mesh/Q000037', 'http://id.nlm.nih.gov/mesh/Q000032', 'http://id.nlm.nih.gov/mesh/Q000592', 'http://id.nlm.nih.gov/mesh/Q000302', 'http://id.nlm.nih.gov/mesh/Q000737', 'http://id.nlm.nih.gov/mesh/Q000627', 'http://id.nlm.nih.gov/mesh/Q000600', 'http://id.nlm.nih.gov/mesh/Q000097', 'http://id.nlm.nih.gov/mesh/Q000009', 'http://id.nlm.nih.gov/mesh/Q000633', 'http://id.nlm.nih.gov/mesh/Q000145', 'http://id.nlm.nih.gov/mesh/Q000506', 'http://id.nlm.nih.gov/mesh/Q000494', 'http://id.nlm.nih.gov/mesh/Q000276', 'http://id.nlm.nih.gov/mesh/Q000134', 'http://id.nlm.nih.go

In [None]:
## Semantic Scholar: citations of a paper (looked up by PMID)

In [51]:
# Page through the citations of one paper (identified by its PMID) via the
# Semantic Scholar Graph API, requesting only the "year" field of each
# citing paper. Paging stops at the first empty page.
offset = 0
limit = 1000
citedPmid = "25335757"
citationsUrl = "https://api.semanticscholar.org/graph/v1/paper/PMID:" + citedPmid + "/citations"
citations = []  # collects the entries of every page so they survive the loop


def fetchCitationPage(pageOffset, pageLimit):
    """Request one page of citations.

    Returns a tuple (response, page): the requests response object and the
    decoded JSON body, whose "data" entry is guaranteed to be a list.
    Raises requests.HTTPError when the API answers with an error status.
    """
    pageResponse = requests.get(
        citationsUrl,
        params={"fields": "year", "offset": pageOffset, "limit": pageLimit},
        timeout=30,  # never wait forever on a stalled connection
    )
    # Surface API errors as such instead of as a KeyError on "data".
    pageResponse.raise_for_status()
    page = pageResponse.json()
    page["data"] = page.get("data") or []
    return pageResponse, page


response, json_str = fetchCitationPage(offset, limit)
citationCount = len(json_str["data"])
while citationCount != 0:
    print(json_str)
    citations.extend(json_str["data"])
    offset = offset + limit
    response, json_str = fetchCitationPage(offset, limit)
    citationCount = len(json_str["data"])
    print(citationCount)
# After the loop `citations` holds the entries of all pages; `json_str` is the
# final (empty) page.


{'offset': 0, 'data': [{'citingPaper': {'paperId': 'b6de1e809ad74fc2b438536c84a1c289006ad4db', 'year': 2023}}, {'citingPaper': {'paperId': '7a6754077f31a797d593bc37c9fe08ac7ba494e8', 'year': 2022}}, {'citingPaper': {'paperId': '8877efd382fda0823ec3dac72eb551ca8f91faf8', 'year': 2022}}, {'citingPaper': {'paperId': 'c67e1bc96798dae648950ee89bf82e3e01c63728', 'year': 2022}}, {'citingPaper': {'paperId': '008453b90cac84e3b0ae68d46cdf13470c9e8f19', 'year': 2022}}, {'citingPaper': {'paperId': 'efa0a82ec2bec4bc25c9e15296bebb68b7113b1c', 'year': 2022}}, {'citingPaper': {'paperId': '0f2bf3d8ec81ca36527d32d2d9a8db59949656c4', 'year': 2022}}, {'citingPaper': {'paperId': 'ef551092b64b0ed5914f673738b52907fe91acad', 'year': 2022}}, {'citingPaper': {'paperId': '72f11e8afe5b22c715617feb33c5f97ab40a0666', 'year': 2022}}, {'citingPaper': {'paperId': 'a18913ab3a1e112cc4ea16d9498902b916cf3fbf', 'year': 2022}}, {'citingPaper': {'paperId': '3c561f4ecbf92d12f434d3dd443e9158e61938fb', 'year': 2022}}, {'citingP