In [2]:
import requests
from xml.etree import ElementTree
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network

In [10]:
def print_element_tree(element, indent=""):
    """Recursively prints the element tree structure."""
    print(indent + "<" + element.tag + ">")
    if element.text and element.text.strip():
        print(indent + "  " + element.text.strip())
    for subelement in element:
        print_element_tree(subelement, indent + "  ")
    print(indent + "</" + element.tag + ">")
# Build the URL for the search endpoint with the keyword query.
keyword = "bioinformatics"
url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/search?query={keyword}&page=1&pageSize=100&format=xml"

# Get the XML response from Europe PMC.
response = requests.get(url)

# Parse the XML into an ElementTree.
pmids = []
tree = ElementTree.fromstring(response.content)
for result in tree.findall('.//result'):
    pmid_elem = result.find('pmid')
    if pmid_elem is not None:
        pmids.append(pmid_elem.text)
print(pmids)
hit_count_elem = tree.find('.//hitCount')
if hit_count_elem is not None:
    print("Total hits:", hit_count_elem.text)
len(pmids)

['40084236', '40007724', '39732146', '39994507', '40037522', '39546379', '40067792', '39637085', '39377391', '39475181', '40140766', '39420637', '39404857', '39558607', '39905288', '39470718', '39404071', '39526399', '39607707', '39695945', '39530240', '40025421', '39329270', '39964979', '39546631', '40065693', '39656924', '39576126', '40037622', '39526387', '39629064', '39854213', '40067093', '40097570', '40102721', '40036763', '39380496', '39525080', '39901337', '39898809', '39754035', '39286643', '40053519', '40088942', '39959758', '39141443', '39718766', '39773409', '40100137', '39867886', '38597606', '39329269', '40103322', '39726694', '39819419', '39607697', '39852261', '39689042', '39797569', '39695935', '39499138', '39881492', '39778365', '39137906', '39526405', '40151459', '40124852', '39470723', '39815230', '39558171', '39657122', '40099807', '39588754', '40013403', '40128719', '40133783', '39739718', '39910485', '39712000', '39512782', '39348154', '39827349', '39703424', '39

92

In [72]:
tree = ElementTree.fromstring(requests.get('https://www.ebi.ac.uk/europepmc/webservices/test/rest/MED/39546379/references?page=1&pageSize=100&format=xml').content)
#print_element_tree(tree)
for ref in tree.findall('.//reference'):
    print(ref.find('id').text)
    cited_pmid_elem = ref.find('pmid')
cited_pmid_elem

24220091
33211869
27125735
37953312
37553384
31950189
27382179
31182864
23970545
26553798
17694311
24008419
37992713


In [11]:
citation_dict = {}
for i, pmid in enumerate(pmids):
    print(i)
    url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/MED/{pmid}/references?page=1&pageSize=100&format=xml"
    response = requests.get(url)
    tree = ElementTree.fromstring(response.content)
    
    # List to hold PMIDs that the current paper cites.
    cited_pmids = []
    # The XML is assumed to have <reference> elements each containing a <pmid> element.
    for ref in tree.findall('.//reference'):
        cited_pmid_elem = ref.find('id')
        if cited_pmid_elem is not None:
            cited_pmids.append(cited_pmid_elem.text)
    
    citation_dict[pmid] = cited_pmids

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91


In [13]:
graph = nx.Graph(citation_dict)

In [12]:
citation_dict

{'40084236': [],
 '40007724': ['20639541',
  '19304878',
  '34377978',
  '32956448',
  '34606606',
  '20624783',
  '38587192',
  '37023146',
  '37889074',
  '37074928',
  '33459763',
  '21278367',
  '29035372',
  '35316646',
  '29790974',
  '32015543',
  '36453861',
  '24514441',
  '34951622',
  '31639358',
  '16377612',
  '34850947',
  '37184890',
  '33541841',
  '34135355',
  '22039361',
  '34784345',
  '15976072',
  '38821063',
  '29912383',
  '29228171',
  '26553804',
  '24642063',
  '33737026'],
 '39732146': [],
 '39994507': ['33564394',
  '34930405',
  '31796933',
  '35110534',
  '28753430',
  '30395331'],
 '40037522': ['34557778',
  '15697438',
  '21085593',
  '10802651',
  '36590899',
  '12808457',
  '26356912',
  '19541911',
  '16199517',
  '33095870',
  '10592173',
  '17784955'],
 '39546379': ['24220091',
  '33211869',
  '27125735',
  '37953312',
  '37553384',
  '31950189',
  '27382179',
  '31182864',
  '23970545',
  '26553798',
  '17694311',
  '24008419',
  '37992713'],
 '40

In [14]:
# Create a PyVis network; set notebook=True if you're in a Jupyter notebook.
net = Network(height="750px", width="100%", directed=True)

# Load your networkx graph into PyVis.
net.from_nx(graph)

# Generate and open the interactive graph in a browser.
net.show("citation_graph.html")