In [1]:
import pandas as pd
import networkx as nx
from pyvis.network import Network
import matplotlib.pyplot as plt
import community

In [2]:
# Read node data from CSV
nodes_df = pd.read_csv('nodes.csv', dtype={'Id': str})

# Read edge data from CSV
edges_df = pd.read_csv('edges.csv', dtype={'Source': str, 'Target': str})

# Convert DataFrames to dictionaries
nodes_dict = nodes_df.to_dict(orient='records')
edges_dict = edges_df.to_dict(orient='records')

# Create a non-directed graph using NetworkX
G = nx.Graph()

# Add nodes and edges to the NetworkX graph
for node in nodes_dict:
    G.add_node(node['Id'], label=node['Label'], institution=node['INSTITUTION'], first_name=node['FIRST'], 
               middle_name=node['MIDDLE'], last_name=node['LAST'], degree=node['DEGREE'], hindex=node['H_INDEX'])

# Iterate through the DataFrame and aggregate weights
for index, row in edges_df.iterrows():
    source = row['Source']
    target = row['Target']

    if G.has_edge(source, target):
        # Edge already exists, aggregate the weights
        G[source][target]['weight'] += 1
    else:
        # Edge doesn't exist, add it with a weight of 1
        G.add_edge(source, target, weight=1)


In [3]:
# Examine network
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print("This network has", node_count, "nodes and", edge_count, "edges.")

This network has 2465 nodes and 29759 edges.


In [4]:
# Examine nodes
G.nodes

NodeView(('6602108168', '6602906779', '6603462653', '7004413850', '7006634482', '14828095000', '36561447300', '56135849300', '57191984510', '57197875893', '57210129887', '57213615986', '57217091928', '6602384897', '6602796476', '6701781111', '7405452089', '55251561900', '56059116100', '56364178700', '57482097000', '57197319711', '57217533749', '6603586579', '6603753272', '7202312767', '7402336288', '7801657367', '36621974300', '55279743200', '55897638200', '56440263000', '57189178071', nan, '6602735965', '6701413757', '6701662904', '7005718629', '7005762310', '7006253149', '7102410636', '7202013704', '7202822434', '7403013173', '7403532257', '7801673685', '7801677152', '16053166500', '20735690400', '24367694800', '24432068600', '24481898100', '26325740000', '35182114900', '36157950800', '36981730900', '37051750300', '39062082600', '39863555200', '55270598500', '55841031100', '55872286500', '56457673900', '56711362300', '57189460622', '57190490968', '57193529861', '57194853864', '572158

In [5]:
# Examine nodes data
for node, data in G.nodes(data=True):
    print(f"Node {node}: {data}")

Node 6602108168: {'label': 'Lisa  Galati', 'institution': 'Albany Medical Center', 'first_name': 'Lisa ', 'middle_name': nan, 'last_name': 'Galati', 'degree': 'MD', 'hindex': 8.0}
Node 6602906779: {'label': 'Jason Mouzakes', 'institution': 'Albany Medical Center', 'first_name': 'Jason', 'middle_name': nan, 'last_name': 'Mouzakes', 'degree': 'MD', 'hindex': 10.0}
Node 6603462653: {'label': 'David Foyt', 'institution': 'Albany Medical Center', 'first_name': 'David', 'middle_name': nan, 'last_name': 'Foyt', 'degree': 'MD', 'hindex': 9.0}
Node 7004413850: {'label': 'Stanley  Shapshay', 'institution': 'Albany Medical Center', 'first_name': 'Stanley ', 'middle_name': 'M', 'last_name': 'Shapshay', 'degree': 'MD', 'hindex': 24.0}
Node 7006634482: {'label': 'Steven  Parnes', 'institution': 'Albany Medical Center', 'first_name': 'Steven ', 'middle_name': 'M', 'last_name': 'Parnes', 'degree': 'MD', 'hindex': 15.0}
Node 14828095000: {'label': 'Melissa Mortensen-Welch', 'institution': 'Albany Medic

In [6]:
# Examine edges
for u, v, data in G.edges(data=True):
    print(f"Edge ({u}, {v}) has weight: {data['weight']}")

Edge (6602108168, 7006634482) has weight: 1
Edge (6602108168, 6602906779) has weight: 1
Edge (6602108168, 36561447300) has weight: 2
Edge (6602108168, 57222001835) has weight: 1
Edge (6602108168, 7004413850) has weight: 3
Edge (6602108168, 7007049688) has weight: 1
Edge (6602108168, 57199379095) has weight: 1
Edge (6602906779, 36561447300) has weight: 3
Edge (6602906779, 7202032750) has weight: 3
Edge (6602906779, 25934756900) has weight: 3
Edge (6602906779, 6603462653) has weight: 1
Edge (6602906779, 56666992100) has weight: 1
Edge (6602906779, 7006634482) has weight: 1
Edge (6602906779, 36843803100) has weight: 1
Edge (6602906779, 7003830888) has weight: 1
Edge (6603462653, 57197875893) has weight: 3
Edge (6603462653, 55974818200) has weight: 1
Edge (6603462653, 7006634482) has weight: 1
Edge (6603462653, 57205216159) has weight: 1
Edge (6603462653, 6603823693) has weight: 1
Edge (6603462653, 7005494109) has weight: 1
Edge (7004413850, 7004320818) has weight: 6
Edge (7004413850, 6602

Edge (7006046741, 26424481500) has weight: 6
Edge (7006046741, 8422655700) has weight: 7
Edge (7006046741, 57210462724) has weight: 7
Edge (7006046741, 56715493400) has weight: 7
Edge (7006046741, 8450059800) has weight: 8
Edge (7006046741, 23972420300) has weight: 1
Edge (7006046741, 15757236900) has weight: 1
Edge (7006046741, 7003438973) has weight: 2
Edge (7006046741, 57192715312) has weight: 2
Edge (7006046741, 37032137800) has weight: 1
Edge (7006046741, 7201800670) has weight: 3
Edge (7006046741, 7201412258) has weight: 1
Edge (7006046741, 57197047356) has weight: 1
Edge (7006046741, 36723203600) has weight: 8
Edge (7006046741, 57031649400) has weight: 1
Edge (7006046741, 7407318248) has weight: 1
Edge (7006046741, 36553054500) has weight: 3
Edge (7006046741, 36601404700) has weight: 2
Edge (7006046741, 16025542300) has weight: 3
Edge (7006046741, 10839450400) has weight: 3
Edge (7006046741, 35475553400) has weight: 1
Edge (7006046741, 7006683702) has weight: 2
Edge (7006046741,

Edge (52263396500, 6701480797) has weight: 1
Edge (52263396500, 6506595502) has weight: 1
Edge (56025000700, 7403112357) has weight: 3
Edge (56025000700, 6505602863) has weight: 2
Edge (56025000700, 6504546420) has weight: 7
Edge (56025000700, 7403311244) has weight: 10
Edge (56025000700, 55220710000) has weight: 2
Edge (56025000700, 6506595502) has weight: 3
Edge (56025000700, 7202223817) has weight: 1
Edge (56025000700, 57207921888) has weight: 1
Edge (56025000700, 57196486314) has weight: 1
Edge (56025000700, 26027923800) has weight: 1
Edge (56025000700, 15725639900) has weight: 1
Edge (56025000700, 36844442000) has weight: 1
Edge (56025000700, 7006244582) has weight: 1
Edge (56025000700, 57193322721) has weight: 2
Edge (56025000700, 56331691200) has weight: 1
Edge (56025000700, 7006853922) has weight: 1
Edge (56025000700, 26656257300) has weight: 1
Edge (56025000700, 8673575700) has weight: 1
Edge (56768740000, 56597760800) has weight: 1
Edge (56768740000, 55882178400) has weight: 

Edge (12771933500, 7006125783) has weight: 2
Edge (12771933500, 6602653874) has weight: 1
Edge (12771933500, 53882142000) has weight: 1
Edge (12771933500, 55221084100) has weight: 1
Edge (12771933500, 7007175634) has weight: 2
Edge (12771933500, 36643606200) has weight: 1
Edge (12771933500, 6506595502) has weight: 2
Edge (12771933500, 7201515322) has weight: 3
Edge (12771933500, 56232543400) has weight: 1
Edge (12771933500, 7006853922) has weight: 1
Edge (12771933500, 37025428900) has weight: 1
Edge (12771933500, 56946134500) has weight: 2
Edge (12771933500, 56946350200) has weight: 1
Edge (12771933500, 7003772336) has weight: 1
Edge (12771933500, 7006462355) has weight: 1
Edge (12771933500, 26027923800) has weight: 2
Edge (12771933500, 26643408500) has weight: 2
Edge (12771933500, 8702326000) has weight: 1
Edge (12771933500, 55669338000) has weight: 1
Edge (12771933500, 55431707200) has weight: 1
Edge (12771933500, 15124929900) has weight: 1
Edge (12771933500, 7102738400) has weight: 

Edge (7201727299, 35586159300) has weight: 3
Edge (7201727299, 56993594800) has weight: 8
Edge (7201727299, 56823015700) has weight: 1
Edge (7201727299, 8418607000) has weight: 2
Edge (7201727299, 7005459986) has weight: 2
Edge (7201727299, 57194379623) has weight: 3
Edge (7201727299, 8711306100) has weight: 1
Edge (7201727299, 57210825698) has weight: 3
Edge (7201727299, 15623110800) has weight: 10
Edge (7201727299, 56406978900) has weight: 17
Edge (7201727299, 57200167443) has weight: 1
Edge (7201727299, 23668060700) has weight: 3
Edge (7201727299, 7006990636) has weight: 4
Edge (7201727299, 15725455800) has weight: 3
Edge (7201727299, 7004103189) has weight: 2
Edge (7201727299, 36982387100) has weight: 1
Edge (7201727299, 7403209578) has weight: 1
Edge (7201727299, 7101700375) has weight: 2
Edge (7201727299, 6602637699) has weight: 2
Edge (7201727299, 7103245180) has weight: 2
Edge (7201727299, 57209059466) has weight: 1
Edge (7201727299, 36602131700) has weight: 3
Edge (7201727299,

Edge (56494673200, 57190584353) has weight: 3
Edge (56494673200, 7005183280) has weight: 6
Edge (56494673200, 6603322017) has weight: 5
Edge (56494673200, 57168719100) has weight: 3
Edge (56494673200, 55326255600) has weight: 4
Edge (56494673200, 6504244869) has weight: 9
Edge (56494673200, 57052538000) has weight: 2
Edge (56494673200, 57090880700) has weight: 2
Edge (56494673200, 7202409766) has weight: 4
Edge (56494673200, 16302308500) has weight: 4
Edge (56494673200, 57206340904) has weight: 5
Edge (56494673200, 55382764000) has weight: 1
Edge (56494673200, 55637307000) has weight: 2
Edge (56494673200, 35936861900) has weight: 3
Edge (56494673200, 7005092306) has weight: 1
Edge (56494673200, 7402482132) has weight: 1
Edge (56494673200, 57190893904) has weight: 2
Edge (56494673200, 39961319800) has weight: 1
Edge (56494673200, 36459521200) has weight: 1
Edge (56494673200, 56190557400) has weight: 1
Edge (56494673200, 56489132500) has weight: 1
Edge (56494673200, 7005075436) has weigh

Edge (55166390300, 56028298300) has weight: 1
Edge (56019705100, 24477393800) has weight: 7
Edge (56019705100, 57192437044) has weight: 6
Edge (56019705100, 56642164600) has weight: 6
Edge (56019705100, 7005578857) has weight: 11
Edge (56019705100, 14059811700) has weight: 6
Edge (56019705100, 56066380800) has weight: 6
Edge (56019705100, 7103282063) has weight: 5
Edge (56019705100, 35320995700) has weight: 6
Edge (56019705100, 7201677584) has weight: 5
Edge (56019705100, 7003337864) has weight: 6
Edge (56019705100, 7202661733) has weight: 6
Edge (56019705100, 36150997700) has weight: 6
Edge (56019705100, 6602369636) has weight: 6
Edge (56019705100, 57190001804) has weight: 6
Edge (56019705100, 15058072600) has weight: 6
Edge (56019705100, 55484568000) has weight: 5
Edge (56019705100, 7003615326) has weight: 6
Edge (56019705100, 7005459986) has weight: 6
Edge (56019705100, 46261320300) has weight: 6
Edge (56019705100, 50061735500) has weight: 6
Edge (56019705100, 6506275783) has weight

Edge (6603250106, 57193153601) has weight: 1
Edge (6603250106, 48361205900) has weight: 1
Edge (6603250106, 53882142000) has weight: 2
Edge (6603250106, 57201720427) has weight: 1
Edge (6603250106, 55981172500) has weight: 1
Edge (7003306429, 7004363444) has weight: 4
Edge (7004021989, 57212759448) has weight: 3
Edge (7004021989, 55981172500) has weight: 12
Edge (7004021989, 56714959300) has weight: 1
Edge (7004021989, 57205550048) has weight: 2
Edge (7004021989, 23012538300) has weight: 1
Edge (7004021989, 7004176693) has weight: 1
Edge (7004021989, 55444437000) has weight: 7
Edge (7004021989, 7006515969) has weight: 5
Edge (7004021989, 36776351700) has weight: 4
Edge (7004021989, 7102738400) has weight: 1
Edge (7004021989, 17136017900) has weight: 1
Edge (7004021989, 57194941752) has weight: 1
Edge (7004021989, 54882134000) has weight: 1
Edge (7004021989, 53882142000) has weight: 1
Edge (7004363444, 36183572800) has weight: 1
Edge (7005052011, 7006369286) has weight: 79
Edge (7005052

In [7]:
# Remove nodes without edges
isolated_nodes = [node for node in G.nodes() if G.degree(node) == 0]
G.remove_nodes_from(isolated_nodes)
new_node_count = G.number_of_nodes()
print("Now this network has", new_node_count, "nodes.")

Now this network has 2274 nodes.


In [8]:
# Calculate network statistics
degree = G.degree()
eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=500)
betweenness_centrality = nx.betweenness_centrality(G)
closeness_centrality = nx.closeness_centrality(G)

centrality_df = pd.DataFrame({
    'Node': list(G.nodes),
    'Degree': [degree[node] for node in G.nodes],
    'Eigenvector Centrality': list(eigenvector_centrality.values()),
    'Betweenness Centrality': list(betweenness_centrality.values()),
    'Closeness Centrality': list(closeness_centrality.values())
})

centrality_df

Unnamed: 0,Node,Degree,Eigenvector Centrality,Betweenness Centrality,Closeness Centrality
0,6602108168,7,0.000399,0.000125,0.296989
1,6602906779,9,0.001003,0.000137,0.300952
2,6603462653,7,0.000619,0.000396,0.297341
3,7004413850,29,0.005087,0.001732,0.360575
4,7006634482,22,0.004254,0.000926,0.347624
...,...,...,...,...,...
2269,57052587900,7,0.001987,0.000014,0.320318
2270,57090880700,23,0.007625,0.000390,0.343284
2271,57194722008,5,0.000319,0.000021,0.282940
2272,57218199095,2,0.000031,0.000000,0.244926


In [9]:
# Perform Louvain community detection
partition = community.best_partition(G)

for node, community_id in partition.items():
    print(f"Node {node} belongs to community {community_id}")

Node 6602108168 belongs to community 0
Node 6602906779 belongs to community 0
Node 6603462653 belongs to community 1
Node 7004413850 belongs to community 0
Node 7006634482 belongs to community 2
Node 14828095000 belongs to community 6
Node 36561447300 belongs to community 4
Node 56135849300 belongs to community 16
Node 57197875893 belongs to community 1
Node 57210129887 belongs to community 6
Node 57217091928 belongs to community 0
Node 6602384897 belongs to community 0
Node 6701781111 belongs to community 6
Node 7405452089 belongs to community 6
Node 55251561900 belongs to community 7
Node 56059116100 belongs to community 8
Node 56364178700 belongs to community 9
Node 57482097000 belongs to community 7
Node 57197319711 belongs to community 4
Node 6603586579 belongs to community 0
Node 6603753272 belongs to community 0
Node 7202312767 belongs to community 16
Node 7402336288 belongs to community 2
Node 36621974300 belongs to community 4
Node 55279743200 belongs to community 0
Node 55897

Node 57194377909 belongs to community 7
Node 57194537776 belongs to community 7
Node 57203248086 belongs to community 4
Node 57204507731 belongs to community 0
Node 57210585736 belongs to community 7
Node 6505822530 belongs to community 7
Node 6602578725 belongs to community 7
Node 7005186282 belongs to community 7
Node 7005492010 belongs to community 7
Node 12445489500 belongs to community 11
Node 36780322500 belongs to community 4
Node 54415358600 belongs to community 0
Node 55672119600 belongs to community 11
Node 55868257500 belongs to community 4
Node 56306570600 belongs to community 0
Node 56628044700 belongs to community 7
Node 57196022562 belongs to community 7
Node 57196458839 belongs to community 4
Node 57205113170 belongs to community 4
Node 57209466550 belongs to community 7
Node 57967750000 belongs to community 7
Node 6504408818 belongs to community 0
Node 6506966019 belongs to community 6
Node 6602667797 belongs to community 16
Node 6603412668 belongs to community 6
Node 

In [10]:
nx.set_node_attributes(G, partition, name='community_id')

In [11]:
# Due to the large size of the network, we pick two institutions and visualize a small sample for demonstration purpose.
institution1 = "Johns Hopkins University"
institution2 = "Vanderbilt University Medical Center"

# Create a Pyvis network
net = Network(
    notebook=True,
    cdn_resources="remote",
    select_menu=True,
    filter_menu=True,
    height='750px',
    width='100%'
)
# net.repulsion()

# Set solver to "forceAtlas2Based"
net.set_options("""
var options = {
  "physics": {
    "solver": "forceAtlas2Based"
  }
}
""")


# Define a mapping from community IDs to colors
community_color_mapping = {
    0: '#1f77b4',
    1: '#ff7f0e',
    2: '#2ca02c',
    3: '#d62728',
    4: '#9467bd',
    5: '#8c564b',
    6: '#e377c2',
    7: '#7f7f7f',
    8: '#bcbd22',
    9: '#17becf',
    10: '#1a55ff',
    11: '#ff6347',
    12: '#7fff00',
    13: '#dda0dd',
    14: '#20b2aa',
    15: '#ff8c00',
    16: '#9932cc',
    17: '#008080',
    18: '#8b0000',
    19: '#8a2be2'
}

# Add nodes and edges only for scholars from the specified institutions
for node_id in G.nodes:
    label = G.nodes[node_id]['label']
    institution = G.nodes[node_id]['institution']
    degree = G.nodes[node_id]['degree']
    hindex = G.nodes[node_id]['hindex']
    size = eigenvector_centrality[node_id] * 1000
    community_id = G.nodes[node_id]['community_id']

    # Check if the scholar belongs to one of the specified institutions
    if institution in {institution1, institution2}:
        color = community_color_mapping.get(community_id, 'gray')  # Default to gray if community_id not in mapping
        net.add_node(node_id, label=label, size=size, color=color,
                     institution=institution, degree=degree, hindex=hindex,
                     labelHighlightBold=True, Physics=True, font={'size': 40})

# Add edges only between scholars from the specified institutions
for edge in G.edges:
    source, target = edge
    source_institution = G.nodes[source]['institution']
    target_institution = G.nodes[target]['institution']

    if source_institution in {institution1, institution2} and target_institution in {institution1, institution2}:
        net.add_edge(source, target)


# Visualize the network
net.show("sample_network.html")

sample_network.html
