In [1]:
import pandas as pd
import networkx as nx

# Step 1: Load the Data
# Load the node data
nodes_df = pd.read_csv('hetionet-v1.0-nodes.tsv', sep='\t', header=0, names=['ID', 'Name', 'Kind'])

# Load the edge data
edges_df = pd.read_csv('edges.sif', sep='\t', header=0, names=['Source', 'Metaedge', 'Target'])

# Step 2: Create the Graph
G = nx.MultiDiGraph()

# Add nodes with ID as the node identifier
for _, row in nodes_df.iterrows():
    G.add_node(row['ID'], name=row['Name'], kind=row['Kind'])

# Add edges
for _, row in edges_df.iterrows():
    G.add_edge(row['Source'], row['Target'], type=row['Metaedge'])

# Now G is your graph structure containing nodes with their attributes and the edges between them

In [4]:
# Count different types for nodes
node_kind = {node_attrs['kind'] for _, node_attrs in G.nodes(data=True)}

# Count different types for edges
edge_types = {edge_attrs['type'] for _, _, edge_attrs in G.edges(data=True)}

H = G.to_directed()

# Print counts
print("Different types for nodes:", len(node_kind))
print("Different types for edges:", len(edge_types))

Different types for nodes: 11
Different types for edges: 24


In [5]:
# Count nodes associated with each type
node_counts = {}
for node, node_attrs in G.nodes(data=True):
    node_kind = node_attrs['kind']
    if node_kind not in node_counts:
        node_counts[node_kind] = 1
    else:
        node_counts[node_kind] += 1

# Count edges associated with each type
edge_counts = {}
for _, _, edge_attrs in G.edges(data=True):
    edge_type = edge_attrs['type']
    if edge_type not in edge_counts:
        edge_counts[edge_type] = 1
    else:
        edge_counts[edge_type] += 1

# Print counts
#List the Total Number of Entities for Each Unique Kind
print("Nodes associated with each kind:")
for node_kind, count in node_counts.items():
    print(f"{node_kind}: {count}")

print("\nEdges associated with each type:")
for edge_type, count in edge_counts.items():
    print(f"{edge_type}: {count}")

Nodes associated with each type:
Anatomy: 402
Biological Process: 11381
Cellular Component: 1391
Compound: 1552
Disease: 137
Gene: 20945
Molecular Function: 2884
Pathway: 1822
Pharmacologic Class: 345
Side Effect: 5734
Symptom: 438

Edges associated with each type:
AdG: 102240
AeG: 526407
AuG: 97848
CrC: 6486
CtD: 755
CbG: 11571
CuG: 18756
CcSE: 138944
CdG: 21102
CpD: 390
DdG: 7623
DpS: 3357
DlA: 3602
DrD: 543
DaG: 12623
DuG: 7731
GiG: 147164
GpCC: 73566
GpBP: 559504
GcG: 61690
GpMF: 97222
GpPW: 84372
Gr>G: 265672
PCiC: 1029


In [6]:
# H is our direct graph
from collections import defaultdict

# Initialize dictionaries to sum degrees and count nodes for each type
in_degrees = defaultdict(int)
out_degrees = defaultdict(int)
counts = defaultdict(int)

for node, attrs in H.nodes(data=True):
    node_kind = attrs['kind']
    counts[node_kind] += 1
    in_degrees[node_kind] += H.in_degree(node)
    out_degrees[node_kind] += H.out_degree(node)

# Calculate and print average in/out-degrees
avg_in_degrees = {node_kind: in_deg / counts[node_kind] for node_kind, in_deg in in_degrees.items()}
avg_out_degrees = {node_kind: out_deg / counts[node_kind] for node_kind, out_deg in out_degrees.items()}

print("Average In-Degree for each node type:")
for node_kind, avg_in in avg_in_degrees.items():
    print(f"{node_kind}: {avg_in}")

print("\nAverage Out-Degree for each node type:")
for node_kind, avg_out in avg_out_degrees.items():
    print(f"{node_kind}: {avg_out}")

Average In-Degree for each node type:
Anatomy: 8.960199004975124
Biological Process: 49.16123363500571
Cellular Component: 52.887131560028756
Compound: 4.842139175257732
Disease: 12.321167883211679
Gene: 61.1328240630222
Molecular Function: 33.71081830790569
Pathway: 46.30735455543359
Pharmacologic Class: 0.0
Side Effect: 24.231600976630624
Symptom: 7.664383561643835

Average Out-Degree for each node type:
Anatomy: 1807.2014925373135
Biological Process: 0.0
Cellular Component: 0.0
Compound: 127.5798969072165
Disease: 258.97080291970804
Gene: 61.55120553831463
Molecular Function: 0.0
Pathway: 0.0
Pharmacologic Class: 2.982608695652174
Side Effect: 0.0
Symptom: 0.0


In [7]:
# Initialize dictionaries to store in-degree and out-degree for diseases
disease_connections = defaultdict(lambda: {'in': 0, 'out': 0})

# Filter and compute degrees for diseases
for node, attrs in H.nodes(data=True):
    if attrs['kind'] == 'Disease':
        disease_connections[node]['in'] = H.in_degree(node)
        disease_connections[node]['out'] = H.out_degree(node)

# Sort diseases by in-degree and out-degree
most_connected_by_in_degree = sorted(disease_connections.items(), key=lambda x: x[1]['in'], reverse=True)
most_connected_by_out_degree = sorted(disease_connections.items(), key=lambda x: x[1]['out'], reverse=True)

# Display the top 5 most connected diseases by in-degree
print("Top 5 diseases by in-degree (most incoming connections):")
for disease, degrees in most_connected_by_in_degree[:5]:
    disease_name = H.nodes[disease]['name']  # Get the disease name
    print(f"{disease_name}: In-Degree = {degrees['in']}")

# Display the top 5 most connected diseases by out-degree
print("\nTop 5 diseases by out-degree (most outgoing connections):")
for disease, degrees in most_connected_by_out_degree[:5]:
    disease_name = H.nodes[disease]['name']  # Get the disease name
    print(f"{disease_name}: Out-Degree = {degrees['out']}")


Top 5 diseases by in-degree (most incoming connections):
hypertension: In-Degree = 73
hematologic cancer: In-Degree = 53
breast cancer: In-Degree = 44
asthma: In-Degree = 41
coronary artery disease: In-Degree = 40

Top 5 diseases by out-degree (most outgoing connections):
breast cancer: Out-Degree = 1115
hematologic cancer: Out-Degree = 1076
IgA glomerulonephritis: Out-Degree = 966
melanoma: Out-Degree = 931
rheumatoid arthritis: Out-Degree = 868


### Compute the sub-graph of “Disease” and “Symptom”

In [8]:
# H is our directed graph
# and it has a 'type' attribute for each node specifying whether it's a Disease, Symptom, or something else

# Step 1: Identify Disease and Symptom nodes
disease_and_symptom_nodes = [node for node, attrs in H.nodes(data=True) if attrs.get('kind') in ['Disease', 'Symptom']]

# Step 2: Create the sub-graph
disease_symptom_subgraph = H.subgraph(disease_and_symptom_nodes).copy()

# Now 'disease_symptom_subgraph' is a directed sub-graph of 'H' 
# that includes only Diseases and Symptoms and the edges between them


### Compute the average number of connections between a disease and a symptom.

In [9]:
# 'disease_symptom_subgraph' is your MultiDiGraph containing only diseases and symptoms

# Step 1: Count the total number of connections (edges)
# In a MultiDiGraph, this correctly accounts for multiple edges between the same nodes
total_connections = len(disease_symptom_subgraph.edges())

# Step 2: Determine the number of diseases and symptoms
diseases_count = sum(1 for _, data in disease_symptom_subgraph.nodes(data=True) if data.get('kind') == 'Disease')
symptoms_count = sum(1 for _, data in disease_symptom_subgraph.nodes(data=True) if data.get('kind') == 'Symptom')

# The total number of disease and symptom nodes
total_disease_symptom_nodes = diseases_count + symptoms_count

# Step 3: Compute the average number of connections
# The calculation of the average connections per node remains the same
average_connections = total_connections / total_disease_symptom_nodes

print(f"Total connections: {total_connections}")
print(f"Total disease and symptom nodes: {total_disease_symptom_nodes}")
print(f"Average number of connections between a disease and a symptom: {average_connections}")

Total connections: 3900
Total disease and symptom nodes: 575
Average number of connections between a disease and a symptom: 6.782608695652174


### Compute the similarity between “type 1 diabetes mellitus” and “type 2 diabetes mellitus”

In [13]:
# Step 1: Identify the node IDs for the two diseases
# Ensure we select only the first matching node for each disease name
node_id_type1 = next((node for node, attrs in H.nodes(data=True) if attrs.get('name') == "type 1 diabetes mellitus"), None)
node_id_type2 = next((node for node, attrs in H.nodes(data=True) if attrs.get('name') == "type 2 diabetes mellitus"), None)

if not node_id_type1 or not node_id_type2:
    print("One of the disease nodes could not be found.")
else:
    # Step 2: Get Neighbors for both diseases
    # For directed graphs, consider successors for outgoing and predecessors for incoming edges
    neighbors_type1 = set(H.successors(node_id_type1)) | set(H.predecessors(node_id_type1))
    neighbors_type2 = set(H.successors(node_id_type2)) | set(H.predecessors(node_id_type2))

    # Step 3: Compute Jaccard Similarity
    intersection = len(neighbors_type1 & neighbors_type2)
    union = len(neighbors_type1 | neighbors_type2)
    jaccard_similarity = intersection / union if union else 0

    print(f"Jaccard Similarity between 'type 1 diabetes mellitus' and 'type 2 diabetes mellitus' is {jaccard_similarity}")


Jaccard Similarity between 'type 1 diabetes mellitus' and 'type 2 diabetes mellitus' is 0.13267326732673268


The Jaccard similarity between "type 1 diabetes mellitus" and "type 2 diabetes mellitus" is approximately 13.27% of their combined neighbor sets (e.g., symptoms, treatments, related conditions) are shared. This suggests that the majority of their connections are unique to each disease.

### Define and compute also the similarity between two symptoms like “EyePain” and ”Blindness”

In [24]:
# Step 1: Identify the node IDs for the two symptoms
node_id_eye_pain = next((node for node, attrs in H.nodes(data=True) if attrs.get('name') == "Eye Pain" and attrs.get('kind') == "Symptom"), None)
node_id_blindness = next((node for node, attrs in H.nodes(data=True) if attrs.get('name') == "Blindness" and attrs.get('kind') == "Symptom"), None)

if not node_id_eye_pain or not node_id_blindness:
    print("One of the symptom nodes could not be found.")
else:
    # Step 2: Get Neighbors for both symptoms
    neighbors_eye_pain = set(H.successors(node_id_eye_pain)) | set(H.predecessors(node_id_eye_pain))
    neighbors_blindness = set(H.successors(node_id_blindness)) | set(H.predecessors(node_id_blindness))

    # Step 3: Compute Jaccard Similarity
    intersection = len(neighbors_eye_pain & neighbors_blindness)
    union = len(neighbors_eye_pain | neighbors_blindness)
    jaccard_similarity = intersection / union if union else 0

    print(f"Jaccard Similarity between 'Eye Pain' and 'Blindness': {jaccard_similarity}")

Jaccard Similarity between 'Eye Pain' and 'Blindness': 0.35


Similarity of 0 means they share no connections.

# Structural Similarity Measures (e.g., SimRank)

We adopt the SimRank a more sophisticated approaches than the straightforward Jaccard similarity for comparing nodes in a graph, it's usefull especially in our case in which the graphs has multi-layered relationships.

- similarity between "EyePain" and "Blindness" symptoms

In [28]:
# Function to find node ID by name
def find_node_id_by_name(G, name):
    for node, attrs in G.nodes(data=True):
        if 'name' in attrs and attrs['name'] == name and attrs['kind'] == "Symptom":
            return node
    return None

# Finding node IDs for "Eye Pain" and "Blindness"
node_id_eye_pain = find_node_id_by_name(H, "Eye Pain")
node_id_blindness = find_node_id_by_name(H, "Blindness")

print(f"Node ID for 'Eye Pain': {node_id_eye_pain}")
print(f"Node ID for 'Blindness': {node_id_blindness}")

Node ID for 'Eye Pain': Symptom::D058447
Node ID for 'Blindness': Symptom::D001766


In [29]:
def simrank(G, u, v, c=0.8, max_iter=10, eps=1e-4, sim_cache=None):
    # Initialize the cache on the first call
    if sim_cache is None:
        sim_cache = {}
    
    # Check for direct cache hit
    if (u, v) in sim_cache:
        return sim_cache[(u, v)]
    
    # Base cases
    if u == v:
        return 1.0
    predecessors_u = set(G.predecessors(u))
    predecessors_v = set(G.predecessors(v))
    if len(predecessors_u) == 0 or len(predecessors_v) == 0:
        return 0

    # Recursive computation with memoization
    sum_sim = 0
    for w in predecessors_u:
        for x in predecessors_v:
            if (w, x) not in sim_cache:  # Check if result is already in cache
                sim_cache[(w, x)] = simrank(G, w, x, c=c, max_iter=max_iter-1, eps=eps, sim_cache=sim_cache)
            sum_sim += sim_cache[(w, x)]
    
    sim_new = (c / (len(predecessors_u) * len(predecessors_v))) * sum_sim
    sim_cache[(u, v)] = sim_new  # Cache this computation
    
    return sim_new

In [30]:
if node_id_eye_pain and node_id_blindness:
    # Now call the SimRank function with the correct node IDs
    similarity = simrank(H, node_id_eye_pain, node_id_blindness)
    print(f"SimRank similarity between 'Eye Pain' and 'Blindness': {similarity}")
else:
    print("One or both of the symptom node IDs could not be found.")

SimRank similarity between 'Eye Pain' and 'Blindness': 0.04986847745134472


GUARDARE FINO QUIIII!!!
SOTTO CODICE VECCHIOOOO!!!

NON CANCELLARE CODICE SOTTO, ASPETTIAMO UN ATTIMO

In [1]:
import networkx as nx
import matplotlib.pyplot as plt

def read_node_file(file_path):
    nodes = []
    with open(file_path, 'r') as f:
        for line in f:
            parts = line.strip().split('\t')
            node_id = parts[0]
            node_name = parts[1]
            node_type = parts[2]
            nodes.append((node_id, node_name, node_type))
    return nodes

def read_edge_file(file_path):
    edges = []
    with open(file_path, 'r') as f:
        for line in f:
            parts = line.strip().split('\t')
            source = parts[0]
            target = parts[1]
            metaedge = parts[2]
            edges.append((source, target, metaedge))
    return edges

# File paths for nodes and edges
node_file_path = 'hetionet-v1.0-nodes.tsv'
edge_file_path = 'edges.sif'

# Read node and edge data
nodes_data = read_node_file(node_file_path)
edges_data = read_edge_file(edge_file_path)

edges_data = edges_data[1:]
#edges_data
nodes_data = nodes_data[1:]  
#nodes_data


In [2]:
d = [x[0] for x in nodes_data]
v = [x[2] for x in nodes_data]

nodes_stripped = [(x, y) for x, y in zip(d, v)]

#nodes_stripped

In [3]:
# Create a graph
G = nx.MultiDiGraph()

# Add nodes with types
for node_id, node_type in nodes_stripped:
    G.add_node(node_id, type=node_type)

# Add edges with types
for source, edge_type, target in edges_data:
    G.add_edge(source, target, type=edge_type)

# Count different types for nodes
node_types = {node_attrs['type'] for _, node_attrs in G.nodes(data=True)}

# Count different types for edges
edge_types = {edge_attrs['type'] for _, _, edge_attrs in G.edges(data=True)}

H = G.to_directed()

# Print counts
print("Different types for nodes:", len(node_types))
print("Different types for edges:", len(edge_types))

Different types for nodes: 11
Different types for edges: 24


In [4]:

# Count nodes associated with each type
node_counts = {}
for node, node_attrs in G.nodes(data=True):
    node_type = node_attrs['type']
    if node_type not in node_counts:
        node_counts[node_type] = 1
    else:
        node_counts[node_type] += 1

# Count edges associated with each type
edge_counts = {}
for _, _, edge_attrs in G.edges(data=True):
    edge_type = edge_attrs['type']
    if edge_type not in edge_counts:
        edge_counts[edge_type] = 1
    else:
        edge_counts[edge_type] += 1

# Print counts
#List the Total Number of Entities for Each Unique Kind
print("Nodes associated with each type:")
for node_type, count in node_counts.items():
    print(f"{node_type}: {count}")

print("\nEdges associated with each type:")
for edge_type, count in edge_counts.items():
    print(f"{edge_type}: {count}")

Nodes associated with each type:
Anatomy: 402
Biological Process: 11381
Cellular Component: 1391
Compound: 1552
Disease: 137
Gene: 20945
Molecular Function: 2884
Pathway: 1822
Pharmacologic Class: 345
Side Effect: 5734
Symptom: 438

Edges associated with each type:
AdG: 102240
AeG: 526407
AuG: 97848
CrC: 6486
CtD: 755
CbG: 11571
CuG: 18756
CcSE: 138944
CdG: 21102
CpD: 390
DdG: 7623
DpS: 3357
DlA: 3602
DrD: 543
DaG: 12623
DuG: 7731
GiG: 147164
GpCC: 73566
GpBP: 559504
GcG: 61690
GpMF: 97222
GpPW: 84372
Gr>G: 265672
PCiC: 1029


In [5]:
# H is our direct graph
from collections import defaultdict

# Initialize dictionaries to sum degrees and count nodes for each type
in_degrees = defaultdict(int)
out_degrees = defaultdict(int)
counts = defaultdict(int)

for node, attrs in H.nodes(data=True):
    node_type = attrs['type']
    counts[node_type] += 1
    in_degrees[node_type] += H.in_degree(node)
    out_degrees[node_type] += H.out_degree(node)

# Calculate and print average in/out-degrees
avg_in_degrees = {node_type: in_deg / counts[node_type] for node_type, in_deg in in_degrees.items()}
avg_out_degrees = {node_type: out_deg / counts[node_type] for node_type, out_deg in out_degrees.items()}

print("Average In-Degree for each node type:")
for node_type, avg_in in avg_in_degrees.items():
    print(f"{node_type}: {avg_in}")

print("\nAverage Out-Degree for each node type:")
for node_type, avg_out in avg_out_degrees.items():
    print(f"{node_type}: {avg_out}")

Average In-Degree for each node type:
Anatomy: 8.960199004975124
Biological Process: 49.16123363500571
Cellular Component: 52.887131560028756
Compound: 4.842139175257732
Disease: 12.321167883211679
Gene: 61.1328240630222
Molecular Function: 33.71081830790569
Pathway: 46.30735455543359
Pharmacologic Class: 0.0
Side Effect: 24.231600976630624
Symptom: 7.664383561643835

Average Out-Degree for each node type:
Anatomy: 1807.2014925373135
Biological Process: 0.0
Cellular Component: 0.0
Compound: 127.5798969072165
Disease: 258.97080291970804
Gene: 61.55120553831463
Molecular Function: 0.0
Pathway: 0.0
Pharmacologic Class: 2.982608695652174
Side Effect: 0.0
Symptom: 0.0


In [9]:
# Filter out the disease nodes
disease_nodes = [node for node, attrs in H.nodes(data=True) if attrs['type'] == 'Disease']

# Compute in-degrees and out-degrees for each disease node
disease_degrees = [(node, H.in_degree(node), H.out_degree(node)) for node in disease_nodes]

#disease_degrees

In [7]:
# Initialize variables to hold the disease with the highest in-degree and out-degree
max_in_degree_disease = None
max_out_degree_disease = None
max_in_degree = float('-inf')
max_out_degree = float('-inf')

# Iterate through the list to find the disease with the highest in-degree and out-degree
for disease, in_degree, out_degree in disease_degrees:
    if in_degree > max_in_degree:
        max_in_degree = in_degree
        max_in_degree_disease = disease
    if out_degree > max_out_degree:
        max_out_degree = out_degree
        max_out_degree_disease = disease

print(f"Disease with the highest in-degree: {max_in_degree_disease}, {max_in_degree}")
print(f"Disease with the highest out-degree: {max_out_degree_disease}, {max_out_degree}")

Disease with the highest in-degree: Disease::DOID:10763, 73
Disease with the highest out-degree: Disease::DOID:1612, 1115


In [25]:
H_nodes = H.nodes[disease]
H_nodes

{'type': 'Disease'}

In [24]:
# Filter out the disease nodes
disease_nodes = [node for node, attrs in H.nodes(data=True) if attrs['type'] == 'Disease']

# Compute in-degrees and out-degrees for each disease node
disease_degrees = [(node, H.in_degree(node), H.out_degree(node)) for node in disease_nodes]

# Sort the list by in-degree and out-degree to find the most connected diseases
most_connected_by_in_degree = sorted(disease_degrees, key=lambda x: x[0], reverse=True)
most_connected_by_out_degree = sorted(disease_degrees, key=lambda x: x[1], reverse=True)

# Display the top 5 most connected diseases by in-degree, assuming the name is in position 1
print("Top 5 diseases by in-degree (most incoming connections):")
for disease, in_degree, _ in most_connected_by_in_degree[:5]:
    # Assuming the name is stored under index 1 in the node attributes
    disease_name = H.nodes[disease][0] if 0 in H.nodes[disease] else "Unknown"
    print(f"Disease: {disease_name}, In-Degree: {in_degree}")

# Display the top 5 most connected diseases by out-degree, with the same assumption
print("\nTop 5 diseases by out-degree (most outgoing connections):")
for disease, _, out_degree in most_connected_by_out_degree[:5]:
    disease_name = H.nodes[disease][0] if 0 in H.nodes[disease] else "Unknown"
    print(f"Disease: {disease_name}, Out-Degree: {out_degree}")


Top 5 diseases by in-degree (most incoming connections):
Disease: Unknown, In-Degree: 14
Disease: Unknown, In-Degree: 0
Disease: Unknown, In-Degree: 1
Disease: Unknown, In-Degree: 3
Disease: Unknown, In-Degree: 5

Top 5 diseases by out-degree (most outgoing connections):
Disease: Unknown, Out-Degree: 699
Disease: Unknown, Out-Degree: 1076
Disease: Unknown, Out-Degree: 1115
Disease: Unknown, Out-Degree: 277
Disease: Unknown, Out-Degree: 595


In [34]:
# Inspect the first few nodes to see how their attributes are stored
for node, attrs in list(H.nodes(data=True))[:5]:
    print(node, attrs)


Anatomy::UBERON:0000002 {'type': 'Anatomy'}
Anatomy::UBERON:0000004 {'type': 'Anatomy'}
Anatomy::UBERON:0000006 {'type': 'Anatomy'}
Anatomy::UBERON:0000007 {'type': 'Anatomy'}
Anatomy::UBERON:0000010 {'type': 'Anatomy'}


In [10]:
# H is our directed graph
# and it has a 'type' attribute for each node specifying whether it's a Disease, Symptom, or something else

# Step 1: Identify Disease and Symptom nodes
disease_and_symptom_nodes = [node for node, attrs in H.nodes(data=True) if attrs.get('type') in ['Disease', 'Symptom']]

# Step 2: Create the sub-graph
disease_symptom_subgraph = H.subgraph(disease_and_symptom_nodes).copy()

# Now 'disease_symptom_subgraph' is a directed sub-graph of 'H' 
# that includes only Diseases and Symptoms and the edges between them


In [11]:
# 'disease_symptom_subgraph' is your MultiDiGraph containing only diseases and symptoms

# Step 1: Count the total number of connections (edges)
# In a MultiDiGraph, this correctly accounts for multiple edges between the same nodes
total_connections = len(disease_symptom_subgraph.edges())

# Step 2: Determine the number of diseases and symptoms
diseases_count = sum(1 for _, data in disease_symptom_subgraph.nodes(data=True) if data.get('type') == 'Disease')
symptoms_count = sum(1 for _, data in disease_symptom_subgraph.nodes(data=True) if data.get('type') == 'Symptom')

# The total number of disease and symptom nodes
total_disease_symptom_nodes = diseases_count + symptoms_count

# Step 3: Compute the average number of connections
# The calculation of the average connections per node remains the same
average_connections = total_connections / total_disease_symptom_nodes

print(f"Total connections: {total_connections}")
print(f"Total disease and symptom nodes: {total_disease_symptom_nodes}")
print(f"Average number of connections between a disease and a symptom: {average_connections}")

Total connections: 3900
Total disease and symptom nodes: 575
Average number of connections between a disease and a symptom: 6.782608695652174


Compute the similarity between “type 1 diabetes mellitus” and “type 2 diabetes mellitus”

In [None]:
# Step 1: Identify the node IDs for the two diseases
# This step assumes you have some way to map disease names to node IDs. 
# For demonstration, let's assume these are the node IDs:
node_id_type1 = "type 1 diabetes mellitus"
node_id_type2 = "type 2 diabetes mellitus"

# Step 2: Get Neighbors for both diseases
# Adjust this step based on whether you're interested in successors, predecessors, or both
neighbors_type1 = set(H.successors(node_id_type1)) | set(H.predecessors(node_id_type1))
neighbors_type2 = set(H.successors(node_id_type2)) | set(H.predecessors(node_id_type2))

# Step 3: Compute Jaccard Similarity
intersection = len(neighbors_type1 & neighbors_type2)
union = len(neighbors_type1 | neighbors_type2)
jaccard_similarity = intersection / union if union else 0

print(f"Jaccard Similarity: {jaccard_similarity}")
