In [2]:
import requests
import pandas as pd
from pyvis.network import Network
from collections import defaultdict
from IPython.display import display, IFrame


In [3]:
def fetch_metapaths(source_gene_ids, target_node_ids, p_thresh=0.01):
    metapath_records = []
    
    for source_id in source_gene_ids:
        for target_id in target_node_ids:
            url = f"https://search-api.het.io/v1/metapaths/source/{source_id}/target/{target_id}/"
            response = requests.get(url)
            if response.status_code == 200:
                data_metapaths = response.json()
                for pc in data_metapaths.get("path_counts", []):
                    if pc.get("adjusted_p_value", 1) < p_thresh:
                        metapath_records.append({
                            "source_id": source_id,
                            "target_id": target_id,
                            "metapath_id": pc.get("metapath_id"),
                            "metapath_abbr": pc.get("metapath_name"),
                            "dwpc": pc.get("dwpc"),
                            "p_value": pc.get("p_value"),
                            "adjusted_p_value": pc.get("adjusted_p_value"),
                            "path_count": pc.get("path_count"),
                        })
            else:
                print(f"Failed to fetch metapaths for source {source_id} and target {target_id}")
    
    return pd.DataFrame(metapath_records)


def fetch_paths(df_metapaths):
    source_tagged_paths = []
    nodes_dict, rels_dict = {}, {}
    for _, row in df_metapaths.iterrows():
        source_id = row["source_id"]
        target_id = row["target_id"]
        metapath_id = row["metapath_id"]
        url = f"http://search-api.het.io/v1/paths/source/{source_id}/target/{target_id}/metapath/{metapath_id}/?format=json"
        response = requests.get(url)
        if response.status_code == 200:
            data_paths = response.json()
            for path in data_paths["paths"]:
                path["source_id"] = source_id
                source_tagged_paths.append(path)
            nodes_dict.update(data_paths["nodes"])
            rels_dict.update(data_paths["relationships"])
        else:
            print(f"Failed to fetch paths for source {source_id} and metapath {metapath_id}")
    return pd.DataFrame(source_tagged_paths), nodes_dict, rels_dict


def map_node_ids_to_names(node_id_list, nodes_dict):
    return [nodes_dict[str(nid)]["properties"]["name"] for nid in node_id_list]

def map_rel_ids_to_kinds(rel_id_list, rels_dict):
    return [rels_dict[str(rid)]["kind"] for rid in rel_id_list]


In [4]:
import requests
import pandas as pd
from pyvis.network import Network
from IPython.display import IFrame
from collections import defaultdict
import textwrap

# === Utility function to wrap long node labels ===
def wrap_label(text, max_width=12):
    return "\n".join(textwrap.wrap(text, width=max_width))

# === Visualization function ===
def build_network(df_paths, nodes_dict, rels_dict, output_html="hetionet_final_white_text.html"):
    # Color maps
    relation_color_map = {
        "participates": "#424242", "upregulates": "#2E7D32", "downregulates": "#C62828",
        "expresses": "#1565C0", "localizes": "#00897B", "binds": "#6A1B9A",
        "interacts": "#37474F", "associates": "#5D4037", "covaries": "#546E7A",
        "treats": "#EF6C00", "palliates": "#F57C00", "causes": "#FDD835",
        "resembles": "#9E9E9E", "includes": "#7B1FA2", "regulates": "#0097A7"
    }

    metanode_color_map = {
        "Gene": "#42A5F5", "Disease": "#795548", "Anatomy": "#66BB6A", "Pathway": "#FB8C00",
        "Biological Process": "#FB8C00", "Molecular Function": "#FB8C00",
        "Cellular Component": "#FB8C00", "Symptom": "#607D8B", "Compound": "#D81B60",
        "Side Effect": "#FFEB3B", "Pharmacologic Class": "#8E24AA"
    }

    metanode_x_pos = {
        "Pathway": -1000, "Biological Process": -850, "Molecular Function": -700,
        "Cellular Component": -550, "Pharmacologic Class": -400, "Side Effect": -250,
        "Compound": -100, "Gene": 100, "Anatomy": 300, "Disease": 500, "Symptom": 700
    }

    # Identify source/target nodes separately
    source_nodes = set()
    target_nodes = set()
    for nodes in df_paths["node_names"]:
        if isinstance(nodes, list) and len(nodes) >= 2:
            source_nodes.add(nodes[0])
            target_nodes.add(nodes[-1])

    # Initialize PyVis network
    net = Network(height="750px", width="100%", notebook=True, directed=False)
    net.cdn_resources = 'in_line'
    net.set_options("""
    var options = {
      "nodes": {
        "font": {"size": 16, "face": "arial", "multi": true, "align": "center", "color": "white"},
        "shape": "circle",
        "scaling": {"min": 50, "max": 50}
      },
      "edges": {
        "width": 3,
        "font": {"size": 14, "align": "middle"},
        "smooth": false
      },
      "layout": {"improvedLayout": true},
      "physics": {"enabled": false}
    }""")

    node_positions = defaultdict(set)
    for _, row in df_paths.iterrows():
        for node in row["node_names"]:
            nid = next((k for k, v in nodes_dict.items() if v["properties"]["name"] == node), None)
            if nid:
                metanode = nodes_dict[str(nid)].get("metanode", "Unknown")
                node_positions[metanode].add(node)

    center_offset = {k: -(len(v) - 1) * 60 for k, v in node_positions.items()}
    y_tracker = defaultdict(lambda: 0)
    added_nodes = set()

    # Add nodes and edges
    for _, row in df_paths.iterrows():
        for i in range(len(row["rel_names"])):
            src, tgt, rel = row["node_names"][i], row["node_names"][i + 1], row["rel_names"][i]

            for node in [src, tgt]:
                if node in added_nodes:
                    continue
                nid = next((k for k, v in nodes_dict.items() if v["properties"]["name"] == node), None)
                if nid is None:
                    continue
                node_data = nodes_dict[str(nid)]
                metanode = node_data.get("metanode", "Unknown")
                color = metanode_color_map.get(metanode, "lightgray")
                x = metanode_x_pos.get(metanode, 0)
                y = y_tracker[metanode] + center_offset[metanode]
                y_tracker[metanode] += 120
                label = wrap_label(node)

                # Determine highlight color
                if node in source_nodes:
                    border_color = "green"
                elif node in target_nodes:
                    border_color = "red"
                else:
                    border_color = "gray"

                net.add_node(
                    node, label=label, title=f"{metanode}: {node}", shape="circle",
                    x=x, y=y, fixed={"x": True, "y": True},
                    borderWidth=6 if node in source_nodes.union(target_nodes) else 2,
                    color={
                        "border": border_color,
                        "background": color,
                        "highlight": {"border": border_color, "background": color}
                    } if node in source_nodes.union(target_nodes) else color
                )
                added_nodes.add(node)

            net.add_edge(
                src, tgt, label=rel, title=f"Relation: {rel}",
                smooth=False, color=relation_color_map.get(rel, "gray"), width=3
            )

    # Export and display
    net.write_html(output_html)
    return IFrame(output_html, width="100%", height="750px")


In [5]:
# === Inputs ===
source_gene_ids = [18631, 15599, 38110, 45583]  # PF4, IFNG, etc.
target_node_id = [25506]  # Down Syndrome

# === Step 1: Fetch Metapaths ===
df_metapaths = fetch_metapaths(source_gene_ids, target_node_id)
display(df_metapaths.head())

# === Step 2: Fetch Paths ===
df_paths, nodes_dict, rels_dict = fetch_paths(df_metapaths)

# === Step 3: Filter and Map ===
if not df_paths.empty:
    df_paths = (
        df_paths.groupby("source_id", group_keys=False)
        .apply(lambda g: g.nlargest(max(1, int(len(g) * 0.01)), "score"))
        .reset_index(drop=True)
    )

    df_paths["node_names"] = df_paths["node_ids"].apply(lambda ids: map_node_ids_to_names(ids, nodes_dict))
    df_paths["rel_names"] = df_paths["rel_ids"].apply(lambda ids: map_rel_ids_to_kinds(ids, rels_dict))
    display(df_paths)

    # === Step 4: Visualize ===
    iframe = build_network(df_paths, nodes_dict, rels_dict)
    display(iframe)
else:
    print("No paths found for visualization.")


Unnamed: 0,source_id,target_id,metapath_id,metapath_abbr,dwpc,p_value,adjusted_p_value,path_count
0,18631,25506,GeAlDpS,Gene–expresses–Anatomy–localizes–Disease–prese...,1.748917,0.000102,0.002453,22
1,15599,25506,GeAlDpS,Gene–expresses–Anatomy–localizes–Disease–prese...,2.286689,6.4e-05,0.001544,74


  .apply(lambda g: g.nlargest(max(1, int(len(g) * 0.01)), "score"))


Unnamed: 0,metapath,node_ids,rel_ids,PDP,percent_of_DWPC,score,source_id,PC,DWPC,node_names,rel_names
0,GeAlDpS,"[15599, 601, 9851, 25506]","[479433, 1504799, 1568241]",8.8e-05,13.452783,37.822104,15599,,,"[IFNG, skin epidermis, alopecia areata, Down S...","[expresses, localizes, presents]"
1,GeAlDpS,"[18631, 41399, 41316, 25506]","[2134494, 684041, 527093]",5.8e-05,15.642953,40.834178,18631,,,"[PF4, blood plasma, hepatitis B, Down Syndrome]","[expresses, localizes, presents]"




In [6]:
# === Inputs ===
source_gene_ids = [18631, 15599, 38110, 45583]  # PF4, IFNG, etc.
target_node_id = [37154]  # Obesiy

# === Step 1: Fetch Metapaths ===
df_metapaths = fetch_metapaths(source_gene_ids, target_node_id)
display(df_metapaths.head())

# === Step 2: Fetch Paths ===
df_paths, nodes_dict, rels_dict = fetch_paths(df_metapaths)

# === Step 3: Filter and Map ===
if not df_paths.empty:
    df_paths = (
        df_paths.groupby("source_id", group_keys=False)
        .apply(lambda g: g.nlargest(max(1, int(len(g) * 0.01)), "score"))
        .reset_index(drop=True)
    )

    df_paths["node_names"] = df_paths["node_ids"].apply(lambda ids: map_node_ids_to_names(ids, nodes_dict))
    df_paths["rel_names"] = df_paths["rel_ids"].apply(lambda ids: map_rel_ids_to_kinds(ids, rels_dict))
    display(df_paths)

    # === Step 4: Visualize ===
    iframe = build_network(df_paths, nodes_dict, rels_dict)
    display(iframe)
else:
    print("No paths found for visualization.")


Unnamed: 0,source_id,target_id,metapath_id,metapath_abbr,dwpc,p_value,adjusted_p_value,path_count
0,18631,37154,GeAlDpS,Gene–expresses–Anatomy–localizes–Disease–prese...,2.247945,6.073651e-07,1.5e-05,9
1,15599,37154,GeAlDpS,Gene–expresses–Anatomy–localizes–Disease–prese...,2.105773,4.431029e-05,0.001063,21
2,45583,37154,GuDpS,Gene–upregulates–Disease–presents–Symptom,5.416331,0.001342101,0.004026,1


  .apply(lambda g: g.nlargest(max(1, int(len(g) * 0.01)), "score"))


Unnamed: 0,metapath,node_ids,rel_ids,PDP,percent_of_DWPC,score,source_id,PC,DWPC,node_names,rel_names
0,GeAlDpS,"[15599, 26127, 17782, 37154]","[1910519, 1388843, 1983217]",0.000149,27.423574,81.53808,15599,,,"[IFNG, exocrine gland, Barrett's esophagus, Ob...","[expresses, localizes, presents]"
1,GeAlDpS,"[18631, 5887, 28137, 37154]","[476495, 872224, 1402401]",0.000114,18.15657,87.811328,18631,,,"[PF4, artery, metabolic syndrome X, Obesity]","[expresses, localizes, presents]"
2,GuDpS,"[45583, 17782, 37154]","[2234164, 1983217]",0.008165,100.0,239.509342,45583,,,"[IL10RB, Barrett's esophagus, Obesity]","[upregulates, presents]"




In [7]:
# === Inputs ===
source_gene_ids = [18631, 15599, 38110, 45583]  # PF4, IFNG, etc.
target_node_id = [34172]  # Sleep Apnea

# === Step 1: Fetch Metapaths ===
df_metapaths = fetch_metapaths(source_gene_ids, target_node_id)
display(df_metapaths.head())

# === Step 2: Fetch Paths ===
df_paths, nodes_dict, rels_dict = fetch_paths(df_metapaths)

# === Step 3: Filter and Map ===
if not df_paths.empty:
    df_paths = (
        df_paths.groupby("source_id", group_keys=False)
        .apply(lambda g: g.nlargest(max(1, int(len(g) * 0.05)), "score"))
        .reset_index(drop=True)
    )

    df_paths["node_names"] = df_paths["node_ids"].apply(lambda ids: map_node_ids_to_names(ids, nodes_dict))
    df_paths["rel_names"] = df_paths["rel_ids"].apply(lambda ids: map_rel_ids_to_kinds(ids, rels_dict))
    display(df_paths)

    # === Step 4: Visualize ===
    iframe = build_network(df_paths, nodes_dict, rels_dict)
    display(iframe)
else:
    print("No paths found for visualization.")


Unnamed: 0,source_id,target_id,metapath_id,metapath_abbr,dwpc,p_value,adjusted_p_value,path_count
0,18631,34172,GeAlDpS,Gene–expresses–Anatomy–localizes–Disease–prese...,2.333189,3.515398e-08,8.436956e-07,24
1,15599,34172,GeAlDpS,Gene–expresses–Anatomy–localizes–Disease–prese...,2.143851,0.0001091902,0.002620564,55
2,45583,34172,GaDpS,Gene–associates–Disease–presents–Symptom,4.990626,0.003214477,0.009643432,1


  .apply(lambda g: g.nlargest(max(1, int(len(g) * 0.05)), "score"))


Unnamed: 0,metapath,node_ids,rel_ids,PDP,percent_of_DWPC,score,source_id,PC,DWPC,node_names,rel_names
0,GeAlDpS,"[15599, 19018, 2961, 34172]","[765039, 1654680, 2123983]",4.5e-05,7.983133,20.609296,15599,,,"[IFNG, respiratory system, idiopathic pulmonar...","[expresses, localizes, presents]"
1,GeAlDpS,"[15599, 47008, 28137, 34172]","[70984, 1335952, 2040539]",3.1e-05,5.489678,14.172182,15599,55.0,0.000564,"[IFNG, endothelium, metabolic syndrome X, Slee...","[expresses, localizes, presents]"
2,GeAlDpS,"[18631, 5887, 28137, 34172]","[476495, 872224, 2040539]",7.4e-05,10.743353,65.25313,18631,,,"[PF4, artery, metabolic syndrome X, Sleep Apne...","[expresses, localizes, presents]"
3,GaDpS,"[45583, 26663, 34172]","[251614, 102067]",0.008375,100.0,201.576838,45583,,,"[IL10RB, narcolepsy, Sleep Apnea, Obstructive]","[associates, presents]"


