In [None]:
import obonet
import pandas as pd
from collections import defaultdict
import sys
from pathlib import Path
import os
# from pypalettes import load_cmap

# Make the project-level `utils` package importable from this notebook.
# NOTE(review): assumes the kernel's working directory sits one level below the
# project root (i.e. the notebook's own folder) — confirm for other launch setups.
current_dir = Path.cwd()
parent_dir = current_dir.parent
# Guard the insertion so re-running this cell does not pile up duplicate
# entries in sys.path.
if str(parent_dir) not in sys.path:
    sys.path.append(str(parent_dir))

# Import the ontology helper functions from the project's utils package
from utils.helpers import load_ontology, create_id_name_mappings, subset_ontology_by_term

In [None]:
# Load the HPO release stored in the local OBO file and derive the graph
# variants used by the analysis cells.
# NOTE(review): load_ontology / subset_ontology_by_term are project helpers not
# visible here; they presumably return networkx directed graphs (both results
# are used with .reverse()) — confirm return type and edge direction in utils.helpers.
hpo = load_ontology("../data/hpo_v2025_01_16.obo") #  Load the full ontology
# Reversed version of the full ontology.
hpor = hpo.reverse()
# Presumably keeps only the part of the ontology under term 'HP:0000118' — TODO confirm.
hpos = subset_ontology_by_term(hpo,'HP:0000118') # Subset of the ontology
# Reversed subset; later code calls .successors(root) on it to list the
# root's top-level branches.
hpors = hpos.reverse() # Reversed subset

# Lookup tables built from the full ontology; id2name is queried below with
# .get(term_id, default). NOTE(review): name2id is presumably the inverse mapping.
id2name, name2id = create_id_name_mappings(hpo) # id -> name and name -> id lookups

In [None]:
import networkx as nx
import pandas as pd
import numpy as np

# Term whose subtree is analysed below.
ROOT_ID = "HP:0000118"

# Accumulator: one statistics record (dict) per analysed subtree.
results = []

# Direct successors of the root in the reversed graph; each one is treated as
# a top-level branch.
branches = [*hpors.successors(ROOT_ID)]

def compute_depths(subgraph, root):
    """Return the shortest-path distance from ``root`` to each reachable node.

    Args:
        subgraph: Graph to traverse; passed unchanged to
            ``nx.single_source_shortest_path_length``.
        root: Node the distances are measured from.

    Returns:
        list: One hop count per node reachable from ``root``, in the order
        networkx reports them. ``root`` itself is included, so the list is
        never empty.
    """
    hops_by_node = nx.single_source_shortest_path_length(subgraph, root)
    return [*hops_by_node.values()]

# 1-2. Collect statistics for every top-level branch, then for the whole
# subtree under ROOT_ID (root included) as a final "total" row.
# Both kinds of row go through the same loop body, so the branch rows and the
# total row are always computed the same way.
subtree_roots = [(branch_id, False) for branch_id in branches] + [(ROOT_ID, True)]

for subtree_root, is_total in subtree_roots:
    # A subtree is its root plus everything reachable from it in the
    # reversed graph.
    descendants = nx.descendants(hpors, subtree_root)
    nodes = descendants | {subtree_root}
    subgraph = hpors.subgraph(nodes).copy()
    # Shortest-path distances from the subtree root; the root contributes 0.
    depths = compute_depths(subgraph, subtree_root)

    results.append({
        "branch id": subtree_root,
        # Fall back to the raw identifier when no name is known for the term.
        "branch name": id2name.get(subtree_root, subtree_root),
        "number of nodes": len(subgraph.nodes),
        "number of edges": len(subgraph.edges),
        "average depth": np.mean(depths),
        "median depth": np.median(depths),
        "maximal depth": np.max(depths),
        # Marks the aggregate row so it can be kept apart from the branches.
        "is_total": is_total,
    })

# 3. Build the summary table: one row per analysed subtree.
summary_df = pd.DataFrame(results)

# 4. Sort only the branch rows (largest first) and keep the total row last.
# The "is_total" flag is used directly as a boolean mask instead of being
# compared against True/False; astype(bool) keeps `~` a logical negation even
# if the column was stored with object dtype.
is_total_mask = summary_df["is_total"].astype(bool)
df_branches = summary_df[~is_total_mask].sort_values(by="number of nodes", ascending=False)
df_total = summary_df[is_total_mask]
summary_df_sorted = pd.concat([df_branches, df_total], ignore_index=True)

# 5. Display the sorted summary as a styled table.
from IPython.display import display

# Number formats applied to the depth columns.
depth_formats = {
    "average depth": "{:.2f}",
    "median depth": "{:.0f}",
    "maximal depth": "{:.0f}",
}

summary_styler = summary_df_sorted.style.format(depth_formats)
# Shade the size columns.
summary_styler = summary_styler.background_gradient(
    subset=["number of nodes", "number of edges"],
    cmap="Blues",
)
# Draw an in-cell bar for the maximal depth.
summary_styler = summary_styler.bar(subset=["maximal depth"], color="#FFA07A")

display(summary_styler)
