In [2]:
from pathlib import Path

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

from helpers import load_ontology, subset_ontology_by_term
from top_hpo_barplot import compute_top_hpo_by_branch, plot_top_hpo_bar

# 1) Load the EHR matrix; the first CSV column becomes the index.
#    The term HP:0100021 is excluded from the analysis; `errors="ignore"`
#    makes the drop a no-op when that column is not in the file.
ehr_hpo = pd.read_csv(
    "../data/ohe_20kRennes_EHR_2025_03_10.csv",
    index_col=0,
).drop(columns=["HP:0100021"], errors="ignore")

# 2) Load the ontology, restrict it with the root term, and build `hpors`
#    by reversing the result (for a networkx DiGraph, `.reverse()` returns a
#    copy with every edge flipped).
root_term = "HP:0000118"
hpo = load_ontology("../data/hpo_v2025_01_16.obo")
hpos = subset_ontology_by_term(hpo, root_term)
hpors = hpos.reverse()

# 3) Compute the top HPO terms together with their main branch.
top_counts = compute_top_hpo_by_branch(ehr_hpo, hpors, root_id=root_term, top_n=20)

# Preview the first rows (last expression of the cell, so it is displayed).
top_counts.head()


Unnamed: 0,hpo_id,count,hpo_name,main_branch_id,main_branch_name
0,HP:0002664,2492,Neoplasm,HP:0002664,Neoplasm
1,HP:0001250,1726,Seizure,HP:0000707,Abnormality of the nervous system
2,HP:0003002,1521,Breast carcinoma,HP:0002664,Neoplasm
3,HP:0001263,1178,Global developmental delay,HP:0000707,Abnormality of the nervous system
4,HP:0012759,1050,Neurodevelopmental abnormality,HP:0000707,Abnormality of the nervous system


In [None]:
# Colour palette: one entry per main branch that will receive a colour.
custom_colors = [
    "#CA3C66", "#CBEFB6", "#3357FF", "#B36A5E", "#7C9ACC",
    "#33FFF5", "#FFC733", "#8DFF33", "#A7E0E0", "#FEBB5F",
    "#3366FF", "#E3997E", "#FAEDCD", "#33CCFF", "#FFCC33",
    "#A0C6A9", "#EAC3A9", "#805050", "#EED7C5", "#F7AF9D",
    "#66CCFF", "#8CACD3", "#99FF33", "#CC7567", "#FF33FF",
]

ROOT_ID = "HP:0000118"

# Direct successors of the root in `hpors`, in the order the graph yields them.
children_of_root = list(hpors.successors(ROOT_ID))

# Example: keep the first 25 branches, i.e. as many as there are palette
# entries, so every kept branch gets a colour.
main_branches = children_of_root[: len(custom_colors)]

# Pair branches and colours positionally. The last entry is optional: it gives
# the root itself a dedicated colour.
branch_color_mapping = {
    **dict(zip(main_branches, custom_colors)),
    ROOT_ID: "#FF6347",
}


In [None]:
# Arguments shared by both renderings, defined once so the on-screen figure and
# the saved figure cannot drift apart.
bar_kwargs = dict(
    branch_color_mapping=branch_color_mapping,
    title="Top 20 HPO terms by frequency",
    annotate=True,
)

# Figure 1: displayed only.
fig, ax = plt.subplots(figsize=(8, 8))
plot_top_hpo_bar(top_counts, ax=ax, **bar_kwargs)
plt.show()

# Figure 2: same plot, additionally written to disk (optional).
savepath = "../output/top20_barplot_wCP.png"
# Make sure the target directory exists before saving. Presumably the helper
# saves through matplotlib, which does not create missing directories; this is
# a harmless no-op if the directory is already there.
Path(savepath).parent.mkdir(parents=True, exist_ok=True)

fig, ax = plt.subplots(figsize=(8, 8))
plot_top_hpo_bar(top_counts, ax=ax, savepath=savepath, **bar_kwargs)
plt.show()
