In [30]:
import pandas as pd
import networkx as nx
import os

# === CONFIG ===
# Source workbook (read with every sheet) and the folder that receives the
# generated .dot files.
file_path = r"Y:\public\projects\AnAl_20240405_Neuromod_PE\PE_mapping\processed_data\dff_downsampled\mean_dff_per_region_by_mouse_brainmapper.xlsx"
output_folder = r"Y:\public\projects\AnAl_20240405_Neuromod_PE\PE_mapping\processed_data\dff_downsampled\trees"

# === LOAD DATA ===
# sheet_name=None returns a dict mapping each sheet name to its DataFrame.
df_by_mouse = pd.read_excel(file_path, sheet_name=None)
os.makedirs(output_folder, exist_ok=True)

# Work on a copy of the first sheet that is not named "summary"
# (case-insensitive) and that has a 'structure_id_path' column.
df = selected_mouse = None
for sheet_name, df_mouse in df_by_mouse.items():
    if sheet_name.lower() == "summary" or "structure_id_path" not in df_mouse.columns:
        continue
    df, selected_mouse = df_mouse.copy(), sheet_name
    break
else:
    # Loop finished without a break: no sheet qualified.
    raise ValueError("No valid sheet found with 'structure_id_path' column.")

# === BUILD FULL TREE ===
# Each distinct, non-missing structure path ("/a/b/c/") contributes one
# directed edge per pair of consecutive ids (a -> b, b -> c).
G = nx.DiGraph()
for raw_path in df["structure_id_path"].dropna().unique():
    ids = str(raw_path).strip("/").split("/")
    G.add_edges_from(zip(ids, ids[1:]))

# Path prefixes used to select the rows belonging to each exported subtree.
# A row belongs to a region when its 'structure_id_path' starts with the prefix.
top_level_paths = {
    "Cerebrum": "/997/8/567/688/",
    "Cerebellum": "/997/8/512/",
    "Midbrain": "/997/8/343/313/",
    "Interbrain": "/997/8/343/1129/",
    "Hindbrain": "/997/8/343/1065/"
}

# Distinct non-missing paths, computed once (they do not depend on the region).
# Values are coerced to str so that .startswith() cannot fail on a non-string
# cell, matching the str(...) coercion used when the full tree is built.
matching_paths = df["structure_id_path"].dropna().astype(str).unique()

for region_name, path_prefix in top_level_paths.items():
    filtered_paths = [p for p in matching_paths if p.startswith(path_prefix)]

    if not filtered_paths:
        print(f"[!] No paths found for {region_name} with prefix {path_prefix}")
        continue

    # Build region-specific graph: one parent -> child edge per pair of
    # consecutive ids, over the full path (so the nodes of the prefix itself
    # are part of the exported tree).
    subG = nx.DiGraph()
    for path in filtered_paths:
        parts = path.strip("/").split("/")
        subG.add_edges_from(zip(parts, parts[1:]))

    # One Graphviz .dot file per region.
    outpath = os.path.join(output_folder, f"structure_tree_{region_name}.dot")
    nx.drawing.nx_pydot.write_dot(subG, outpath)
    print(f"[✔] Saved {region_name} subtree to: {outpath}")



[✔] Saved Cerebrum subtree to: Y:\public\projects\AnAl_20240405_Neuromod_PE\PE_mapping\processed_data\dff_downsampled\trees\structure_tree_Cerebrum.dot
[✔] Saved Cerebellum subtree to: Y:\public\projects\AnAl_20240405_Neuromod_PE\PE_mapping\processed_data\dff_downsampled\trees\structure_tree_Cerebellum.dot
[✔] Saved Midbrain subtree to: Y:\public\projects\AnAl_20240405_Neuromod_PE\PE_mapping\processed_data\dff_downsampled\trees\structure_tree_Midbrain.dot
[✔] Saved Interbrain subtree to: Y:\public\projects\AnAl_20240405_Neuromod_PE\PE_mapping\processed_data\dff_downsampled\trees\structure_tree_Interbrain.dot
[✔] Saved Hindbrain subtree to: Y:\public\projects\AnAl_20240405_Neuromod_PE\PE_mapping\processed_data\dff_downsampled\trees\structure_tree_Hindbrain.dot


In [33]:

import pandas as pd
import networkx as nx

# Load reuploaded Excel file (sheet_name=None -> dict of {sheet name: DataFrame})
file_path = r"Y:\public\projects\AnAl_20240405_Neuromod_PE\PE_mapping\processed_data\dff_downsampled\mean_dff_per_region_by_mouse_brainmapper.xlsx"
df_by_mouse = pd.read_excel(file_path, sheet_name=None)

# Use one mouse to extract full structure hierarchy and values:
# the first sheet that is not "summary" and has a 'structure_id_path' column.
# df is reset first so that a stale DataFrame left over from an earlier cell
# can never be picked up silently when no sheet qualifies.
df = None
selected_mouse = None
for sheet_name, df_mouse in df_by_mouse.items():
    if sheet_name.lower() != "summary" and "structure_id_path" in df_mouse.columns:
        df = df_mouse.copy()
        selected_mouse = sheet_name
        break

if df is None:
    raise ValueError("No valid sheet found with 'structure_id_path' column.")

# Build the hierarchy tree
G = nx.DiGraph()
structure_values = {}  # mean_dff keyed by the last id of each row's path

# Rows without a structure path are skipped: str(NaN) is "nan", which would
# otherwise register a bogus "nan" entry in structure_values.
rows_with_path = df.dropna(subset=["structure_id_path"])

for raw_path, mean_dff in zip(rows_with_path["structure_id_path"], rows_with_path["mean_dff"]):
    parts = str(raw_path).strip("/").split("/")
    # One parent -> child edge per pair of consecutive ids along the path.
    G.add_edges_from(zip(parts, parts[1:]))
    # The final id of the path identifies the row's own structure.
    structure_values[parts[-1]] = mean_dff

# Function to compress tree into non-overlapping representative nodes
def compress_tree(graph, values, max_regions=130, root="997"):
    """Pick up to ``max_regions`` high-value leaves and average values per pick.

    Parameters
    ----------
    graph : networkx.DiGraph
        Hierarchy whose edges point from parent to child.
    values : dict
        Maps node id -> numeric value. Only nodes present here can be selected.
    max_regions : int, default 130
        Upper bound on the number of representative nodes.
    root : hashable, default "997"
        Node from which the path to every other node is traced when looking
        for a selected ancestor. If it is missing from ``graph`` (or a node is
        unreachable from it), that node is simply left unassigned.

    Returns
    -------
    dict
        Representative node -> mean of the values of all nodes assigned to it.

    Notes
    -----
    Leaves are ranked by value, highest first; ordering is not well defined if
    values contain NaN (inputs are not filtered here).

    NOTE(review): only leaves (out-degree 0) are ever added to ``keep``, while
    every ancestor of a node has out-degree >= 1. Both "ancestor in keep"
    checks below therefore never match, and the result is exactly the top
    ``max_regions`` leaves, each with its own value. Confirm whether collapsing
    lower-ranked regions into a parent structure was intended.
    """
    leaves = [n for n in graph.nodes if graph.out_degree(n) == 0 and n in values]
    ranked = sorted(leaves, key=lambda n: values[n], reverse=True)

    # Greedy selection in rank order (plain iteration instead of the O(n)
    # list.pop(0) per step).
    keep = set()
    for node in ranked:
        if len(keep) >= max_regions:
            break
        if not (nx.ancestors(graph, node) & keep):
            keep.add(node)

    # Map every node to itself if selected, else to its nearest selected
    # ancestor on the path from the root (if any).
    collapsed = {}
    for node in graph.nodes:
        if node in keep:
            collapsed[node] = node
            continue
        try:
            path = nx.shortest_path(graph, source=root, target=node)
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            # NodeNotFound: root is not in the graph; NoPath: node unreachable.
            continue
        for ancestor in reversed(path):
            if ancestor in keep:
                collapsed[node] = ancestor
                break

    # Gather the known values of all nodes assigned to each representative.
    grouped = {}
    for node, representative in collapsed.items():
        bucket = grouped.setdefault(representative, [])
        if node in values:
            bucket.append(values[node])

    # Average per representative; an empty bucket is skipped rather than
    # dividing by zero.
    return {rep: sum(vals) / len(vals) for rep, vals in grouped.items() if vals}

# Apply compression
collapsed_result = compress_tree(G, structure_values, max_regions=130)

# Create DataFrame: one row per representative region, highest mean_dff first
collapsed_df = (
    pd.DataFrame(list(collapsed_result.items()), columns=["region_id", "mean_dff"])
    .sort_values("mean_dff", ascending=False)
    .reset_index(drop=True)
)

# Save the table
write_path = r"Y:\public\projects\AnAl_20240405_Neuromod_PE\PE_mapping\processed_data\dff_downsampled\collapsed_structure_values.xlsx"
collapsed_df.to_excel(write_path, index=False)

# Preview: a bare expression is only rendered when it is the last line of the cell
collapsed_df.head()
