# Interactive sunburst graphs to explore data

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import plotly.express as px
import numpy as np
import io

## Using mean intensity per volume

In [None]:
# --- Load Data and Preprocess ---

# Read the two input tables (relative paths, so they are resolved against the
# current working directory). If a file is missing, print a short hint and
# re-raise the original error so execution stops here instead of continuing
# without the DataFrames.
try:
    df_quanti = pd.read_csv('quanti.csv')
    df_structures = pd.read_csv('structures.csv')
except FileNotFoundError:
    print("Error: 'quanti.csv' or 'structures.csv' not found.")
    raise

# --- Helper Functions (Improved) ---

def get_structure_data(df_quanti, structure_name):
    """Return the first quantification row whose 'structure_name' matches.

    When no row matches, an all-zero float64 Series with the keys
    'total_intensity', 'volume_mm3' and 'intensity_per_volume' is returned
    so callers can index the result the same way in both cases.
    """
    name_matches = df_quanti['structure_name'] == structure_name
    matching_rows = df_quanti.loc[name_matches]
    if matching_rows.empty:
        return pd.Series(
            {'total_intensity': 0, 'volume_mm3': 0, 'intensity_per_volume': 0},
            dtype="float64",
        )
    return matching_rows.iloc[0]

def calculate_intensity_per_volume(total_intensity, total_volume):
    """Return total_intensity / total_volume, or 0 when the volume is not positive."""
    if total_volume > 0:
        return total_intensity / total_volume
    return 0

# --- Build Hierarchy ---

def build_hierarchy_from_path(df_structures, df_quanti):
    """Build the structure tree and roll quantification up from leaves to parents.

    Args:
        df_structures: DataFrame with one row per structure. The columns
            'id', 'name', 'acronym', 'parent_structure_id' and
            'structure_id_path' (a '/'-separated string) are read.
        df_quanti: DataFrame handed to ``get_structure_data`` to look up a
            leaf by its name; the returned record must provide
            'total_intensity', 'volume_mm3' and 'intensity_per_volume'.

    Returns:
        dict mapping each structure id to a node dict with the keys
        'children', 'name', 'acronym', 'parent_structure_id', 'level',
        'total_intensity', 'total_volume', 'intensity_per_volume' and
        'structure_id_path'. An empty ``df_structures`` yields ``{}``.
    """
    hierarchy = {}

    # 1. Create nodes; the level is the number of ids in the path minus one.
    for _, row in df_structures.iterrows():
        path_parts = row['structure_id_path'].strip('/').split('/')
        hierarchy[row['id']] = {
            'children': [],
            'name': row['name'],
            'acronym': row['acronym'],
            'parent_structure_id': row['parent_structure_id'],
            'level': len(path_parts) - 1,
            'total_intensity': 0,
            'total_volume': 0,
            'intensity_per_volume': 0,
            'structure_id_path': row['structure_id_path']
        }

    # Nothing to link or aggregate for an empty structures table.
    if not hierarchy:
        return hierarchy

    # 2. Build relationships: register every node with its parent when the
    #    parent id is itself a known structure.
    for structure_id, node in hierarchy.items():
        parent_id = node['parent_structure_id']
        if parent_id in hierarchy:
            hierarchy[parent_id]['children'].append(structure_id)

    # 3. Bucket the nodes by level once (keeping dict order inside a level)
    #    instead of rescanning the whole hierarchy for every level.
    nodes_by_level = {}
    for node in hierarchy.values():
        nodes_by_level.setdefault(node['level'], []).append(node)

    # 4. Aggregate data (bottom-up): deepest level first, so child totals are
    #    final before their parent sums them.
    for level in sorted(nodes_by_level, reverse=True):
        for node in nodes_by_level[level]:
            if not node['children']:  # Leaf nodes: take the measured values
                data = get_structure_data(df_quanti, node['name'])
                node['total_intensity'] = data['total_intensity']
                node['total_volume'] = data['volume_mm3']
                node['intensity_per_volume'] = data['intensity_per_volume']
            else:  # Parent nodes: sum the children, then derive the ratio
                children = [hierarchy[child_id] for child_id in node['children']]
                total_intensity = sum(child['total_intensity'] for child in children)
                total_volume = sum(child['total_volume'] for child in children)
                node['total_intensity'] = total_intensity
                node['total_volume'] = total_volume
                node['intensity_per_volume'] = calculate_intensity_per_volume(total_intensity, total_volume)

    return hierarchy

# Build the aggregated structure tree from the two loaded tables.
hierarchy = build_hierarchy_from_path(df_structures.copy(), df_quanti.copy())


# --- Prepare Data for Plotly Sunburst ---

data_for_sunburst = []
# id -> name lookup used to turn the ids of a structure_id_path into labels.
structure_name_map = df_structures.set_index('id')['name'].to_dict()

for structure_id, data in hierarchy.items():
    # *KEY CHANGE: Only process LEAF nodes* that have a positive volume.
    if data['children'] or not data['total_volume'] > 0:
        continue

    path_str = data['structure_id_path'].strip('/')
    path_parts_ids = path_str.split('/') if path_str else []

    row_data = {
        'id': structure_id,
        'name': data['name'],
        'acronym': data['acronym'],
        'intensity_per_volume': data['intensity_per_volume'],
        'total_intensity': data['total_intensity'],
        'total_volume': data['total_volume']
    }

    # One 'path_<i>' column per id, in the order the ids appear in
    # structure_id_path; ids without a known name are labelled "ID_<id>".
    for i, part_id_str in enumerate(path_parts_ids):
        try:
            part_id = int(part_id_str)
        except ValueError:
            # Keep the raw text as the label when it is not an integer id.
            print(f"Warning: Invalid ID in structure_id_path: {part_id_str}")
            row_data[f'path_{i}'] = part_id_str
        else:
            row_data[f'path_{i}'] = structure_name_map.get(part_id, f"ID_{part_id}")

    data_for_sunburst.append(row_data)


df_sunburst = pd.DataFrame(data_for_sunburst)

# Determine max_depth after filtering for leaf nodes: the largest number of
# 'path_*' entries found in any row (0 when no row was kept).
max_depth = max(
    (sum(key.startswith('path_') for key in row) for row in data_for_sunburst),
    default=0,
)
path_cols = [f'path_{i}' for i in range(max_depth)]


# --- Create Interactive Sunburst Chart ---

fig = px.sunburst(
    df_sunburst,
    path=path_cols,
    values='intensity_per_volume',
    color='intensity_per_volume',
    hover_data={'name': False, 'intensity_per_volume': ':.2e', 'acronym': False, 'total_volume': False, 'id': False},
    color_continuous_scale=px.colors.sequential.Plasma,
    branchvalues="total",
)

fig.update_layout(
    title='Brainglow - Intensity per volume, all regions',
    margin=dict(l=0, r=0, b=0, t=30)
)

fig.show()

In [3]:
# Save the figure held in `fig` as an interactive HTML file. The path is
# relative, so the file is written to the current working directory.
fig.write_html("intensity_per_volume_sunburst.html")

### Mean intensity per volume of regions above brain average

In [None]:
# --- Load Data and Preprocess ---

# Read the two input tables (relative paths, so they are resolved against the
# current working directory). A missing file is reported with a short hint
# and the original error is re-raised so execution stops here.
try:
    df_quanti = pd.read_csv('quanti.csv')
    df_structures = pd.read_csv('structures.csv')  # Read structures.csv from file
except FileNotFoundError:
    print("Error: 'quanti.csv' or 'structures.csv' not found.")
    raise  # Re-raise the FileNotFoundError (alternative: sys.exit(1))

# --- Helper Functions ---

def get_structure_data(df_quanti, structure_name):
    """Look up a structure by name in df_quanti, tolerating missing entries.

    Returns the first row whose 'structure_name' equals ``structure_name``.
    If there is none, returns a float64 Series of zeros keyed by
    'total_intensity', 'volume_mm3' and 'intensity_per_volume', so the
    caller can read those keys without special-casing a miss.
    """
    is_requested = df_quanti['structure_name'] == structure_name
    if is_requested.any():
        return df_quanti.loc[is_requested].iloc[0]

    zero_record = {'total_intensity': 0, 'volume_mm3': 0, 'intensity_per_volume': 0}
    return pd.Series(zero_record, dtype="float64")

def calculate_intensity_per_volume(total_intensity, total_volume):
    """Divide intensity by volume; any volume that is not > 0 gives 0."""
    # Guard first: covers zero and negative volumes (and NaN, which is not > 0).
    if not total_volume > 0:
        return 0
    return total_intensity / total_volume

# --- Build Hierarchy ---

def build_hierarchy_from_path(df_structures, df_quanti):
    """Build the structure tree and aggregate quantification bottom-up.

    Args:
        df_structures: DataFrame with one row per structure. The columns
            'id', 'name', 'acronym', 'parent_structure_id' and
            'structure_id_path' (a '/'-separated string) are read.
        df_quanti: DataFrame handed to ``get_structure_data`` to look up a
            leaf by its name; the returned record must provide
            'total_intensity', 'volume_mm3' and 'intensity_per_volume'.

    Returns:
        dict mapping each structure id to a node dict with the keys
        'children', 'name', 'acronym', 'parent_structure_id', 'level',
        'total_intensity', 'total_volume', 'intensity_per_volume' and
        'structure_id_path'. An empty ``df_structures`` yields ``{}``.
    """
    hierarchy = {}

    # 1. Create nodes; the level is the number of ids in the path minus one.
    for _, row in df_structures.iterrows():
        path_parts = row['structure_id_path'].strip('/').split('/')
        hierarchy[row['id']] = {
            'children': [],
            'name': row['name'],
            'acronym': row['acronym'],
            'parent_structure_id': row['parent_structure_id'],
            'level': len(path_parts) - 1,
            'total_intensity': 0,
            'total_volume': 0,
            'intensity_per_volume': 0,
            'structure_id_path': row['structure_id_path']
        }

    # Nothing to link or aggregate for an empty structures table.
    if not hierarchy:
        return hierarchy

    # 2. Build relationships: register every node with its parent when the
    #    parent id is itself a known structure.
    for structure_id, node in hierarchy.items():
        parent_id = node['parent_structure_id']
        if parent_id in hierarchy:
            hierarchy[parent_id]['children'].append(structure_id)

    # 3. Bucket the nodes by level once (keeping dict order inside a level)
    #    instead of rescanning the whole hierarchy for every level.
    nodes_by_level = {}
    for node in hierarchy.values():
        nodes_by_level.setdefault(node['level'], []).append(node)

    # 4. Aggregate data (bottom-up): deepest level first, so child totals are
    #    final before their parent sums them.
    for level in sorted(nodes_by_level, reverse=True):
        for node in nodes_by_level[level]:
            if not node['children']:  # Leaf nodes: take the measured values
                data = get_structure_data(df_quanti, node['name'])
                node['total_intensity'] = data['total_intensity']
                node['total_volume'] = data['volume_mm3']
                node['intensity_per_volume'] = data['intensity_per_volume']
            else:  # Parent nodes: sum the children, then derive the ratio
                children = [hierarchy[child_id] for child_id in node['children']]
                total_intensity = sum(child['total_intensity'] for child in children)
                total_volume = sum(child['total_volume'] for child in children)
                node['total_intensity'] = total_intensity
                node['total_volume'] = total_volume
                node['intensity_per_volume'] = calculate_intensity_per_volume(total_intensity, total_volume)

    return hierarchy

# Build the aggregated structure tree from the two loaded tables.
hierarchy = build_hierarchy_from_path(df_structures.copy(), df_quanti.copy())

# --- Prepare Data for Plotly Sunburst ---

data_for_sunburst = []
# id -> name lookup used to turn the ids of a structure_id_path into labels.
structure_name_map = df_structures.set_index('id')['name'].to_dict()

# Select the leaf nodes with a positive volume once; both the average and the
# filter below work on exactly this set.
measured_leaves = {
    structure_id: data
    for structure_id, data in hierarchy.items()
    if not data['children'] and data['total_volume'] > 0
}

# Calculate the average intensity_per_volume (unweighted mean over the
# selected leaves; 0 when there are none).
leaf_count = len(measured_leaves)
total_intensity_per_volume = sum(data['intensity_per_volume'] for data in measured_leaves.values())
average_intensity_per_volume = total_intensity_per_volume / leaf_count if leaf_count > 0 else 0

# Now keep only the leaves strictly above the average
for structure_id, data in measured_leaves.items():
    if not data['intensity_per_volume'] > average_intensity_per_volume:
        continue

    path_str = data['structure_id_path'].strip('/')
    path_parts_ids = path_str.split('/') if path_str else []

    row_data = {
        'id': structure_id,
        'name': data['name'],
        'acronym': data['acronym'],
        'intensity_per_volume': data['intensity_per_volume'],
        'total_intensity': data['total_intensity'],
        'total_volume': data['total_volume']
    }

    # One 'path_<i>' column per id, in the order the ids appear in
    # structure_id_path; ids without a known name are labelled "ID_<id>".
    for i, part_id_str in enumerate(path_parts_ids):
        try:
            part_id = int(part_id_str)
        except ValueError:
            # Keep the raw text as the label when it is not an integer id.
            print(f"Warning: Invalid ID in structure_id_path: {part_id_str}")
            row_data[f'path_{i}'] = part_id_str
        else:
            row_data[f'path_{i}'] = structure_name_map.get(part_id, f"ID_{part_id}")

    data_for_sunburst.append(row_data)


df_sunburst = pd.DataFrame(data_for_sunburst)

# Determine max_depth after filtering for leaf nodes: the largest number of
# 'path_*' entries found in any row (0 when no row was kept).
max_depth = max(
    (sum(key.startswith('path_') for key in row) for row in data_for_sunburst),
    default=0,
)
path_cols = [f'path_{i}' for i in range(max_depth)]


# --- Create Interactive Sunburst Chart ---

fig = px.sunburst(
    df_sunburst,
    path=path_cols,
    values='intensity_per_volume',
    color='intensity_per_volume',
    hover_data={'name': False, 'intensity_per_volume': ':.2e', 'acronym': False, 'total_volume': False, 'id': False},
    color_continuous_scale=px.colors.sequential.Plasma,
    branchvalues="total",
)

fig.update_layout(
    title='Brainglow - Above average intensity per volume',
    margin=dict(l=0, r=0, b=0, t=30)
)

fig.show()

In [5]:
# Save the figure held in `fig` as an interactive HTML file. The path is
# relative, so the file is written to the current working directory.
fig.write_html("above_average_regions_intensity_sunburst.html")

### Total intensity of regions

In [None]:
# --- Load Data and Preprocess ---

# Read the two input tables (relative paths, so they are resolved against the
# current working directory). A missing file is reported with a short hint
# and the original error is re-raised so execution stops here.
try:
    df_quanti = pd.read_csv('quanti.csv')
    df_structures = pd.read_csv('structures.csv')
except FileNotFoundError:
    print("Error: 'quanti.csv' or 'structures.csv' not found.")
    raise

# --- Helper Functions ---

def get_structure_data(df_quanti, structure_name):
    """Fetch the first df_quanti row for a structure name, or a zero record.

    The zero record is a float64 Series with the keys 'total_intensity',
    'volume_mm3' and 'intensity_per_volume'; it is returned when no row has
    the requested 'structure_name'.
    """
    hits = df_quanti[df_quanti['structure_name'] == structure_name]
    if len(hits) > 0:
        return hits.iloc[0]
    return pd.Series(
        {'total_intensity': 0, 'volume_mm3': 0, 'intensity_per_volume': 0},
        dtype="float64",
    )

def calculate_intensity_per_volume(total_intensity, total_volume):
    """Return the intensity/volume ratio, falling back to 0 unless volume > 0."""
    if total_volume > 0:
        ratio = total_intensity / total_volume
    else:
        ratio = 0
    return ratio

# --- Build Hierarchy ---

def build_hierarchy_from_path(df_structures, df_quanti):
    """Build the structure tree and aggregate quantification bottom-up.

    Args:
        df_structures: DataFrame with one row per structure. The columns
            'id', 'name', 'acronym', 'parent_structure_id' and
            'structure_id_path' (a '/'-separated string) are read.
        df_quanti: DataFrame handed to ``get_structure_data`` to look up a
            leaf by its name; the returned record must provide
            'total_intensity', 'volume_mm3' and 'intensity_per_volume'.

    Returns:
        dict mapping each structure id to a node dict with the keys
        'children', 'name', 'acronym', 'parent_structure_id', 'level',
        'total_intensity', 'total_volume', 'intensity_per_volume' and
        'structure_id_path'. An empty ``df_structures`` yields ``{}``.
    """
    hierarchy = {}

    # 1. Create nodes; the level is the number of ids in the path minus one.
    for _, row in df_structures.iterrows():
        structure_id = row['id']
        hierarchy[structure_id] = {
            'children': [],
            'name': row['name'],
            'acronym': row['acronym'],
            'parent_structure_id': row['parent_structure_id'],
            'level': len(row['structure_id_path'].strip('/').split('/')) - 1,
            'total_intensity': 0,
            'total_volume': 0,
            'intensity_per_volume': 0,
            'structure_id_path': row['structure_id_path']
        }

    # Nothing to link or aggregate for an empty structures table.
    if not hierarchy:
        return hierarchy

    # 2. Build relationships: register every node with its parent when the
    #    parent id is itself a known structure.
    for structure_id, node in hierarchy.items():
        parent_id = node['parent_structure_id']
        if parent_id in hierarchy:
            hierarchy[parent_id]['children'].append(structure_id)

    # 3. Bucket the nodes by level once (keeping dict order inside a level)
    #    instead of rescanning the whole hierarchy for every level.
    nodes_by_level = {}
    for node in hierarchy.values():
        nodes_by_level.setdefault(node['level'], []).append(node)

    # 4. Aggregate data (bottom-up): deepest level first, so child totals are
    #    final before their parent sums them.
    for level in sorted(nodes_by_level, reverse=True):
        for node in nodes_by_level[level]:
            if not node['children']:  # Leaf: take the measured values
                data = get_structure_data(df_quanti, node['name'])
                node['total_intensity'] = data['total_intensity']
                node['total_volume'] = data['volume_mm3']
                node['intensity_per_volume'] = data['intensity_per_volume']
            else:  # Parent: sum the children, then derive the ratio
                children = [hierarchy[child_id] for child_id in node['children']]
                total_intensity = sum(child['total_intensity'] for child in children)
                total_volume = sum(child['total_volume'] for child in children)
                node['total_intensity'] = total_intensity
                node['total_volume'] = total_volume
                node['intensity_per_volume'] = calculate_intensity_per_volume(total_intensity, total_volume)

    return hierarchy

# Aggregate the quantification over the structure tree.
hierarchy = build_hierarchy_from_path(df_structures.copy(), df_quanti.copy())


# --- Prepare Data for Plotly Sunburst ---

data_for_sunburst = []
# id -> name lookup used to label each node's parent.
structure_name_map = df_structures.set_index('id')['name'].to_dict()

for structure_id, data in hierarchy.items():
    # Every node (leaf or parent) is kept as long as its volume is positive.
    if not data['total_volume'] > 0:
        continue

    # Nodes whose parent id has no known name hang off a synthetic "Root".
    parent_id = data['parent_structure_id']
    if parent_id is None or parent_id not in structure_name_map:
        parent_name = "Root"
    else:
        parent_name = structure_name_map[parent_id]

    row_data = dict(
        id=structure_id,
        name=data['name'],
        parent=parent_name,
        acronym=data['acronym'],
        intensity_per_volume=data['intensity_per_volume'],
        total_intensity=data['total_intensity'],
        total_volume=data['total_volume'],
    )
    data_for_sunburst.append(row_data)

df_sunburst = pd.DataFrame(data_for_sunburst)

# --- Create Sunburst Chart ---

# The tree is described by explicit name/parent columns; sector size and
# colour both come from 'total_intensity'.
fig_intensity = px.sunburst(
    df_sunburst,
    names='name',
    parents='parent',
    values='total_intensity',
    color='total_intensity',
    hover_data={
        'name': True,
        'total_intensity': ':.2e',
        'acronym': True,
        'total_volume': True,
        'id': False,
        'parent': True,
        'intensity_per_volume': False,
    },
    color_continuous_scale=px.colors.sequential.Plasma,
    branchvalues="total",
)

fig_intensity.update_layout(
    title='Brainglow- Total intensity, all regions',
    margin=dict(l=0, r=0, b=0, t=30),
)
fig_intensity.update_traces(hovertemplate=None)

fig_intensity.show()

In [7]:
# Save the figure held in `fig_intensity` as an interactive HTML file. The
# path is relative, so the file is written to the current working directory.
fig_intensity.write_html("total_intensity_sunburst2.html")