# Bar graph visualizations

In [4]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
import io

### Intensity per volume, bar graph for every region

In [None]:
# Load the data
# Only the read itself is guarded; sorting happens once the file is known to exist.
try:
    df = pd.read_csv('quanti.csv')
except FileNotFoundError:
    print("Error: 'quanti.csv' not found. Please run quantification.ipynb first.")
    # Stop the cell here. Swallowing the error would leave df_sorted undefined
    # (NameError further down) or, in a re-run session, silently reuse a
    # stale df_sorted from an earlier execution.
    raise

# Highest intensity per volume first, so the plots read top-down.
df_sorted = df.sort_values('intensity_per_volume', ascending=False)

# --- Plot 1: Multi-Column Bar Plot - Log Scale, Global Colors ---
# The structures in df_sorted are split into three side-by-side panels that
# share one colour scale and one x-range.

num_structures = len(df_sorted)
structures_per_column = (num_structures + 2) // 3  # ceil(num_structures / 3)

fig, axes = plt.subplots(1, 3, figsize=(30, 40), sharey=False)
axes = axes.flatten()

# Logarithmic normalization
# Zero or negative values have no logarithm, so the lower bound of both the
# colour scale and the x-axis is the smallest strictly positive value. When no
# value is positive, fall back to the plain minimum.
intensity_values = df_sorted['intensity_per_volume']
positive_values = intensity_values[intensity_values > 0]
x_min = positive_values.min() if not positive_values.empty else intensity_values.min()
x_max = intensity_values.max()

norm = mcolors.LogNorm(vmin=x_min, vmax=x_max)
cmap = plt.get_cmap('plasma')

for i in range(3):
    start_index = i * structures_per_column
    end_index = min((i + 1) * structures_per_column, num_structures)
    df_subset = df_sorted.iloc[start_index:end_index]

    if df_subset.empty:
        # Fewer structures than panels: remove the unused panel rather than
        # drawing an empty frame.
        fig.delaxes(axes[i])
        continue

    sns.barplot(x='intensity_per_volume', y='structure_name', data=df_subset,
                orient='h', ax=axes[i], hue='intensity_per_volume', palette=cmap, hue_norm=norm, legend=False)

    axes[i].set_ylabel('Structure Name')
    # Only the middle panel carries the x-axis label.
    if i == 1:
        axes[i].set_xlabel('Intensity per Volume (mm³)')
    else:
        axes[i].set_xlabel('')

    axes[i].yaxis.tick_left()
    axes[i].tick_params(axis='y', labelsize=9)
    axes[i].grid(True, axis='x', alpha=0.5)
    axes[i].set_xscale('log')
    # Same limits on every panel so bar lengths are comparable across panels;
    # the factor 2 leaves headroom to the right of the longest bar.
    axes[i].set_xlim(x_min, x_max * 2)

plt.suptitle('Intensity per Volume for Each Brain Structure (Multi-Column, Log Scale)', fontsize=16)
plt.tight_layout(rect=[0.03, 0.03, 0.97, 0.97])
plt.show()

### Intensity per Volume, one bar graph for each hierarchy level

In [None]:
# --- Load Data and Preprocess ---

try:
    df_quanti = pd.read_csv('quanti.csv')
    df_structures = pd.read_csv('structures.csv')
except FileNotFoundError:
    print("Error: 'quanti.csv' or 'structures.csv' not found.")
    # Stop the cell here. Continuing would fail later with a NameError or,
    # in a re-run session, silently use DataFrames left over from an
    # earlier execution.
    raise

# --- Helper Functions ---
def get_structure_data(df_quanti, structure_name):
    """Return the first row of df_quanti whose 'structure_name' matches.

    When no row matches, a Series with 'total_intensity', 'volume_mm3' and
    'intensity_per_volume' all set to 0 is returned instead.
    """
    matches = df_quanti.loc[df_quanti['structure_name'] == structure_name]
    if matches.empty:
        # No measurement for this structure: return neutral zeros.
        return pd.Series({'total_intensity': 0, 'volume_mm3': 0, 'intensity_per_volume': 0})
    return matches.iloc[0]

def calculate_intensity_per_volume(total_intensity, total_volume):
    """Return total_intensity / total_volume, or 0 unless the volume is greater than zero."""
    if total_volume > 0:
        return total_intensity / total_volume
    return 0

# --- Build Hierarchy ---

def build_hierarchy_from_path(df_structures, df_quanti):
    """Build the structure tree and roll intensity and volume up from the leaves.

    Args:
        df_structures: DataFrame with one row per structure. The columns
            'id', 'name', 'acronym', 'parent_structure_id' and
            'structure_id_path' are read. The path is a '/'-separated
            string; its number of components minus one is the node's level.
        df_quanti: DataFrame of measurements, queried by structure name
            through get_structure_data().

    Returns:
        A dict mapping each structure id to a node dict with the keys
        'children' (list of child ids), 'name', 'acronym',
        'parent_structure_id', 'level', 'total_intensity', 'total_volume'
        and 'intensity_per_volume'. Nodes without children take their values
        from df_quanti; every other node holds the sum over its children and
        the ratio of those sums.
    """
    hierarchy = {}

    # 1. Create nodes from structure data, using structure_id_path to determine depth
    for _, row in df_structures.iterrows():
        path_parts = row['structure_id_path'].strip('/').split('/')
        hierarchy[row['id']] = {
            'children': [],
            'name': row['name'],
            'acronym': row['acronym'],
            'parent_structure_id': row['parent_structure_id'],
            'level': len(path_parts) - 1,
            'total_intensity': 0,
            'total_volume': 0,
            'intensity_per_volume': 0,
        }

    # 2. Build parent-child relationships based on parent_structure_id and
    #    bucket the ids by level so the aggregation below visits every node
    #    exactly once instead of rescanning the whole dict for each level.
    ids_by_level = {}
    for structure_id, node in hierarchy.items():
        parent_id = node['parent_structure_id']
        # A parent id with no node of its own (e.g. the root's parent) is skipped.
        if parent_id in hierarchy:
            hierarchy[parent_id]['children'].append(structure_id)
        ids_by_level.setdefault(node['level'], []).append(structure_id)

    # 3. Aggregate data bottom-up: deepest level first, so a parent's children
    #    are already filled in when the parent itself is processed.
    for level in sorted(ids_by_level, reverse=True):
        for structure_id in ids_by_level[level]:
            node = hierarchy[structure_id]
            if not node['children']:  # Leaf nodes: get data directly
                data = get_structure_data(df_quanti, node['name'])
                node['total_intensity'] = data['total_intensity']
                node['total_volume'] = data['volume_mm3']
                node['intensity_per_volume'] = data['intensity_per_volume']
            else:  # Parent nodes: aggregate from children
                total_intensity = sum(
                    hierarchy[child_id]['total_intensity'] for child_id in node['children']
                )
                total_volume = sum(
                    hierarchy[child_id]['total_volume'] for child_id in node['children']
                )
                node['total_intensity'] = total_intensity
                node['total_volume'] = total_volume
                node['intensity_per_volume'] = calculate_intensity_per_volume(total_intensity, total_volume)

    return hierarchy

# build_hierarchy_from_path only reads its inputs, so the DataFrames are
# passed directly rather than as defensive copies.
hierarchy = build_hierarchy_from_path(df_structures, df_quanti)


# --- Plotting Function (per Level) ---

def plot_level(hierarchy, level, cmap, norm):
    """Draw the intensity-per-volume bar chart for one hierarchy level.

    Structures at the given level whose total volume is greater than zero are
    sorted by descending intensity per volume and spread over up to three
    side-by-side panels with a logarithmic x-axis. Panels that would stay
    empty are removed from the figure.

    Args:
        hierarchy: dict mapping structure id to a node dict providing 'level',
            'name', 'acronym', 'intensity_per_volume', 'total_intensity' and
            'total_volume'.
        level: hierarchy level to plot.
        cmap: colormap passed to seaborn as the bar palette.
        norm: colour normalisation passed to seaborn as hue_norm.

    Returns:
        None. The figure is shown with plt.show(); if no structure qualifies,
        a message is printed and nothing is drawn.
    """
    # Filter structures for the current level
    rows = [
        {'structure_id': structure_id, 'structure_name': data['name'], 'acronym': data['acronym'],
         'intensity_per_volume': data['intensity_per_volume'], 'total_intensity': data['total_intensity'],
         'total_volume': data['total_volume']}
        for structure_id, data in hierarchy.items()
        if data['level'] == level and data['total_volume'] > 0
    ]

    if not rows:
        print(f"No structures to plot at level {level}.")
        return

    df_level_sorted = pd.DataFrame(rows).sort_values('intensity_per_volume', ascending=False)
    num_structures = len(df_level_sorted)
    structures_per_column = (num_structures + 2) // 3  # ceil(num_structures / 3)

    fig, axes = plt.subplots(1, 3, figsize=(30, max(10, num_structures // 4)), sharey=False)

    # Shared x-range for all panels. A log axis cannot start at zero, so the
    # lower bound is the smallest strictly positive value; fall back to the
    # plain minimum when no value is positive.
    values = df_level_sorted['intensity_per_volume']
    positive_values = values[values > 0]
    x_lower = positive_values.min() if not positive_values.empty else values.min()
    x_upper = values.max() * 2  # factor 2 leaves headroom right of the longest bar

    # Visit all three panels so that unused ones are removed even when there
    # are fewer than three structures.
    for i, ax in enumerate(axes):
        start_index = i * structures_per_column
        df_subset = df_level_sorted.iloc[start_index:start_index + structures_per_column]

        if df_subset.empty:
            fig.delaxes(ax)
            continue

        sns.barplot(x='intensity_per_volume', y='structure_name', data=df_subset,
                    orient='h', ax=ax, hue='intensity_per_volume', palette=cmap, hue_norm=norm, legend=False)

        ax.set_ylabel('Structure Name')
        # Only the middle panel carries the x-axis label.
        if i == 1:
            ax.set_xlabel('Intensity per Volume (mm³)')
        else:
            ax.set_xlabel('')

        ax.yaxis.tick_left()
        ax.tick_params(axis='y', labelsize=9)
        ax.grid(True, axis='x', alpha=0.5)
        ax.set_xscale('log')
        ax.set_xlim(x_lower, x_upper)

    plt.suptitle(f'Intensity per Volume - Level {level} (Multi-Column, Log Scale)', fontsize=16)
    plt.tight_layout(rect=[0.03, 0.03, 0.97, 0.97])
    plt.show()


# --- Main Plotting Loop ---
# One colour scale shared by all levels, derived from the values in df_quanti.
# LogNorm cannot use a zero or negative lower bound, so vmin is the smallest
# strictly positive value; fall back to the plain minimum when no value is
# positive.
quanti_values = df_quanti['intensity_per_volume']
positive_quanti_values = quanti_values[quanti_values > 0]
norm = mcolors.LogNorm(
    vmin=positive_quanti_values.min() if not positive_quanti_values.empty else quanti_values.min(),
    vmax=quanti_values.max(),
)
cmap = plt.get_cmap('plasma')

# default=0 covers an empty hierarchy.
max_level = max((node['level'] for node in hierarchy.values()), default=0)

# One figure per level, from the root level downwards.
for level in range(max_level + 1):
    plot_level(hierarchy, level, cmap, norm)