In [2]:
# Import the modules
from pajek_converter import connections_to_pajek, get_available_neurotransmitters
from infomap_runner import run_complete_infomap_pipeline  
from connectome_gui import create_infomap_gui, quick_analysis

# Use the GUI
create_infomap_gui("connections_princeton.csv")

# Or run programmatically
#results = quick_analysis("connections_princeton.csv", ["GABA"], min_synapses=5)

VBox(children=(HTML(value="<h2 style='color: #333; margin-bottom: 5px;'>🧬 Drosophila Connectome Infomap Analys…

In [2]:
# Import the analysis functions
from module_parser import parse_infomap_modules, get_module_summary, filter_modules_by_size
from module_visualizer import create_3d_module_plot, create_interactive_module_viewer, plot_module_size_distribution

import pandas as pd
import numpy as np

In [3]:
# Example with your current file structure
selected_nts = ['gaba']  # or whatever neurotransmitters you used
threshold = 5  # or your threshold value

# Build file paths (matching your current structure)
nt_clean = "_".join(sorted(nt.lower() for nt in selected_nts))
output_dir = f"gui_output_{nt_clean}_thresh{threshold}"
tree_file = f"{output_dir}/{nt_clean}_thresh{threshold}_graph.tree"
pajek_file = f"{output_dir}/{nt_clean}_thresh{threshold}_graph.net"
coords_file = "coordinates.csv"

# Parse the module data
modules_df = parse_infomap_modules(
    tree_file=tree_file,
    pajek_file=pajek_file,
    coords_file=coords_file,
    max_levels=3  # Extract up to 3 hierarchical levels
)

print(f"Parsed {len(modules_df)} neurons")
print(f"Available columns: {list(modules_df.columns)}")
print(f"Sample data:\n{modules_df.head()}")

Parsed 211346 neurons
Available columns: ['neuron_id', 'module_path', 'level_1', 'level_2', 'level_3', 'root_id', 'position', 'supervoxel_id', 'x', 'y', 'z']
Sample data:
            neuron_id module_path  level_1  level_2  level_3  \
0  720575940614765777     1:1:1:1        1        1      1.0   
1  720575940614765777     1:1:1:1        1        1      1.0   
2  720575940614765777     1:1:1:1        1        1      1.0   
3  720575940613449256     1:1:1:2        1        1      1.0   
4  720575940613449256     1:1:1:2        1        1      1.0   

              root_id                position      supervoxel_id         x  \
0  720575940614765777  [829172 191580 196200]  84867523581250014  829172.0   
1  720575940614765777  [798004 200304 197920]  84304711066811153  798004.0   
2  720575940614765777  [794796 201024 198880]  84304711066836926  794796.0   
3  720575940613449256  [775136 250528 152880]  83953691845275910  775136.0   
4  720575940613449256  [683044 349776 141880]  8233685

In [5]:
# Filter to keep only large modules
filtered_df, large_module_ids = filter_modules_by_size(
    modules_df, 
    level=1, 
    min_size=20  # Only modules with ≥20 neurons
)

print(f"Kept {len(large_module_ids)} modules with ≥20 neurons")
print(f"Large modules: {large_module_ids}")
print(f"Filtered dataset: {len(filtered_df)} neurons")

Kept 7 modules with ≥20 neurons
Large modules: [2, 1, 3, 6, 4, 7, 5]
Filtered dataset: 211269 neurons


In [6]:
# Simple function call to visualize modules
fig = create_3d_module_plot(modules_df, level=1, min_module_size=10)
fig.show()

In [7]:
# Interactive widget with all controls
create_interactive_module_viewer(
    modules_df,
    neurotransmitters=selected_nts,
    threshold=threshold,
    default_min_size=10
)

VBox(children=(HTML(value='<h3>📊 Interactive Module Viewer</h3>'), HBox(children=(RadioButtons(description='Hi…

In [8]:
# Histogram of module sizes
fig_dist = plot_module_size_distribution(modules_df, level=1, bins=30)
fig_dist.show()

# Compare distributions across levels
import plotly.graph_objects as go
fig_compare = go.Figure()

for level in [1, 2]:
    level_col = f"level_{level}"
    if level_col in modules_df.columns:
        sizes = modules_df[level_col].value_counts().values
        fig_compare.add_trace(go.Histogram(
            x=sizes,
            name=f"Level {level}",
            opacity=0.7,
            nbinsx=20
        ))

fig_compare.update_layout(
    title="Module Size Distribution Comparison",
    xaxis_title="Neurons per Module",
    yaxis_title="Number of Modules",
    barmode='overlay'
)
fig_compare.show()

In [9]:
# Simple one-step usage
from id_matcher import diagnose_and_match, get_classification_summary

# This will automatically diagnose and apply the best matching strategy
merged_df = diagnose_and_match(modules_df, "classification.csv")

if merged_df is not None:
    print("✅ Successfully merged module and classification data!")
    summary = get_classification_summary(merged_df)
else:
    print("❌ Could not match IDs automatically")

🔍 COMPREHENSIVE ID MATCHING DIAGNOSTIC
✓ Loaded classification file: 139255 entries

📊 DATASET STRUCTURES:
Modules DataFrame columns: ['neuron_id', 'module_path', 'level_1', 'level_2', 'level_3', 'root_id', 'position', 'supervoxel_id', 'x', 'y', 'z']
Classification DataFrame columns: ['root_id', 'flow', 'super_class', 'class', 'sub_class', 'hemilineage', 'side', 'nerve']

🔍 ID FORMAT ANALYSIS:
Potential ID columns in modules data: ['neuron_id', 'root_id', 'supervoxel_id']
  neuron_id: ['720575940614765777', '720575940614765777', '720575940614765777', '720575940613449256', '720575940613449256'] (type: str)
  root_id: [720575940614765777, 720575940614765777, 720575940614765777, 720575940613449256, 720575940613449256] (type: int64)
  supervoxel_id: [84867523581250014, 84304711066811153, 84304711066836926, 83953691845275910, 82336859929819537] (type: int64)

Classification root_id: [720575940596125868, 720575940597856265, 720575940597944841, 720575940598267657, 720575940599333574] (type: i

In [10]:
# Step-by-step usage
from id_matcher import diagnose_id_matching, apply_id_matching

# First diagnose what matching strategy works best
strategy = diagnose_id_matching(modules_df, "classification.csv", "connections_princeton.csv")

if strategy and strategy != 'connections_rebuild':
    # Apply the strategy
    merged_df = apply_id_matching(modules_df, "classification.csv", strategy)
    print(f"Merged data shape: {merged_df.shape}")

🔍 COMPREHENSIVE ID MATCHING DIAGNOSTIC
✓ Loaded classification file: 139255 entries

📊 DATASET STRUCTURES:
Modules DataFrame columns: ['neuron_id', 'module_path', 'level_1', 'level_2', 'level_3', 'root_id', 'position', 'supervoxel_id', 'x', 'y', 'z']
Classification DataFrame columns: ['root_id', 'flow', 'super_class', 'class', 'sub_class', 'hemilineage', 'side', 'nerve']

🔍 ID FORMAT ANALYSIS:
Potential ID columns in modules data: ['neuron_id', 'root_id', 'supervoxel_id']
  neuron_id: ['720575940614765777', '720575940614765777', '720575940614765777', '720575940613449256', '720575940613449256'] (type: str)
  root_id: [720575940614765777, 720575940614765777, 720575940614765777, 720575940613449256, 720575940613449256] (type: int64)
  supervoxel_id: [84867523581250014, 84304711066811153, 84304711066836926, 83953691845275910, 82336859929819537] (type: int64)

Classification root_id: [720575940596125868, 720575940597856265, 720575940597944841, 720575940598267657, 720575940599333574] (type: i

In [11]:
from module_quality_analyzer import analyze_top_modules

# Get exactly 4 charts: sizes + rankings + super_class + flow
results, figures = analyze_top_modules(merged_df, level='level_1', top_n=8)

🔍 ANALYZING TOP MODULES (LEVEL_1)
   Selected top 7 modules
   Coverage: 100.0% of network

📈 Module Sizes



📈 Module Rankings



📈 Composition by Super Class



📈 Composition by Flow


In [None]:
# Topological Data Analysis (TDA) - Example Analysis
from persistence_analysis import create_tda_gui, quick_tda_analysis

print("🔬 Running TDA Analysis Examples")
print("=" * 40)

# Method 1: Interactive GUI for exploratory analysis
print("\n1. Interactive TDA GUI:")
print("   - Allows you to select neuron subsets")
print("   - Choose between neuron count or specific modules")
print("   - Interactive persistence diagram visualization")

try:
    create_tda_gui(
        modules_df=modules_df,
        output_dir=output_dir,
        nt_types=selected_nts,
        threshold=threshold
    )
    print("✅ TDA GUI created successfully!")
except ImportError as e:
    print(f"⚠️  GUDHI library required for TDA analysis: {e}")
    print("   Install with: pip install gudhi")
except Exception as e:
    print(f"❌ Error creating TDA GUI: {e}")

print("\n" + "="*40)

# Method 2: Programmatic analysis - analyze first 100 neurons  
print("\n2. Quick TDA Analysis (100 neurons):")
try:
    result, fig = quick_tda_analysis(
        modules_df=modules_df,
        output_dir=output_dir, 
        nt_types=selected_nts,
        threshold=threshold,
        subset_type='count',
        subset_value=100
    )
    
    # Display the persistence diagram
    fig.show()
    
    # Print summary statistics
    persistence_diagram = result['persistence_diagram']
    if len(persistence_diagram) > 0:
        lifetimes = [death - birth for birth, death in persistence_diagram if death != float('inf')]
        print(f"\n📊 TDA Results Summary:")
        print(f"   • Total H1 homology groups: {len(persistence_diagram)}")
        print(f"   • Finite persistence groups: {len(lifetimes)}")
        print(f"   • Infinite persistence groups: {len(persistence_diagram) - len(lifetimes)}")
        if lifetimes:
            print(f"   • Average lifetime: {np.mean(lifetimes):.2f}")
            print(f"   • Max lifetime: {max(lifetimes):.2f}")
    else:
        print("   • No H1 homology groups detected")
        
except ImportError as e:
    print(f"⚠️  GUDHI library required for TDA analysis: {e}")
    print("   Install with: pip install gudhi")
except Exception as e:
    print(f"❌ Error running TDA analysis: {e}")

print("\n" + "="*40)

# Method 3: Analyze specific module (if you want to focus on a particular module)
print("\n3. Module-specific TDA Analysis:")
print("   Example: Analyze the largest module at level 1")

try:
    # Find the largest module at level 1
    module_sizes = modules_df['level_1'].value_counts()
    if len(module_sizes) > 0:
        largest_module = module_sizes.index[0]
        largest_module_size = module_sizes.iloc[0]
        
        print(f"   • Analyzing module {largest_module} ({largest_module_size} neurons)")
        
        # Only analyze if module is not too large (to avoid memory issues)
        if largest_module_size <= 500:
            result_module, fig_module = quick_tda_analysis(
                modules_df=modules_df,
                output_dir=output_dir,
                nt_types=selected_nts, 
                threshold=threshold,
                subset_type='module',
                subset_value=largest_module,
                level=1
            )
            
            fig_module.show()
            
            # Summary for this module
            persistence_diagram = result_module['persistence_diagram']
            print(f"   • H1 groups in module {largest_module}: {len(persistence_diagram)}")
        else:
            print(f"   • Module too large ({largest_module_size} neurons) - skipping for performance")
            print(f"   • Consider using the GUI to analyze smaller subsets")
        
    else:
        print("   • No modules found in data")
        
except ImportError as e:
    print(f"⚠️  GUDHI library required: {e}")
except Exception as e:
    print(f"   ❌ Error in module analysis: {e}")

print(f"\n✅ TDA analysis examples complete!")
print(f"💡 TIP: Use the interactive GUI above to explore different neuron subsets")
print(f"💡 NOTE: Install GUDHI library with 'pip install gudhi' for full TDA functionality")

🔬 Running TDA Analysis Examples

1. Interactive TDA GUI:
   - Allows you to select neuron subsets
   - Choose between neuron count or specific modules
   - Interactive persistence diagram visualization


VBox(children=(HTML(value="<h2 style='color: #333;'>🔬 Topological Data Analysis</h2>"), HTML(value='<hr>'), HT…

✅ TDA GUI created successfully!


2. Quick TDA Analysis (100 neurons):
🔬 QUICK TDA ANALYSIS
Loading connectome data...
Loaded 836318 connections
Selecting 100 well-connected neurons...
Loaded 836318 connections
Selecting 100 well-connected neurons...
Selected 100 well-connected neurons

Running TDA analysis...
🔬 Running TDA Analysis...
  - Using subset of 100 neurons
  - Filtered to 421 connections within subset
  - Processing connectivity matrix of size (100, 100)
  - Created distance matrix (max weight: 1856)
  - Added 100 vertices and 4950 edges
  - Expanding complex to include higher-dimensional simplices...
  - Complex has 166750 total simplices
  - Computing persistence homology...
  - Found 11 H1 homology groups
✅ Found 11 H1 homology groups

✅ Analysis complete!
Found 11 H1 homology groups
Selected 100 well-connected neurons

Running TDA analysis...
🔬 Running TDA Analysis...
  - Using subset of 100 neurons
  - Filtered to 421 connections within subset
  - Processing connectivit


📊 TDA Results Summary:
   • Total H1 homology groups: 11
   • Finite persistence groups: 11
   • Infinite persistence groups: 0
   • Average lifetime: 8.09
   • Max lifetime: 18.00


3. Module-specific TDA Analysis:
   Example: Analyze the largest module at level 1
   • Analyzing module 2 (67502 neurons)
   • Module too large (67502 neurons) - skipping for performance
   • Consider using the GUI to analyze smaller subsets

✅ TDA analysis examples complete!
💡 TIP: Use the interactive GUI above to explore different neuron subsets
💡 NOTE: Install GUDHI library with 'pip install gudhi' for full TDA functionality
