Analysis of TOPAS-nBio simulations of cells with nanoparticles

This notebook analyses the results of the simulations of TOPAS and TOPAS-nbio of cells with nanoparticles irradiated with I125 radiation source.

In [1]:
import sys
import os
import pathlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pprint

# Configure matplotlib to display plots inline within the notebook
#%matplotlib inline

# Make sure plots display in the notebook and not in separate windows
from IPython.display import display
import matplotlib as mpl

# Set default figure size and style for better display in notebook
plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['figure.dpi'] = 100
plt.style.use('seaborn-v0_8-whitegrid')  # Modern style for better visualization

# Force plots to be rendered within notebook
mpl.rcParams['figure.max_open_warning'] = 0  # Avoid warnings about too many open figures
mpl.rcParams['figure.raise_window'] = False  # Prevent figures from raising windows

# Import our custom modules
from dnadamage_phsp_manager import *
from chemistry_output_manager import *
from collections import defaultdict

## Notebook Functions

This notebook processes the results of multiple TOPAS simulations of cells with nanoparticles. It uses the following imported functions:

- `process_csv_file`: Processes TOPAS CSV files and extracts sum values and histories
- `process_original_hists`: Extracts history values from TOPAS simulation files
- `count_phsp_particles`: Counts particles in phase space files

These functions are organized in the appropriate module files:
- `topas_csv_files_manager.py`: Contains functions for processing TOPAS CSV files
- `phsp_manager.py`: Contains functions for processing phase space files

The notebook processes multiple runs and aggregates results for:
- Original histories
- Dose to nucleus (physical and chemical phases)
- Energy deposited in the cell
- Nanoparticle electron emissions

In [2]:
from analize_cell_sim_results import multirun_processing

# Set parameters for multirun processing
nruns = 100
filebase = '../TOPAS_CellsNPs/work/only_results_CellColony-med5-cell5/cell1'

# Process all runs and get results
Cell_results = multirun_processing(nruns, filebase)


Processing run 1/100...
Error processing GValues for run 1: [Errno 2] No such file or directory: '../TOPAS_CellsNPs/work/only_results_CellColony-med5-cell5/cell1/run1/IRTGValue.phsp'
Processing run 2/100...
Error processing GValues for run 2: [Errno 2] No such file or directory: '../TOPAS_CellsNPs/work/only_results_CellColony-med5-cell5/cell1/run2/IRTGValue.phsp'
Processing run 3/100...
Error processing GValues for run 3: [Errno 2] No such file or directory: '../TOPAS_CellsNPs/work/only_results_CellColony-med5-cell5/cell1/run3/IRTGValue.phsp'
Processing run 4/100...
Error processing GValues for run 4: [Errno 2] No such file or directory: '../TOPAS_CellsNPs/work/only_results_CellColony-med5-cell5/cell1/run4/IRTGValue.phsp'
Processing run 5/100...
Error processing GValues for run 5: [Errno 2] No such file or directory: '../TOPAS_CellsNPs/work/only_results_CellColony-med5-cell5/cell1/run5/IRTGValue.phsp'
Processing run 6/100...
Error processing GValues for run 6: [Errno 2] No such file or

In [3]:
def display_results(results):
    """Display processed results with proper formatting and error reporting."""
    print("\nResults Summary:")
    print("-" * 50)
    
    # Display physical quantities
    print("\nPhysical Quantities:")
    print("-" * 50)
    
    # Display particle counts
    print(f"Original histories: {results['Original_hists']['value']:,}")
    print(f"Nanoparticle electrons: {results['NP_el']['value']:,} particles")
    
    # Display dose and energy measurements
    for key in ['DoseToNucl_ph2', 'DoseToNucl_ph3', 'Ecell']:
        print(f"\n{key}:")
        print(f"  - Value: {results[key]['value']:.6e} ± {results[key]['error']:.6e} (2σ)")
    
    # Display chemical phase results 
    print("\nChemical Phase Results (G-Values):")
    print("-" * 50)
    
    # Sort species by G-Value for better presentation
    species_data = [(species, data['value'], data['error']) 
                   for species, data in results['GValues'].items() 
                   if 'value' in data]
    species_data.sort(key=lambda x: x[1], reverse=True)
    
    for species, value, error in species_data:
        print(f"\n{species}:")
        print(f"  - G-Value: {value:.4f} ± {error:.4f} molecules/100eV (2σ)")
    
    # Display DNA damage results if available
    if 'DNADamage' in results:
        print("\nDNA Damage Results:")
        print("-" * 50)
        
        
        if 'Dose' in results['DNADamage']:
            dnadose = results['DNADamage']['Dose']
            print(f"Total dose deposited: {dnadose:.2f} Gy")
        
        # Group and display damage statistics
        damage_keys = ['DSB', 'DSB_Direct', 'DSB_Indirect', 'DSB_Hybrid', 'SSB', 'SSB_Direct', 'SSB_Indirect', 
                         'SB', 'SB_Direct', 'SB_Indirect', 'BD', 'BD_Direct', 'BD_Indirect', 'DSB_positions', 
                         'Number_of_foci', 'Complexity2', 'Complexity3', 'Complexity4', 'Complexity5', 'Complexity6',
                         'Complexity7', 'Complexity8', 'Complexity9', 'Complexity10', 'Complexity11', 'Complexity12', 
                         'Complexity13', 'Complexity14', 'Complexity15']
        
        
        for dmg in damage_keys:
            if dmg in results['DNADamage'].keys():
                val = results['DNADamage'][dmg]
                print(f"\n{dmg}: {val:.0f}")


In [4]:
def plot_damage_distribution(damage_totals):
    """Create a stacked bar plot showing direct vs indirect damage distribution."""
    damage_pairs = [
        ('DSB_Direct', 'DSB_Indirect'),
        ('SSB_Direct', 'SSB_Indirect'),
        ('SB_Direct', 'SB_Indirect'),
        ('BD_Direct', 'BD_Indirect')
    ]
    
    valid_pairs = [(direct, indirect) for direct, indirect in damage_pairs 
                  if direct in damage_totals and indirect in damage_totals]
    
    if valid_pairs:
        fig, ax = plt.subplots(figsize=(10, 6))
        bar_width = 0.35
        x = np.arange(len(valid_pairs))
        labels = [pair[0].split('_')[0] for pair in valid_pairs]
        
        # Plot stacked bars with consistent colors
        direct_color = '#1f77b4'  # blue
        indirect_color = '#ff7f0e'  # orange
        for i, (direct, indirect) in enumerate(valid_pairs):
            direct_sum = damage_totals[direct]
            indirect_sum = damage_totals[indirect]
            ax.bar(i, direct_sum, bar_width, color=direct_color, label='Direct' if i == 0 else "")
            ax.bar(i, indirect_sum, bar_width, bottom=direct_sum, color=indirect_color, label='Indirect' if i == 0 else "")
        
        # Add value labels
        for i, (direct, indirect) in enumerate(valid_pairs):
            direct_sum = damage_totals[direct]
            indirect_sum = damage_totals[indirect]
            total = direct_sum + indirect_sum
            
            # Display direct values in middle of direct bar
            ax.text(i, direct_sum/2, f'{direct_sum:.0f}', ha='center', va='center', color='white', fontweight='bold')
            
            # Display indirect values in middle of indirect bar
            ax.text(i, direct_sum + indirect_sum/2, f'{indirect_sum:.0f}', ha='center', va='center', color='white', fontweight='bold')
            
            # Display total on top
            ax.text(i, total + 0.5, f'Total: {total:.0f}', ha='center', va='bottom')
        
        ax.set_xlabel('Damage Type', fontsize=12)
        ax.set_ylabel('Count', fontsize=12)
        ax.set_title('Direct vs Indirect Damage Distribution', fontsize=14)
        ax.set_xticks(x)
        ax.set_xticklabels(labels)
        ax.legend(fontsize=10)
        ax.grid(axis='y', alpha=0.3)
        
        plt.tight_layout()
        # Explicitly display the figure in the notebook
        plt.show()

# Create plot of G-values
def plot_gvalues(gvalue_results):
    """Create a horizontal bar plot showing G-values for chemical species."""
    # Convert results to dataframe format
    data = {
        'Species': [],
        'GValue': [],
        'Error': []
    }
    
    for species, result in gvalue_results.items():
        if 'value' in result:
            data['Species'].append(species)
            data['GValue'].append(result['value'])
            data['Error'].append(result['error'] / 2)  # Convert from 2σ to 1σ for error bars
    
    # Convert to DataFrame and sort by GValue
    df = pd.DataFrame(data)
    df = df.sort_values('GValue')
    
    # Create the plot
    fig, ax = plt.subplots(figsize=(10, 8))  # Taller figure for better species label display
    
    # Use categorical colormap based on value
    colors = plt.cm.viridis(df['GValue'] / df['GValue'].max())
    
    bars = ax.barh(
        df['Species'],
        df['GValue'],
        xerr=df['Error'],
        align='center',
        ecolor='black',
        capsize=3,
        color=colors,
        alpha=0.7
    )
    
    # Add value annotations
    for i, (bar, value) in enumerate(zip(bars, df['GValue'])):
        ax.text(value + df['Error'][i] + 0.05, i, 
                f'{value:.3f}', 
                va='center', fontsize=9)
    
    ax.set_xlabel('G-Value (molecules / 100 eV)', fontsize=12)
    ax.set_title('Chemical Species Production (G-Values)', fontsize=14)
    ax.grid(True, alpha=0.3, axis='x')
    
    # Add a colorbar legend
    sm = plt.cm.ScalarMappable(cmap=plt.cm.viridis, 
                              norm=plt.Normalize(vmin=0, vmax=df['GValue'].max()))
    sm.set_array([])
    cbar = plt.colorbar(sm, ax=ax, orientation='vertical', pad=0.01)
    cbar.set_label('G-Value Magnitude', fontsize=10)
    
    plt.tight_layout()
    # Explicitly display the figure in the notebook
    plt.show()

# Display the results
display_results(Cell_results)

# Plot damage distribution if DNA damage data is available
if 'DNADamage' in Cell_results:
    plot_damage_distribution(Cell_results['DNADamage'])

# Plot G-values if chemical species data is available    
if any('value' in data for data in Cell_results['GValues'].values()):
    plot_gvalues(Cell_results['GValues'])


Results Summary:
--------------------------------------------------

Physical Quantities:
--------------------------------------------------
Original histories: 60,000,000
Nanoparticle electrons: 17,375 particles

DoseToNucl_ph2:
  - Value: 4.054261e+00 ± 2.997430e-07 (2σ)

DoseToNucl_ph3:
  - Value: 4.134780e+00 ± 6.429610e-07 (2σ)

Ecell:
  - Value: 3.185680e+01 ± 1.687563e-06 (2σ)

Chemical Phase Results (G-Values):
--------------------------------------------------

OH^-1:
  - G-Value: 0.0000 ± 0.0000 molecules/100eV (2σ)

H2O2^0:
  - G-Value: 0.0000 ± 0.0000 molecules/100eV (2σ)

e_aq^-1:
  - G-Value: 0.0000 ± 0.0000 molecules/100eV (2σ)

H^0:
  - G-Value: 0.0000 ± 0.0000 molecules/100eV (2σ)

H3O^1:
  - G-Value: 0.0000 ± 0.0000 molecules/100eV (2σ)

OH^0:
  - G-Value: 0.0000 ± 0.0000 molecules/100eV (2σ)

H_2^0:
  - G-Value: 0.0000 ± 0.0000 molecules/100eV (2σ)

DNA Damage Results:
--------------------------------------------------
Total dose deposited: 4.05 Gy

DSB: 113

DSB_Di

## Multi-Cell Analysis

This section demonstrates processing multiple cells and computing statistics across them.

In [2]:
def plot_chemical_species_violin(all_cell_results):
    """Create violin plot for chemical species G-values across cells."""
    # Prepare data
    species_data = defaultdict(list)
    for cell_results in all_cell_results:
        for species, data in cell_results['GValues'].items():
            species_data[species].append(data['value'])
    
    # Convert to DataFrame and create violin plot
    df = pd.DataFrame(species_data)
    
    fig, ax = plt.subplots(figsize=(14, 8))
    
    # Get a colormap for different species
    import matplotlib.cm as cm
    colors = cm.tab10(np.linspace(0, 1, len(df.columns)))
    
    violin_parts = ax.violinplot(
        [df[col].values for col in df.columns],
        showmeans=True, 
        showmedians=True,
        vert=True
    )
    
    # Customize violin plot
    ax.set_xticks(range(1, len(df.columns) + 1))
    ax.set_xticklabels(df.columns, rotation=45, ha='right', fontsize=10)
    ax.set_ylabel('G-Value (molecules/100eV)', fontsize=12)
    ax.set_title('Distribution of G-Values Across Cells', fontsize=14)
    
    ax.grid(True, alpha=0.3, axis='y', linestyle='--')
    
    # Color the violins
    for i, (pc, color) in enumerate(zip(violin_parts['bodies'], colors)):
        pc.set_facecolor(color)
        pc.set_alpha(0.7)
        pc.set_edgecolor('black')
        pc.set_linewidth(1)
    
    # Color the median and mean lines
    for partname, part in violin_parts.items():
        if partname != 'bodies':
            if partname == 'cmeans':
                part.set_edgecolor('red')
                part.set_linewidth(1.5)
            elif partname == 'cmedians':
                part.set_edgecolor('black')
                part.set_linewidth(1.5)
            
    # Add a legend
    from matplotlib.lines import Line2D
    legend_elements = [
        Line2D([0], [0], color='red', lw=1.5, label='Mean'),
        Line2D([0], [0], color='black', lw=1.5, label='Median')
    ]
    ax.legend(handles=legend_elements, loc='upper right')
    
    # Add annotations for mean values
    for i, col in enumerate(df.columns):
        mean_val = df[col].mean()
        ax.text(i+1, mean_val, f'{mean_val:.3f}', 
                ha='center', va='bottom', fontsize=8, 
                bbox=dict(facecolor='white', alpha=0.5, edgecolor='none'))
    
    plt.tight_layout()
    # Explicitly display the figure in the notebook
    plt.show()

def plot_dna_damage_violin(all_cell_results):
    """Create violin plot for DNA damage across cells."""
    # Define damage types and their attributes
    damage_types = {
        'DSB': {'color': '#1f77b4', 'label': 'Double Strand Breaks'},
        'SSB': {'color': '#ff7f0e', 'label': 'Single Strand Breaks'},
        'SB': {'color': '#2ca02c', 'label': 'Strand Breaks'},
        'BD': {'color': '#d62728', 'label': 'Base Damage'}
    }
    
    # Collect data
    damage_data = defaultdict(list)
    for cell_results in all_cell_results:
        for damage_type in damage_types.keys():
            damage_data[damage_type].append(cell_results['DNADamage'][damage_type])
    
    # Create plot
    fig, ax = plt.subplots(figsize=(12, 7))
    df = pd.DataFrame(damage_data)
    
    # Create violin plots with enhanced styling
    violin_parts = ax.violinplot(
        [df[col].values for col in df.columns], 
        showmeans=True, 
        showmedians=True,
        vert=True
    )
    
    # Customize plot
    ax.set_xticks(range(1, len(df.columns) + 1))
    ax.set_xticklabels([damage_types[col]['label'] for col in df.columns], fontsize=11)
    ax.set_ylabel('Number of Events', fontsize=12)
    ax.set_title('Distribution of DNA Damage Events Across Cells', fontsize=14)
    
    # Color the violins
    for i, pc in enumerate(violin_parts['bodies']):
        damage_type = list(damage_types.keys())[i]
        pc.set_facecolor(damage_types[damage_type]['color'])
        pc.set_alpha(0.7)
        pc.set_edgecolor('black')
        pc.set_linewidth(1)
    
    # Style mean and median lines
    violin_parts['cmeans'].set_edgecolor('red')
    violin_parts['cmeans'].set_linewidth(1.5)
    violin_parts['cmedians'].set_edgecolor('black')
    violin_parts['cmedians'].set_linewidth(1.5)
    
    # Add a legend for mean and median
    from matplotlib.lines import Line2D
    legend_elements = [
        Line2D([0], [0], color='red', lw=1.5, label='Mean'),
        Line2D([0], [0], color='black', lw=1.5, label='Median')
    ]
    ax.legend(handles=legend_elements, loc='upper right')
    
    # Add mean value annotations
    for i, col in enumerate(df.columns):
        mean_val = df[col].mean()
        ax.text(i+1, mean_val, f'{mean_val:.1f}', 
                ha='center', va='bottom', fontsize=9,
                bbox=dict(facecolor='white', alpha=0.5, edgecolor='none'))
    
    ax.grid(True, alpha=0.3, axis='y', linestyle='--')
    plt.tight_layout()
    # Explicitly display the figure in the notebook
    plt.show()

def display_multicell_results(all_cell_results, multicell_stats):
    """Display comprehensive results from multicell analysis including tables and plots.
    
    Args:
        all_cell_results: List of results from each cell
        multicell_stats: Aggregated statistics across cells
    """
    # Create DataFrame with results table
    data = []

    # Define all columns
    columns = [
        'Cell',
        'DoseToNucl_ph2 (Gy)', 
        'DoseToNucl_ph3 (Gy)',
        'Energy to Cell (MeV)',
        'NP electrons',
        'DSB', 'SSB', 'SB', 'BD'
    ]

    # Add G-Value columns for each species
    species_list = list(all_cell_results[0]['GValues'].keys())
    gvalue_columns = [f'G({species})' for species in species_list]
    columns.extend(gvalue_columns)

    # Collect data for each cell
    for i, cell_results in enumerate(all_cell_results):
        row = [
            f'Cell {i+1}',
            cell_results['DoseToNucl_ph2']['value'],
            cell_results['DoseToNucl_ph3']['value'],
            cell_results['Ecell']['value'],
            cell_results['NP_el']['value'],
            cell_results['DNADamage']['DSB'],
            cell_results['DNADamage']['SSB'],
            cell_results['DNADamage']['SB'],
            cell_results['DNADamage']['BD']
        ]
        # Add G-Values
        for species in species_list:
            row.append(cell_results['GValues'][species]['value'])
        data.append(row)

    # Add mean values row
    mean_row = [
        'Mean',
        multicell_stats['DoseToNucl_ph2']['mean'],
        multicell_stats['DoseToNucl_ph3']['mean'],
        multicell_stats['Ecell']['mean'],
        multicell_stats['NP_el']['mean'],
        multicell_stats['DNADamage']['DSB']['mean'],
        multicell_stats['DNADamage']['SSB']['mean'],
        multicell_stats['DNADamage']['SB']['mean'],
        multicell_stats['DNADamage']['BD']['mean']
    ]
    # Add G-Value means
    for species in species_list:
        mean_row.append(multicell_stats['GValues'][species]['mean'])
    data.append(mean_row)

    # Add standard deviation row
    error_row = [
        'Uncertainty',
        multicell_stats['DoseToNucl_ph2']['error'],
        multicell_stats['DoseToNucl_ph3']['error'],
        multicell_stats['Ecell']['error'],
        multicell_stats['NP_el']['error'],
        multicell_stats['DNADamage']['DSB']['error'],
        multicell_stats['DNADamage']['SSB']['error'],
        multicell_stats['DNADamage']['SB']['error'],
        multicell_stats['DNADamage']['BD']['error']
    ]
    # Add G-Value standard deviations
    for species in species_list:
        error_row.append(multicell_stats['GValues'][species]['error'])
    data.append(error_row)

    # Create DataFrame and format display
    results_df = pd.DataFrame(data, columns=columns)

    # Format numbers with appropriate precision
    def format_value(x):
        # Handle integers for specific columns
        if isinstance(x, (int, np.integer)) or (isinstance(x, float) and x.is_integer()):
            return '{:d}'.format(int(x))
        # Handle other floats
        if isinstance(x, float):
            if abs(x) < 1e-4 or abs(x) > 1e4:
                return '{:.2e}'.format(x)
            return '{:.4f}'.format(x)
        return str(x)

    # Convert integer columns to int type
    integer_columns = ['NP electrons', 'DSB', 'SSB', 'SB', 'BD']
    for col in integer_columns:
        if col in results_df.columns:
            results_df[col] = results_df[col].astype('float').round().astype('Int64')

    pd.set_option('display.float_format', format_value)

    # Create styled DataFrame for display
    styled_df = results_df.style.format(format_value)

    # Add highlights for mean and std rows
    styled_df = styled_df.set_properties(**{
        'background-color': '#f2f2f2'
    }, subset=pd.IndexSlice[results_df.index[-2:], :])

    # Display the table
    display(styled_df)

    # Plot distributions
    plot_chemical_species_violin(all_cell_results)
    plot_dna_damage_violin(all_cell_results)



In [3]:
# Process conditions without nanoparticles
from analize_cell_sim_results import multicell_processing, process_multicell_results

# Set parameters for multicell processing
n_cells = 10  # Number of cells to process
n_runs = 100  # Number of runs per cell
base_dir = '../TOPAS_CellsNPs/work/only_results_CellColony-med0-cell0'  # Base directory containing cell directories

# Process all cells and their runs
all_cell_results_med0_cell0 = multicell_processing(n_cells, n_runs, base_dir)

# Compute statistics across cells
multicell_stats_med0_cell0 = process_multicell_results(all_cell_results_med0_cell0)

# Display results for condition without nanoparticles
display_multicell_results(all_cell_results_med0_cell0, multicell_stats_med0_cell0)


Processing cell 1/10...
Processing run 100/100...
Processing complete!
Processing all 100 available directories.
Number of directories with damage data to process: 100

Processing cell 2/10...
Processing run 100/100...
Processing complete!
Processing all 100 available directories.
Number of directories with damage data to process: 100

Processing cell 3/10...
Processing run 100/100...
Processing complete!
Processing all 100 available directories.
Number of directories with damage data to process: 100

Processing cell 4/10...
Processing run 100/100...
Processing complete!
Processing all 100 available directories.
Number of directories with damage data to process: 100

Processing cell 5/10...
Processing run 100/100...
Processing complete!
Processing all 100 available directories.
Number of directories with damage data to process: 100

Processing cell 6/10...
Processing run 100/100...
Processing complete!
Processing all 100 available directories.
Number of directories with damage data to

Unnamed: 0,Cell,DoseToNucl_ph2 (Gy),DoseToNucl_ph3 (Gy),Energy to Cell (MeV),NP electrons,DSB,SSB,SB,BD,G(OH^-1),G(H2O2^0),G(e_aq^-1),G(H^0),G(H3O^1),G(OH^0),G(H_2^0)
0,Cell 1,2.9112,3.0361,19.4407,0,100,2399,2599,5620,0.948,1.2845,3.5987,1.0959,4.1617,3.7099,0.9846
1,Cell 2,2.9097,2.8671,19.1737,0,98,2181,2377,5342,2.6165,2.5874,5.1906,2.5662,7.4935,6.5081,2.1198
2,Cell 3,2.8168,2.8285,18.7245,0,106,2311,2523,5266,0.9921,1.3534,2.6065,1.1074,3.4815,3.0146,1.0624
3,Cell 4,2.8809,2.9055,18.7468,0,101,2246,2448,5319,0.9798,1.3533,2.6907,1.12,3.5535,3.0952,1.054
4,Cell 5,2.6731,2.9137,18.1798,0,109,2256,2474,5553,2.3434,2.7041,4.2912,2.3362,6.4204,5.296,2.1455
5,Cell 6,2.9093,2.9102,18.6851,0,98,2341,2537,5323,7.1396,13.1693,19.2896,12.4715,25.8314,22.9859,9.0806
6,Cell 7,2.8566,2.9872,18.821,0,108,2358,2574,5427,0.9237,1.2737,2.6134,1.0364,3.4075,2.9545,0.9909
7,Cell 8,2.9161,2.8379,19.2602,0,103,2158,2364,5192,1.0962,1.481,3.1104,1.2415,4.0817,3.5589,1.1469
8,Cell 9,3.0633,2.888,19.5372,0,97,2218,2412,5286,1.2126,1.5848,3.4932,1.292,4.5681,3.966,1.2441
9,Cell 10,2.8431,3.1057,19.1927,0,122,2489,2733,5645,0.9294,1.2456,2.6439,1.0597,3.4693,3.0167,0.9542


In [4]:
# Cell colony with 1mg NP concentration in medium and cell
base_dir = '../TOPAS_CellsNPs/work/only_results_CellColony-med1-cell1'  # Base directory containing cell directories

# Process all cells and their runs
all_cell_results_med1_cell1 = multicell_processing(n_cells, n_runs, base_dir)

# Compute statistics across cells
multicell_stats_med1_cell1 = process_multicell_results(all_cell_results_med1_cell1)

# Display results for condition without nanoparticles
display_multicell_results(all_cell_results_med1_cell1, multicell_stats_med1_cell1)


Processing cell 1/10...
Processing run 100/100...
Processing complete!
Processing all 100 available directories.
Number of directories with damage data to process: 100

Processing cell 2/10...
Processing run 100/100...
Processing complete!
Processing all 100 available directories.
Number of directories with damage data to process: 100

Processing cell 3/10...
Processing run 100/100...
Processing complete!
Processing all 100 available directories.
Number of directories with damage data to process: 100

Processing cell 4/10...
Processing run 100/100...
Processing complete!
Processing all 100 available directories.
Number of directories with damage data to process: 100

Processing cell 5/10...
Processing run 100/100...
Processing complete!
Processing all 100 available directories.
Number of directories with damage data to process: 100

Processing cell 6/10...
Processing run 100/100...
Processing complete!
Processing all 100 available directories.
Number of directories with damage data to

Unnamed: 0,Cell,DoseToNucl_ph2 (Gy),DoseToNucl_ph3 (Gy),Energy to Cell (MeV),NP electrons,DSB,SSB,SB,BD,G(OH^-1),G(H2O2^0),G(e_aq^-1),G(H^0),G(H3O^1),G(OH^0),G(H_2^0)
0,Cell 1,2.9749,3.2792,20.7634,3707,120,2529,2769,6001,1.2106,1.542,2.8639,1.3234,3.9077,3.5101,1.2868
1,Cell 2,3.1818,3.1311,21.6209,3589,102,2310,2514,5501,0.9907,1.3383,2.6597,1.1305,3.5344,3.0434,1.023
2,Cell 3,2.9688,3.1335,21.0443,3346,119,2405,2643,5823,0.9482,1.3093,2.655,1.0307,3.455,2.9599,1.0205
3,Cell 4,3.2134,3.31,21.6764,4050,111,2580,2802,5939,0.943,1.2792,2.5558,1.0511,3.3759,2.9061,0.9903
4,Cell 5,3.24,3.2949,22.2074,4199,110,2500,2720,5900,0.9489,1.2729,2.671,1.0674,3.4645,3.022,0.9924
5,Cell 6,2.7753,2.9654,20.6203,3427,108,2295,2511,5317,1.0389,1.3229,2.7986,1.1155,3.7121,3.2942,1.0756
6,Cell 7,2.9277,3.2554,21.4997,4056,97,2659,2853,5856,1.108,1.5379,2.8669,1.3182,3.8465,3.4641,1.2415
7,Cell 8,2.914,3.0708,20.9904,3031,101,2388,2590,5696,2.9489,2.6989,4.4177,3.0369,6.623,6.7229,2.7048
8,Cell 9,2.9834,3.068,21.2538,3434,119,2403,2641,5479,1.0029,1.3888,2.7637,1.1338,3.6524,3.1777,1.086
9,Cell 10,3.0858,3.2862,21.4363,3939,95,2615,2805,6203,1.5092,2.0318,3.6038,1.6223,4.8824,4.3547,1.7114


In [5]:
# Cell colony with 1mg NP concentration in medium and cell
base_dir = '../TOPAS_CellsNPs/work/only_results_CellColony-med5-cell5'  # Base directory containing cell directories

# Process all cells and their runs
all_cell_results_med5_cell5 = multicell_processing(n_cells, n_runs, base_dir)

# Compute statistics across cells
multicell_stats_med5_cell5 = process_multicell_results(all_cell_results_med5_cell5)

# Display results for condition without nanoparticles
display_multicell_results(all_cell_results_med5_cell5, multicell_stats_med5_cell5)


Processing cell 1/10...

Error processing GValues for run 1: 'NoneType' object has no attribute 'groupby'

Error processing GValues for run 2: 'NoneType' object has no attribute 'groupby'

Error processing GValues for run 3: 'NoneType' object has no attribute 'groupby'

Error processing GValues for run 4: 'NoneType' object has no attribute 'groupby'

Error processing GValues for run 5: 'NoneType' object has no attribute 'groupby'

Error processing GValues for run 6: 'NoneType' object has no attribute 'groupby'

Error processing GValues for run 7: 'NoneType' object has no attribute 'groupby'

Error processing GValues for run 8: 'NoneType' object has no attribute 'groupby'

Error processing GValues for run 9: 'NoneType' object has no attribute 'groupby'

Error processing GValues for run 10: 'NoneType' object has no attribute 'groupby'

Error processing GValues for run 11: 'NoneType' object has no attribute 'groupby'

Error processing GValues for run 12: 'NoneType' object has no attribut

Unnamed: 0,Cell,DoseToNucl_ph2 (Gy),DoseToNucl_ph3 (Gy),Energy to Cell (MeV),NP electrons,DSB,SSB,SB,BD,G(OH^-1),G(H2O2^0),G(e_aq^-1),G(H^0),G(H3O^1),G(OH^0),G(H_2^0)
0,Cell 1,4.0543,4.1348,31.8568,17375,113,3184,3410,7672,0,0,0,0,0,0,0
1,Cell 2,3.9408,4.0049,32.3219,18510,156,2981,3293,7201,0,0,0,0,0,0,0
2,Cell 3,3.6907,4.0612,31.4389,18420,146,3157,3449,7313,0,0,0,0,0,0,0
3,Cell 4,3.7703,4.0397,31.3682,18908,149,3232,3530,7409,0,0,0,0,0,0,0
4,Cell 5,3.8686,4.0685,31.5661,18473,158,3140,3456,7483,0,0,0,0,0,0,0
5,Cell 6,3.7683,3.8297,30.5416,17362,135,2966,3236,6946,0,0,0,0,0,0,0
6,Cell 7,3.7923,3.9453,30.9624,17759,108,3056,3272,7040,0,0,0,0,0,0,0
7,Cell 8,3.8328,3.8138,30.7362,16460,138,2873,3149,6797,0,0,0,0,0,0,0
8,Cell 9,3.7906,3.903,31.2787,18329,133,3011,3277,7120,0,0,0,0,0,0,0
9,Cell 10,3.7276,4.0314,31.2948,17927,129,3037,3295,7102,0,0,0,0,0,0,0


## Enhancement Analysis

Compare results between conditions with and without nanoparticles to compute enhancement ratios.

In [6]:
# Create enhancement table
def display_enhancement_table_grouped(enhancement_results):
    """Display enhancement ratios grouped in separate tables."""
    # Define column names for all tables
    columns = ['Quantity', 'Enhancement Ratio', 'Uncertainty']
    
    # ---- Group 1: Dose and Energy ----
    data_dose_energy = []
    for key in ['DoseToNucl_ph2', 'DoseToNucl_ph3', 'Ecell']:
        if key in enhancement_results['simple_quantities']:
            display_name = {
                'DoseToNucl_ph2': 'Dose to Nucleus (Phase 2)',
                'DoseToNucl_ph3': 'Dose to Nucleus (Phase 3)',
                'Ecell': 'Energy to Cell'
            }[key]
            result = enhancement_results['simple_quantities'][key]
            data_dose_energy.append([
                display_name,
                result['ratio'],
                result['uncertainty']
            ])
    
    # ---- Group 2: G-Values ----
    data_gvalues = []
    for species in enhancement_results['GValues']:
        result = enhancement_results['GValues'][species]
        data_gvalues.append([
            f'G-Value ({species})',
            result['ratio'],
            result['uncertainty']
        ])
    
    # ---- Group 3: DNA Damage (main types) ----
    data_dna_damage = []
    dna_damage_types = {
        'DSB': 'Double Strand Breaks',
        'SSB': 'Single Strand Breaks',
        'SB': 'Strand Breaks',
        'BD': 'Base Damage'
    }
    
    for damage_type, display_name in dna_damage_types.items():
        if damage_type in enhancement_results['DNADamage']:
            result = enhancement_results['DNADamage'][damage_type]
            if result['ratio'] is not None:
                data_dna_damage.append([
                    display_name,
                    result['ratio'],
                    result['uncertainty']
                ])
    
    # ---- Group 4: Complexity ----
    data_complexity = []
    complexity_pattern = re.compile(r'Complexity\d+')
    
    for damage_type in enhancement_results['DNADamage']:
        if complexity_pattern.match(damage_type):
            result = enhancement_results['DNADamage'][damage_type]
            if result['ratio'] is not None:
                data_complexity.append([
                    f'{damage_type} Damage',
                    result['ratio'],
                    result['uncertainty']
                ])
    
    # ---- Group 5: DNA Damage per Gy ----
    data_dna_damage_per_gy = []
    if 'DNADamage_per_Gy' in enhancement_results:
        for damage_type, display_name in dna_damage_types.items():
            if damage_type in enhancement_results['DNADamage_per_Gy']:
                result = enhancement_results['DNADamage_per_Gy'][damage_type]
                if result['ratio'] is not None:
                    data_dna_damage_per_gy.append([
                        f'{display_name} per Gy',
                        result['ratio'],
                        result['uncertainty']
                    ])
    
    # ---- Group 6: Complexity per Gy ----
    data_complexity_per_gy = []
    if 'DNADamage_per_Gy' in enhancement_results:
        for damage_type in enhancement_results['DNADamage_per_Gy']:
            if complexity_pattern.match(damage_type):
                result = enhancement_results['DNADamage_per_Gy'][damage_type]
                if result['ratio'] is not None:
                    data_complexity_per_gy.append([
                        f'{damage_type} Damage per Gy',
                        result['ratio'],
                        result['uncertainty']
                    ])
    
    # Sort complexity by number
    data_complexity.sort(key=lambda x: int(re.search(r'\d+', x[0]).group()))
    if data_complexity_per_gy:
        data_complexity_per_gy.sort(key=lambda x: int(re.search(r'\d+', x[0]).group()))
    
    # Function to format and style dataframes
    def format_and_style_df(data, title):
        if not data:  # Skip empty data
            return None
            
        df = pd.DataFrame(data, columns=columns)
        
        # Format the ratios and uncertainties
        def format_ratio(x):
            if isinstance(x, float):
                return f"{x:.3f}"
            return str(x)
        
        styled_df = df.style.format({
            'Enhancement Ratio': format_ratio,
            'Uncertainty': format_ratio
        })
        
        # Add coloring based on enhancement (>1 is blue, <1 is red)
        def color_enhancement(val):
            try:
                val = float(val)
                if val > 1:
                    return 'color: blue'
                elif val < 1:
                    return 'color: red'
            except:
                pass
            return ''
        
        styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])
        
        # Display the table with title
        print(f"\n\n{title}")
        print("-" * len(title))
        display(styled_df)
        
        return df
    
    # Display all tables
    dfs = {
        "Dose and Energy Enhancement": format_and_style_df(data_dose_energy, "Dose and Energy Enhancement"),
        "G-Values Enhancement": format_and_style_df(data_gvalues, "G-Values Enhancement"),
        "DNA Damage Enhancement": format_and_style_df(data_dna_damage, "DNA Damage Enhancement"),
        "Complexity Enhancement": format_and_style_df(data_complexity, "Complexity Enhancement"),
        "DNA Damage per Gy Enhancement": format_and_style_df(data_dna_damage_per_gy, "DNA Damage per Gy Enhancement"),
        "Complexity per Gy Enhancement": format_and_style_df(data_complexity_per_gy, "Complexity per Gy Enhancement")
    }
    
    return dfs

# Maintain backward compatibility
def display_enhancement_table(enhancement_results):
    """Original function that displays all enhancement ratios in a single table."""
    data = []
    columns = ['Quantity', 'Enhancement Ratio', 'Uncertainty']
    
    # Physical quantities
    for key in ['DoseToNucl_ph2', 'DoseToNucl_ph3', 'Ecell']:
        if key in enhancement_results['simple_quantities']:
            display_name = {
                'DoseToNucl_ph2': 'Dose to Nucleus (Phase 2)',
                'DoseToNucl_ph3': 'Dose to Nucleus (Phase 3)',
                'Ecell': 'Energy to Cell'
            }[key]
            result = enhancement_results['simple_quantities'][key]
            data.append([
                display_name,
                result['ratio'],
                result['uncertainty']
            ])
    
    # DNA Damage
    damage_display = {
        'DSB': 'Double Strand Breaks',
        'SSB': 'Single Strand Breaks',
        'SB': 'Strand Breaks',
        'BD': 'Base Damage',
        'Complexity2': 'Complexity 2',
        'Complexity3': 'Complexity 3',
        'Complexity4': 'Complexity 4',
        'Complexity5': 'Complexity 5',
        'Complexity6': 'Complexity 6',
        'Complexity7': 'Complexity 7',
        'Complexity8': 'Complexity 8',
        'Complexity9': 'Complexity 9',
        'Complexity10': 'Complexity 10',
        'Complexity11': 'Complexity 11',
        'Complexity12': 'Complexity 12',
        'Complexity13': 'Complexity 13',
        'Complexity14': 'Complexity 14'      
    }
    
    for damage_type, display_name in damage_display.items():
        if damage_type in enhancement_results['DNADamage']:
            result = enhancement_results['DNADamage'][damage_type]
            if result['ratio'] is not None:
                data.append([
                    display_name,
                    result['ratio'],
                    result['uncertainty']
            ])
    
    # DNA Damage per Gy
    if 'DNADamage_per_Gy' in enhancement_results:
        for damage_type, display_name in damage_display.items():
            if damage_type in enhancement_results['DNADamage_per_Gy']:
                result = enhancement_results['DNADamage_per_Gy'][damage_type]
                if result['ratio'] is not None:
                    data.append([
                        f'{display_name} per Gy',
                        result['ratio'],
                        result['uncertainty']
                ])
    
    # G-Values
    for species in enhancement_results['GValues']:
        result = enhancement_results['GValues'][species]
        data.append([
            f'G-Value ({species})',
            result['ratio'],
            result['uncertainty']
        ])
    
    # Create DataFrame
    df = pd.DataFrame(data, columns=columns)
    
    # Format the ratios and uncertainties
    def format_ratio(x):
        if isinstance(x, float):
            return f"{x:.3f}"
        return str(x)
    
    styled_df = df.style.format({
        'Enhancement Ratio': format_ratio,
        'Uncertainty': format_ratio
    })
    
    # Add coloring based on enhancement (>1 is blue, <1 is red)
    def color_enhancement(val):
        try:
            val = float(val)
            if val > 1:
                return 'color: blue'
            elif val < 1:
                return 'color: red'
        except:
            pass
        return ''
    
    styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])
    
    display(styled_df)

In [7]:
# Enhancement visualization functions

def create_enhancement_bar_plot(data_list, labels, errors_list, title, colors=None, scenario_labels=None):
    """Create a bar plot for a specific enhancement category with multiple scenarios.
    
    Args:
        data_list: List of lists, where each inner list contains enhancement ratio values for a scenario
        labels: List of labels for each bar
        errors_list: List of lists, where each inner list contains error values for a scenario
        title: Title for the plot
        colors: List of colors for each scenario (will use default colors if None)
        scenario_labels: List of labels for each scenario (will use "Scenario X" if None)
    """
    if not isinstance(data_list[0], list):
        # Handle the case of a single scenario (backward compatibility)
        data_list = [data_list]
        errors_list = [errors_list]
        if scenario_labels is None:
            scenario_labels = [""]
    
    n_scenarios = len(data_list)
    n_categories = len(labels)
    
    # Set up colors if not provided
    if colors is None:
        cmap = plt.cm.tab10
        colors = [cmap(i/10) for i in range(n_scenarios)]
    
    # Set up scenario labels if not provided
    if scenario_labels is None:
        scenario_labels = [f"Scenario {i+1}" for i in range(n_scenarios)]
    
    # Bar width based on number of scenarios
    bar_width = 0.7 / n_scenarios
    
    # Set up the figure
    fig, ax = plt.subplots(figsize=(12, 6))
    
    # Position adjustment for each scenario's bars
    positions = [np.arange(n_categories) - 0.35 + (i + 0.5) * bar_width for i in range(n_scenarios)]
    
    # Plot bars for each scenario
    bars_list = []
    for i in range(n_scenarios):
        data = data_list[i]
        errors = errors_list[i]
        pos = positions[i]
        
        # Skip scenarios with no data
        if len(data) == 0:
            continue
            
        bars = ax.bar(pos, data, bar_width, yerr=errors, capsize=3, 
                     color=colors[i], alpha=0.7, label=scenario_labels[i])
        bars_list.append(bars)
    
    # Add horizontal line at y=1
    ax.axhline(y=1, color='k', linestyle='--', alpha=0.3)
    
    # Customize plot
    ax.set_xticks(np.arange(n_categories))
    ax.set_xticklabels(labels, rotation=45, ha='right')
    ax.set_ylabel('Enhancement Ratio', fontsize=12)
    ax.set_title(f'{title} Enhancement', fontsize=14)
    
    # Add legend if multiple scenarios
    if n_scenarios > 1:
        ax.legend(fontsize=10)
    
    # Add value labels for each bar
    for i, bars in enumerate(bars_list):
        for j, (bar, v, err) in enumerate(zip(bars, data_list[i], errors_list[i])):
            y_pos = v + err + 0.05
            ax.text(bar.get_x() + bar.get_width()/2, y_pos, 
                   f'{v:.2f}', ha='center', va='bottom', fontsize=8,
                   rotation=45 if n_scenarios > 1 else 0)
    
    # Add grid
    ax.grid(axis='y', linestyle='--', alpha=0.3)
    
    # Adjust layout
    plt.tight_layout()
    plt.show()


def extract_enhancement_data(enhancement_results, category):
    """Extract enhancement data for a specific category.
    
    Args:
        enhancement_results: The output from compute_enhancement_ratios
        category: One of 'dose_energy', 'gvalues', 'dna_damage', 'complexity',
                 'dna_damage_per_gy', or 'complexity_per_gy'
        
    Returns:
        Tuple of (data, labels, errors)
    """
    import re
    
    if category == 'dose_energy':
        # Extract dose and energy data
        data = []
        labels = []
        errors = []
        
        for key in ['DoseToNucl_ph2', 'DoseToNucl_ph3', 'Ecell']:
            if key in enhancement_results['simple_quantities']:
                result = enhancement_results['simple_quantities'][key]
                data.append(result['ratio'])
                errors.append(result['uncertainty'])
                
                # Use more descriptive labels
                display_name = {
                    'DoseToNucl_ph2': 'Dose to Nucleus\n(Phase 2)',
                    'DoseToNucl_ph3': 'Dose to Nucleus\n(Phase 3)',
                    'Ecell': 'Energy to Cell'
                }[key]
                labels.append(display_name)
        
    elif category == 'gvalues':
        # Extract G-Values data
        data = []
        labels = []
        errors = []
        
        for species in enhancement_results['GValues']:
            result = enhancement_results['GValues'][species]
            data.append(result['ratio'])
            errors.append(result['uncertainty'])
            labels.append(f'G({species})')
    
    elif category == 'dna_damage':
        # Extract DNA Damage data
        data = []
        labels = []
        errors = []
        
        for key in ['DSB', 'SSB', 'SB', 'BD']:
            if key in enhancement_results['DNADamage']:
                result = enhancement_results['DNADamage'][key]
                data.append(result['ratio'])
                errors.append(result['uncertainty'])
                
                # Use more descriptive labels
                display_name = {
                    'DSB': 'Double Strand\nBreaks',
                    'SSB': 'Single Strand\nBreaks',
                    'SB': 'Strand\nBreaks',
                    'BD': 'Base\nDamage'
                }[key]
                labels.append(display_name)
    
    elif category == 'complexity':
        # Extract Complexity data
        data = []
        labels = []
        errors = []
        complexity_pattern = re.compile(r'Complexity\d+')
        
        # Collect complexity data
        for damage_type in enhancement_results['DNADamage']:
            if complexity_pattern.match(damage_type):
                result = enhancement_results['DNADamage'][damage_type]
                if result['ratio'] is not None:
                    data.append(result['ratio'])
                    errors.append(result['uncertainty'])
                    complexity_number = re.search(r'\d+', damage_type).group()
                    labels.append(f'Complexity {complexity_number}')
        
        # Sort by complexity number
        if data:
            # Sort all lists by the complexity number
            sort_indices = sorted(range(len(labels)), 
                                key=lambda i: int(re.search(r'\d+', labels[i]).group()))
            
            data = [data[i] for i in sort_indices]
            labels = [labels[i] for i in sort_indices]
            errors = [errors[i] for i in sort_indices]
    
    elif category == 'dna_damage_per_gy':
        # Extract DNA Damage per Gy data
        data = []
        labels = []
        errors = []
        
        if 'DNADamage_per_Gy' in enhancement_results:
            for key in ['DSB', 'SSB', 'SB', 'BD']:
                if key in enhancement_results['DNADamage_per_Gy']:
                    result = enhancement_results['DNADamage_per_Gy'][key]
                    data.append(result['ratio'])
                    errors.append(result['uncertainty'])
                    
                    # Use more descriptive labels
                    display_name = {
                        'DSB': 'Double Strand\nBreaks/Gy',
                        'SSB': 'Single Strand\nBreaks/Gy',
                        'SB': 'Strand\nBreaks/Gy',
                        'BD': 'Base\nDamage/Gy'
                    }[key]
                    labels.append(display_name)
    
    elif category == 'complexity_per_gy':
        # Extract Complexity per Gy data
        data = []
        labels = []
        errors = []
        complexity_pattern = re.compile(r'Complexity\d+')
        
        if 'DNADamage_per_Gy' in enhancement_results:
            # Collect complexity data
            for damage_type in enhancement_results['DNADamage_per_Gy']:
                if complexity_pattern.match(damage_type):
                    result = enhancement_results['DNADamage_per_Gy'][damage_type]
                    if result['ratio'] is not None:
                        data.append(result['ratio'])
                        errors.append(result['uncertainty'])
                        complexity_number = re.search(r'\d+', damage_type).group()
                        labels.append(f'Complexity {complexity_number}/Gy')
            
            # Sort by complexity number
            if data:
                # Sort all lists by the complexity number
                sort_indices = sorted(range(len(labels)), 
                                    key=lambda i: int(re.search(r'\d+', labels[i]).group()))
                
                data = [data[i] for i in sort_indices]
                labels = [labels[i] for i in sort_indices]
                errors = [errors[i] for i in sort_indices]
    
    return data, labels, errors


def plot_multi_enhancement_categories(enhancement_results_list):
    """Create plots for all enhancement categories showing multiple scenarios.
    
    Args:
        enhancement_results_list: List of outputs from compute_enhancement_ratios
    """
    # Define categories and their colors
    categories = [
        ('dose_energy', 'Dose and Energy'),
        ('gvalues', 'G-Values'),
        ('dna_damage', 'DNA Damage'),
        ('complexity', 'Complexity'),
        ('dna_damage_per_gy', 'DNA Damage per Gy'),
        ('complexity_per_gy', 'Complexity per Gy')
    ]
    
    # Get scenario labels
    scenario_labels = [er.get('scenario_label', f'Scenario {i+1}') 
                      for i, er in enumerate(enhancement_results_list)]
    
    # Set colors using colormap
    cmap = plt.cm.tab10
    colors = [cmap(i/10) for i in range(len(enhancement_results_list))]
    
    # Process each category
    for category_key, title in categories:
        all_data = []
        all_errors = []
        all_labels = set()
        
        # Collect data from each scenario for the current category
        for er in enhancement_results_list:
            data, labels, errors = extract_enhancement_data(er, category_key)
            all_data.append(data)
            all_errors.append(errors)
            all_labels.update(labels)
        
        # If no data for this category, skip
        if all(len(data) == 0 for data in all_data):
            continue
            
        # Get common labels across all scenarios (to ensure consistent ordering)
        if category_key in ['complexity', 'complexity_per_gy']:
            # For complexity categories, sort by number
            import re
            common_labels = sorted(list(all_labels), 
                                 key=lambda x: int(re.search(r'\d+', x).group()))
        else:
            # For other categories, use the predefined order
            predefined_labels = {
                'dose_energy': ['Dose to Nucleus\n(Phase 2)', 'Dose to Nucleus\n(Phase 3)', 'Energy to Cell'],
                'dna_damage': ['Double Strand\nBreaks', 'Single Strand\nBreaks', 'Strand\nBreaks', 'Base\nDamage'],
                'dna_damage_per_gy': ['Double Strand\nBreaks/Gy', 'Single Strand\nBreaks/Gy', 
                                     'Strand\nBreaks/Gy', 'Base\nDamage/Gy'],
                'gvalues': sorted(list(all_labels))  # Sort alphabetically for G-values
            }
            common_labels = predefined_labels.get(category_key, sorted(list(all_labels)))
        
        # Reorder and align data for each scenario based on common labels
        aligned_data = []
        aligned_errors = []
        
        for i, (data, errors, labels_used) in enumerate(zip(all_data, all_errors, 
                                                 [extract_enhancement_data(er, category_key)[1] 
                                                  for er in enhancement_results_list])):
            scenario_data = []
            scenario_errors = []
            
            # For each common label, find corresponding data or use NaN
            for label in common_labels:
                if label in labels_used:
                    idx = labels_used.index(label)
                    scenario_data.append(data[idx])
                    scenario_errors.append(errors[idx])
                else:
                    scenario_data.append(float('nan'))
                    scenario_errors.append(0)
            
            aligned_data.append(scenario_data)
            aligned_errors.append(scenario_errors)
        
        # Create the multi-scenario bar plot for this category
        create_enhancement_bar_plot(aligned_data, common_labels, aligned_errors, 
                                  title, colors=colors, scenario_labels=scenario_labels)

# Original plot_all_enhancement_categories function - keep for backward compatibility
def plot_all_enhancement_categories(enhancement_results):
    """Create plots for all enhancement categories for a single scenario.
    
    Args:
        enhancement_results: The output from compute_enhancement_ratios
    """
    # Define categories and their colors
    categories = [
        ('dose_energy', '#3498db', 'Dose and Energy'),            # blue
        ('gvalues', '#2ecc71', 'G-Values'),                       # green
        ('dna_damage', '#e74c3c', 'DNA Damage'),                  # red
        ('complexity', '#9b59b6', 'Complexity'),                  # purple
        ('dna_damage_per_gy', '#e67e22', 'DNA Damage per Gy'),    # orange
        ('complexity_per_gy', '#8e44ad', 'Complexity per Gy')     # dark purple
    ]
    
    # Get scenario label if available
    scenario = enhancement_results.get('scenario_label', '')
    
    # Create plots for each category
    for category, color, title in categories:
        data, labels, errors = extract_enhancement_data(enhancement_results, category)
        if data:  # Only create plot if we have data
            create_enhancement_bar_plot(data, labels, errors, title, 
                                       colors=[color], 
                                       scenario_labels=[scenario] if scenario else None)

In [8]:
from analize_cell_sim_results import compute_enhancement_ratios

# Get enhancement ratios with scenario label
enhancement_results = compute_enhancement_ratios(multicell_stats_med1_cell1, multicell_stats_med0_cell0, 
                                               scenario_label="1mg/ml NPs")


# Display enhancement tables with the new grouped approach
print(f"Enhancement Ratios: {enhancement_results['scenario_label']}")
print("-" * (len("Enhancement Ratios: ") + len(enhancement_results['scenario_label'])))
import re  # Import needed for regex pattern matching
enhancement_tables = display_enhancement_table_grouped(enhancement_results)

Enhancement Ratios: 1mg/ml NPs
------------------------------


Dose and Energy Enhancement
---------------------------


  styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])


Unnamed: 0,Quantity,Enhancement Ratio,Uncertainty
0,Dose to Nucleus (Phase 2),1.052,0.02
1,Dose to Nucleus (Phase 3),1.086,0.017
2,Energy to Cell,1.123,0.011




G-Values Enhancement
--------------------


  styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])


Unnamed: 0,Quantity,Enhancement Ratio,Uncertainty
0,G-Value (OH^-1),0.659,0.724
1,G-Value (H2O2^0),0.561,0.723
2,G-Value (e_aq^-1),0.603,0.615
3,G-Value (H^0),0.546,0.768
4,G-Value (H3O^1),0.609,0.631
5,G-Value (OH^0),0.627,0.673
6,G-Value (H_2^0),0.632,0.773




DNA Damage Enhancement
----------------------


  styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])


Unnamed: 0,Quantity,Enhancement Ratio,Uncertainty
0,Double Strand Breaks,1.038,0.037
1,Single Strand Breaks,1.075,0.023
2,Strand Breaks,1.072,0.022
3,Base Damage,1.069,0.019




Complexity Enhancement
----------------------


  styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])


Unnamed: 0,Quantity,Enhancement Ratio,Uncertainty
0,Complexity2 Damage,1.017,0.053
1,Complexity3 Damage,0.964,0.073
2,Complexity4 Damage,1.185,0.157
3,Complexity5 Damage,1.581,0.337
4,Complexity6 Damage,1.308,0.558
5,Complexity7 Damage,0.429,0.319
6,Complexity8 Damage,1.0,1.414




DNA Damage per Gy Enhancement
-----------------------------


  styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])


Unnamed: 0,Quantity,Enhancement Ratio,Uncertainty
0,Double Strand Breaks per Gy,0.956,0.037
1,Single Strand Breaks per Gy,0.99,0.026
2,Strand Breaks per Gy,0.987,0.025
3,Base Damage per Gy,0.985,0.023




Complexity per Gy Enhancement
-----------------------------


  styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])


Unnamed: 0,Quantity,Enhancement Ratio,Uncertainty
0,Complexity2 Damage per Gy,0.937,0.051
1,Complexity3 Damage per Gy,0.887,0.068
2,Complexity4 Damage per Gy,1.091,0.145
3,Complexity5 Damage per Gy,1.456,0.311
4,Complexity6 Damage per Gy,1.204,0.514
5,Complexity7 Damage per Gy,0.395,0.294
6,Complexity8 Damage per Gy,0.921,1.302


In [9]:
# Visualize enhancement ratios for 1mg/ml NP concentration vs control
scenario_label = enhancement_results.get('scenario_label')
print(f"\nVisualization of Enhancement Ratios: {scenario_label}")
print("-" * (len("Visualization of Enhancement Ratios: ") + len(scenario_label)))
# Can use either the old function or the new one with a single scenario
plot_all_enhancement_categories(enhancement_results)
# Equivalent using new function:
# plot_multi_enhancement_categories([enhancement_results])


Visualization of Enhancement Ratios: 1mg/ml NPs
-----------------------------------------------


In [10]:
# Get enhancement ratios with scenario label
enhancement_results = compute_enhancement_ratios(multicell_stats_med5_cell5, multicell_stats_med0_cell0,
                                               scenario_label="5mg/ml NPs")


# Display enhancement tables with the new grouped approach
print(f"Enhancement Ratios: {enhancement_results['scenario_label']}")
print("-" * (len("Enhancement Ratios: ") + len(enhancement_results['scenario_label'])))
import re  # Import needed for regex pattern matching
enhancement_tables = display_enhancement_table_grouped(enhancement_results)

Enhancement Ratios: 5mg/ml NPs
------------------------------


Dose and Energy Enhancement
---------------------------


  styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])


Unnamed: 0,Quantity,Enhancement Ratio,Uncertainty
0,Dose to Nucleus (Phase 2),1.329,0.019
1,Dose to Nucleus (Phase 3),1.36,0.017
2,Energy to Cell,1.651,0.014




G-Values Enhancement
--------------------


  styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])


Unnamed: 0,Quantity,Enhancement Ratio,Uncertainty
0,G-Value (OH^-1),0.0,0.0
1,G-Value (H2O2^0),0.0,0.0
2,G-Value (e_aq^-1),0.0,0.0
3,G-Value (H^0),0.0,0.0
4,G-Value (H3O^1),0.0,0.0
5,G-Value (OH^0),0.0,0.0
6,G-Value (H_2^0),0.0,0.0




DNA Damage Enhancement
----------------------


  styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])


Unnamed: 0,Quantity,Enhancement Ratio,Uncertainty
0,Double Strand Breaks,1.31,0.059
1,Single Strand Breaks,1.335,0.025
2,Strand Breaks,1.332,0.024
3,Base Damage,1.336,0.02




Complexity Enhancement
----------------------


  styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])


Unnamed: 0,Quantity,Enhancement Ratio,Uncertainty
0,Complexity2 Damage,1.245,0.066
1,Complexity3 Damage,1.309,0.099
2,Complexity4 Damage,1.521,0.237
3,Complexity5 Damage,1.677,0.368
4,Complexity6 Damage,1.385,0.531
5,Complexity7 Damage,1.143,0.57
6,Complexity8 Damage,0.0,0.0




DNA Damage per Gy Enhancement
-----------------------------


  styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])


Unnamed: 0,Quantity,Enhancement Ratio,Uncertainty
0,Double Strand Breaks per Gy,0.963,0.045
1,Single Strand Breaks per Gy,0.981,0.022
2,Strand Breaks per Gy,0.979,0.022
3,Base Damage per Gy,0.982,0.019




Complexity per Gy Enhancement
-----------------------------


  styled_df = styled_df.applymap(color_enhancement, subset=['Enhancement Ratio'])


Unnamed: 0,Quantity,Enhancement Ratio,Uncertainty
0,Complexity2 Damage per Gy,0.915,0.05
1,Complexity3 Damage per Gy,0.962,0.074
2,Complexity4 Damage per Gy,1.118,0.175
3,Complexity5 Damage per Gy,1.233,0.271
4,Complexity6 Damage per Gy,1.018,0.391
5,Complexity7 Damage per Gy,0.84,0.419


In [11]:
# Visualize enhancement ratios for 5mg/ml NP concentration vs control
scenario_label = enhancement_results.get('scenario_label')
print(f"\nVisualization of Enhancement Ratios: {scenario_label}")
print("-" * (len("Visualization of Enhancement Ratios: ") + len(scenario_label)))
# Can use either the old function or the new one with a single scenario
plot_all_enhancement_categories(enhancement_results)
# Equivalent using new function:
# plot_multi_enhancement_categories([enhancement_results])


Visualization of Enhancement Ratios: 5mg/ml NPs
-----------------------------------------------


## Multi-Scenario Comparison

Compare enhancements from different nanoparticle concentrations in the same plots.

In [None]:
# Store the enhancement results in a list for comparison
all_enhancement_results = []

# Compute enhancement ratios for 1mg/ml NPs vs Control
enhancement_1mg = compute_enhancement_ratios(
    multicell_stats_med1_cell1, 
    multicell_stats_med0_cell0,
    scenario_label="1mg/ml NPs"
)
all_enhancement_results.append(enhancement_1mg)

# Compute enhancement ratios for 5mg/ml NPs vs Control
enhancement_5mg = compute_enhancement_ratios(
    multicell_stats_med5_cell5, 
    multicell_stats_med0_cell0,
    scenario_label="5mg/ml NPs"
)
all_enhancement_results.append(enhancement_5mg)

# Plot all categories with both scenarios in the same plots
print("\nMulti-Scenario Enhancement Comparison")
print("------------------------------------")
plot_multi_enhancement_categories(all_enhancement_results)


Multi-Scenario Enhancement Comparison
------------------------------------


posx and posy should be finite values
