# ER_compare
This is the code used for extracting the data from RDME-ODE simulation results and comparing various trajectories of species with and without ER.

In [6]:
%run env.ipynb
import pickle
import os
import numpy as np
from jLM.RDME import File as RDMEFile
import jLM
import json
import matplotlib.pyplot as plt
import seaborn as sns
from traj_analysis_rdme import *
from tqdm import tqdm
import pandas as pd
import logging

# Input directories: one folder of replicate trajectory files per simulation condition.
er_sim1_traj_dir = "/data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250322_wtER_60min"
noer_traj_dir = "/data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250310_wtnoer_60min"
er_sim2_traj_dir = "/data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250323_ERriboMarie"
# Figures, CSV summaries and the run log are all written below the ER sim2 results folder.
fig_dir = os.path.join(er_sim2_traj_dir, 'figures_diffriboMarie_comparison/')

# exist_ok=True creates the folder when missing without a separate existence check,
# so there is no window between "check" and "create".
os.makedirs(fig_dir, exist_ok=True)

# Configure logging: every message goes both to a log file in fig_dir and to the cell output.
# NOTE(review): logging.basicConfig is a no-op when the root logger already has handlers
# (e.g. if env.ipynb configured logging first) — confirm the file handler is really attached.
log_file = os.path.join(fig_dir, 'run_log.log')
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)


logging.info(f"This is the file to compare between ER and nonER data: {er_sim1_traj_dir}, {er_sim2_traj_dir}, and {noer_traj_dir}")



2025-03-26 18:56:39,007 - INFO - This is the file to compare between ER and nonER data: /data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250322_wtER_60min, /data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250323_ERriboMarie, and /data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250310_wtnoer_60min


The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython
The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


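Default step: load the required trajectory data for the three simulation sets (ER sim1, ER sim2, no ER) and save per-species mean/std statistics to CSV.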

In [7]:


traj_suff = "_ode.jsonl"


def list_traj_files(traj_dir):
    """Return the names of the 'yeast*.lm' trajectory files found in ``traj_dir``.

    The names are sorted because os.listdir returns entries in arbitrary order;
    sorting makes the processing order (and therefore the logs) reproducible.
    """
    return sorted(f for f in os.listdir(traj_dir) if f.startswith('yeast') and f.endswith('.lm'))


def er_cell_volume(reg):
    """Volume used to normalise ER runs: cytoplasm + nucleoplasm + plasma membrane."""
    return reg.cytoplasm.volume + reg.nucleoplasm.volume + reg.plasmaMembrane.volume


def noer_cell_volume(reg):
    """Volume used to normalise no-ER runs: cytoplasm + nucleoplasm.

    NOTE(review): unlike the ER runs this omits the plasma membrane volume —
    confirm that the difference is intended.
    """
    return reg.cytoplasm.volume + reg.nucleoplasm.volume


def load_condition(traj_dir, traj_files, label, cell_volume, traj_suffix):
    """Load every replicate of one simulation condition.

    Parameters
    ----------
    traj_dir : str
        Directory holding the trajectory files.
    traj_files : list of str
        File names (inside ``traj_dir``) to load.
    label : str
        Condition name used in the progress bar and log messages.
    cell_volume : callable
        Maps ``traj.reg`` to the volume used for the count-to-concentration factor.
    traj_suffix : str
        Suffix passed through to ``get_traj``.

    Returns
    -------
    tuple
        ``(rdme_by_species, ode_by_species, rdme_ts, ode_ts, nav)`` where the two
        dicts map species name -> list with one entry per replicate, the time
        arrays come from the first replicate, and ``nav`` is 6.022e23 times the
        volume of the last replicate processed.

    Raises
    ------
    FileNotFoundError
        If ``traj_files`` is empty; otherwise the failure would only surface
        later as an empty CSV or an undefined normalisation constant.
    """
    if not traj_files:
        raise FileNotFoundError(f"No 'yeast*.lm' trajectory files found in {traj_dir}")

    rdme_by_species = {}
    ode_by_species = {}
    rdme_ts = ode_ts = nav = None

    for traj_file in tqdm(traj_files, desc=f"Processing {label} files", unit="file"):
        logging.info(f"Processing {label} file: {traj_file}")
        traj, odeTraj, _ = get_traj(traj_dir, traj_file, traj_suffix)
        curr_rdme_ts, rdmeYs, curr_ode_ts, odeYs, _, _ = get_data_for_plot(
            traj, odeTraj, region_traj=None, sparse_factor=1)
        nav = 6.022e23 * cell_volume(traj.reg)

        # Keep the time axes of the first replicate only.
        # NOTE(review): assumes all replicates of a condition share one time grid — confirm.
        if rdme_ts is None:
            rdme_ts = curr_rdme_ts
            ode_ts = curr_ode_ts

        for species, data in rdmeYs.items():
            rdme_by_species.setdefault(species, []).append(data)
        for species, data in odeYs.items():
            ode_by_species.setdefault(species, []).append(data)

    return rdme_by_species, ode_by_species, rdme_ts, ode_ts, nav


def summarise_replicates(species_data, times, prefix):
    """Build one CSV row per species with the replicate mean and standard deviation.

    ``species_data`` maps species name -> list of replicate trajectories. The
    arrays are stored as comma-separated strings so each species fits in one row.
    np.std is called with its default (population) normalisation.
    """
    time_str = ','.join(map(str, times))
    rows = []
    for species, trajectories in species_data.items():
        stacked = np.array(trajectories)
        rows.append({
            'Species': f"{prefix}_{species}",
            'Time': time_str,
            'Average': ','.join(map(str, np.mean(stacked, axis=0))),
            'Std': ','.join(map(str, np.std(stacked, axis=0)))
        })
    return rows


er_sim1_files = list_traj_files(er_sim1_traj_dir)
er_sim2_files = list_traj_files(er_sim2_traj_dir)
noer_files = list_traj_files(noer_traj_dir)

logging.info(f"ER files: {er_sim1_files}, {er_sim2_files}")
logging.info(f"NOER files: {noer_files}")

# Load the replicates of each condition. rdmeTs / odeTs keep the ER sim1 time axes.
er_sim1_species_data, er_sim1_ode_data, rdmeTs, odeTs, NAV_ER_SIM1 = load_condition(
    er_sim1_traj_dir, er_sim1_files, "ER sim1", er_cell_volume, traj_suff)
er_sim2_species_data, er_sim2_ode_data, er_sim2_rdmeTs, er_sim2_odeTs, NAV_ER_SIM2 = load_condition(
    er_sim2_traj_dir, er_sim2_files, "ER sim2", er_cell_volume, traj_suff)
noer_species_data, noer_ode_data, noer_rdmeTs, noer_odeTs, NAV_NOER = load_condition(
    noer_traj_dir, noer_files, "NOER", noer_cell_volume, traj_suff)

# Per-species statistics; each condition is written with its own time axis.
er_sim1_results = (summarise_replicates(er_sim1_species_data, rdmeTs, "RDME")
                   + summarise_replicates(er_sim1_ode_data, odeTs, "ODE"))
er_sim2_results = (summarise_replicates(er_sim2_species_data, er_sim2_rdmeTs, "RDME")
                   + summarise_replicates(er_sim2_ode_data, er_sim2_odeTs, "ODE"))
noer_results = (summarise_replicates(noer_species_data, noer_rdmeTs, "RDME")
                + summarise_replicates(noer_ode_data, noer_odeTs, "ODE"))

# Save to CSV files
er_sim1_df = pd.DataFrame(er_sim1_results)
er_sim2_df = pd.DataFrame(er_sim2_results)
noer_df = pd.DataFrame(noer_results)

er_sim1_csv_path = os.path.join(fig_dir, 'er_sim1_species_statistics.csv')
er_sim2_csv_path = os.path.join(fig_dir, 'er_sim2_species_statistics.csv')
noer_csv_path = os.path.join(fig_dir, 'noer_species_statistics.csv')

er_sim1_df.to_csv(er_sim1_csv_path, index=False)
er_sim2_df.to_csv(er_sim2_csv_path, index=False)
noer_df.to_csv(noer_csv_path, index=False)

logging.info(f"ER statistics saved to: {er_sim1_csv_path}, {er_sim2_csv_path}")
logging.info(f"NOER statistics saved to: {noer_csv_path}")


2025-03-26 18:56:39,047 - INFO - ER files: ['yeastER1.13er_mt_multi_20250320_12_t60.0minGAE11.1mMERwt_gpu4.lm', 'yeastER1.13er_mt_multi_20250320_14_t60.0minGAE11.1mMERwt_gpu4.lm', 'yeastER1.13er_mt_multi_20250320_13_t60.0minGAE11.1mMERwt_gpu4.lm', 'yeastER1.13er_mt_multi_20250316_11_t60.0minGAE11.1mMERwt_gpu4.lm', 'yeastER1.13er_mt_multi_20250320_15_t60.0minGAE11.1mMERwt_gpu4.lm'], ['yeastER1.13er_mt_multi_20250323_94_t60.0minGAE11.1mMERwtMarie_gpu4.lm', 'yeastER1.13er_mt_multi_20250323_93_t60.0minGAE11.1mMERwtMarie_gpu4.lm', 'yeastER1.13er_mt_multi_20250323_92_t60.0minGAE11.1mMERwtMarie_gpu4.lm', 'yeastER1.13er_mt_multi_20250323_91_t60.0minGAE11.1mMERwtMarie_gpu4.lm', 'yeastER1.13er_mt_multi_20250323_95_t60.0minGAE11.1mMERwtMarie_gpu4.lm']
2025-03-26 18:56:39,047 - INFO - NOER files: ['yeast1.14.3mt20250304_4_t60.0minGAE11.1mMnoERwt_gpu4.lm', 'yeast1.14.3mt20250304_5_t60.0minGAE11.1mMnoERwt_gpu4.lm', 'yeast1.14.3mt20250304_3_t60.0minGAE11.1mMnoERwt_gpu4.lm', 'yeast1.14.3mt20250303_1_t

no region data


Processing ER sim1 files:  40%|████      | 2/5 [00:04<00:06,  2.11s/file]2025-03-26 18:56:43,238 - INFO - Processing ER sim1 file: yeastER1.13er_mt_multi_20250320_13_t60.0minGAE11.1mMERwt_gpu4.lm


no region data


Processing ER sim1 files:  60%|██████    | 3/5 [00:05<00:03,  1.94s/file]2025-03-26 18:56:44,969 - INFO - Processing ER sim1 file: yeastER1.13er_mt_multi_20250316_11_t60.0minGAE11.1mMERwt_gpu4.lm


no region data


Processing ER sim1 files:  80%|████████  | 4/5 [00:07<00:01,  1.87s/file]2025-03-26 18:56:46,742 - INFO - Processing ER sim1 file: yeastER1.13er_mt_multi_20250320_15_t60.0minGAE11.1mMERwt_gpu4.lm


no region data


Processing ER sim1 files: 100%|██████████| 5/5 [00:09<00:00,  1.88s/file]


no region data


Processing ER sim2 files:   0%|          | 0/5 [00:00<?, ?file/s]2025-03-26 18:56:48,445 - INFO - Processing ER sim2 file: yeastER1.13er_mt_multi_20250323_94_t60.0minGAE11.1mMERwtMarie_gpu4.lm
Processing ER sim2 files:  20%|██        | 1/5 [00:01<00:07,  1.78s/file]2025-03-26 18:56:50,229 - INFO - Processing ER sim2 file: yeastER1.13er_mt_multi_20250323_93_t60.0minGAE11.1mMERwtMarie_gpu4.lm


no region data


Processing ER sim2 files:  40%|████      | 2/5 [00:03<00:05,  1.79s/file]2025-03-26 18:56:52,018 - INFO - Processing ER sim2 file: yeastER1.13er_mt_multi_20250323_92_t60.0minGAE11.1mMERwtMarie_gpu4.lm


no region data


Processing ER sim2 files:  60%|██████    | 3/5 [00:05<00:03,  1.80s/file]2025-03-26 18:56:53,839 - INFO - Processing ER sim2 file: yeastER1.13er_mt_multi_20250323_91_t60.0minGAE11.1mMERwtMarie_gpu4.lm


no region data


Processing ER sim2 files:  80%|████████  | 4/5 [00:07<00:01,  1.78s/file]2025-03-26 18:56:55,577 - INFO - Processing ER sim2 file: yeastER1.13er_mt_multi_20250323_95_t60.0minGAE11.1mMERwtMarie_gpu4.lm


no region data


Processing ER sim2 files: 100%|██████████| 5/5 [00:08<00:00,  1.80s/file]


no region data


Processing NOER files:   0%|          | 0/5 [00:00<?, ?file/s]2025-03-26 18:56:57,424 - INFO - Processing NOER file: yeast1.14.3mt20250304_4_t60.0minGAE11.1mMnoERwt_gpu4.lm
Processing NOER files:  20%|██        | 1/5 [00:01<00:06,  1.70s/file]2025-03-26 18:56:59,123 - INFO - Processing NOER file: yeast1.14.3mt20250304_5_t60.0minGAE11.1mMnoERwt_gpu4.lm


no region data


Processing NOER files:  40%|████      | 2/5 [00:03<00:05,  1.71s/file]2025-03-26 18:57:00,834 - INFO - Processing NOER file: yeast1.14.3mt20250304_3_t60.0minGAE11.1mMnoERwt_gpu4.lm


no region data


Processing NOER files:  60%|██████    | 3/5 [00:05<00:03,  1.70s/file]2025-03-26 18:57:02,526 - INFO - Processing NOER file: yeast1.14.3mt20250303_1_t60.0minGAE11.1mMnoERwt_gpu4.lm


no region data


Processing NOER files:  80%|████████  | 4/5 [00:06<00:01,  1.70s/file]2025-03-26 18:57:04,222 - INFO - Processing NOER file: yeast1.14.3mt20250303_2_t60.0minGAE11.1mMnoERwt_gpu4.lm


no region data


Processing NOER files: 100%|██████████| 5/5 [00:08<00:00,  1.70s/file]

no region data



2025-03-26 18:57:06,617 - INFO - ER statistics saved to: /data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250323_ERriboMarie/figures_diffriboMarie_comparison/er_sim1_species_statistics.csv, /data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250323_ERriboMarie/figures_diffriboMarie_comparison/er_sim2_species_statistics.csv
2025-03-26 18:57:06,618 - INFO - NOER statistics saved to: /data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250323_ERriboMarie/figures_diffriboMarie_comparison/noer_species_statistics.csv


plot comparison graphs

In [8]:
# Reload the three per-condition summary tables that were written to fig_dir
er_sim1_df, er_sim2_df, noer_df = (
    pd.read_csv(os.path.join(fig_dir, csv_name))
    for csv_name in (
        'er_sim1_species_statistics.csv',
        'er_sim2_species_statistics.csv',
        'noer_species_statistics.csv',
    )
)

# Function to convert string of comma-separated values to numpy array
def str_to_array(s):
    """Parse a comma-separated string of numbers into a 1-D float NumPy array."""
    tokens = s.split(',')
    return np.array(list(map(float, tokens)))

# Debug: log the species available in each statistics table
logging.info(f"Available species in ER:  {er_sim1_df['Species'].tolist()}")
logging.info(f"Available species in ER:  {er_sim2_df['Species'].tolist()}")
logging.info(f"Available species in NOER:  {noer_df['Species'].tolist()}")

# Union of the species names present in any of the three tables
unique_species = set(er_sim1_df['Species'].unique()) | set(er_sim2_df['Species'].unique()) | set(noer_df['Species'].unique())
logging.info(f"\nUnique species:  {unique_species}")

# Plot settings
# Common plot settings for poster presentation
plt.style.use('default')
plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams['figure.dpi'] = 600
plt.rcParams['font.size'] = 18  # Increase base font size
plt.rcParams['axes.titlesize'] = 28  # Increase title font size
plt.rcParams['axes.labelsize'] = 18  # Increase axis label font size
plt.rcParams['xtick.labelsize'] = 18  # Increase tick label font size
plt.rcParams['ytick.labelsize'] = 18  # Increase tick label font size
plt.rcParams['legend.fontsize'] = 18  # Increase legend font size

noer_color = '#0072B2'    # Blue for no ER
er_sim1_color = '#D55E00'  # Vermillion/Orange-red for ER sim1
er_sim2_color = '#009E73'  # Bluish green for ER sim2 (ERriboMarie)


def first_row_for_species(stats_df, species_name):
    """Return the first row of ``stats_df`` whose 'Species' equals ``species_name``, or None."""
    rows = stats_df[stats_df['Species'] == species_name]
    return None if rows.empty else rows.iloc[0]


def plot_mean_with_band(ax, stats_row, label, linestyle, color):
    """Draw the mean curve of one statistics row with a shaded +/- one-std band.

    The curve is drawn against the row's own 'Time' column, so conditions with
    different time grids are each plotted correctly.
    """
    time = str_to_array(stats_row['Time'])
    avg = str_to_array(stats_row['Average'])
    std = str_to_array(stats_row['Std'])
    ax.plot(time, avg, label=label, linestyle=linestyle, color=color)
    ax.fill_between(time, avg - std, avg + std, alpha=0.2, color=color)


# Create plots for each species (sorted so the processing order is reproducible)
for species_name in sorted(unique_species):
    # Local names are deliberately different from the *_species_data dicts built
    # when the trajectories were loaded, so those dicts are not overwritten here.
    er_sim1_row = first_row_for_species(er_sim1_df, species_name)
    er_sim2_row = first_row_for_species(er_sim2_df, species_name)
    noer_row = first_row_for_species(noer_df, species_name)

    # Check availability BEFORE creating a figure, so a skipped species does not
    # leave an unclosed figure behind.
    if er_sim1_row is None or er_sim2_row is None or noer_row is None:
        logging.info(f"Skipping {species_name} - data not found")
        continue

    # Display name: drop the leading "RDME_"/"ODE_" prefix, then show the
    # remaining underscores as colons.
    display_name = species_name.split('_', 1)[1] if '_' in species_name else species_name
    display_name = display_name.replace('_', ':')

    fig, ax = plt.subplots()
    plot_mean_with_band(ax, er_sim1_row, 'ER sim1', '-', er_sim1_color)
    plot_mean_with_band(ax, er_sim2_row, 'ER sim2', '-', er_sim2_color)
    plot_mean_with_band(ax, noer_row, 'NOER', '--', noer_color)

    # Customize plot
    ax.set_xlabel('Time (min)')
    if species_name.startswith('RDME_DG'):  # Change y-label for species starting with 'DG'
        ax.set_ylabel('Probability')
    else:
        ax.set_ylabel('Counts')
    ax.legend(framealpha=0.3, loc='upper right')
    ax.grid(False)

    # Save figure, then close this specific figure to release its memory
    fig.tight_layout()
    fig_path = os.path.join(fig_dir, f'{species_name}_comparison.png')
    fig.savefig(fig_path, dpi=600, bbox_inches='tight')
    logging.info(f"Saved plot for {display_name}")
    plt.close(fig)

logging.info(f"\nPlots saved in: {fig_dir}")
# Flush every root handler rather than assuming which one sits at index 0
for handler in logging.getLogger().handlers:
    handler.flush()

2025-03-26 18:57:06,690 - INFO - Available species in ER:  ['RDME_DGrep', 'RDME_DGrep_G4d', 'RDME_DGrep_G4d_G80d', 'RDME_Rrep', 'RDME_Grep', 'RDME_DG1', 'RDME_DG1_G4d', 'RDME_DG1_G4d_G80d', 'RDME_R1', 'RDME_G1', 'RDME_DG2', 'RDME_DG2_G4d', 'RDME_DG2_G4d_G80d', 'RDME_R2', 'RDME_G2', 'RDME_DG3', 'RDME_DG3_G4d', 'RDME_DG3_G4d_G80d', 'RDME_R3', 'RDME_G3', 'RDME_G3i', 'RDME_DG4', 'RDME_R4', 'RDME_G4', 'RDME_G4d', 'RDME_DG80', 'RDME_DG80_G4d', 'RDME_DG80_G4d_G80d', 'RDME_R80', 'RDME_G80', 'RDME_G80d', 'RDME_G80d_G3i', 'RDME_ribosome', 'RDME_ribosomeR1', 'RDME_ribosomeR2', 'RDME_ribosomeR3', 'RDME_ribosomeR4', 'RDME_ribosomeR80', 'RDME_ribosomeGrep', 'ODE_GAI', 'ODE_G1', 'ODE_G1GAI', 'ODE_G2GAI', 'ODE_G2GAE', 'ODE_G2']
2025-03-26 18:57:06,690 - INFO - Available species in ER:  ['RDME_DGrep', 'RDME_DGrep_G4d', 'RDME_DGrep_G4d_G80d', 'RDME_Rrep', 'RDME_Grep', 'RDME_DG1', 'RDME_DG1_G4d', 'RDME_DG1_G4d_G80d', 'RDME_R1', 'RDME_G1', 'RDME_DG2', 'RDME_DG2_G4d', 'RDME_DG2_G4d_G80d', 'RDME_R2', 'RDME_

Total G2 in the membrane: compare the sum of the ODE species G2, G2GAE, and G2GAI across the three simulation sets.


In [9]:
# Load the saved per-condition statistics tables from fig_dir
def _load_stats_table(csv_name):
    """Read one statistics CSV located in fig_dir into a DataFrame."""
    return pd.read_csv(os.path.join(fig_dir, csv_name))


er_sim1_df = _load_stats_table('er_sim1_species_statistics.csv')
er_sim2_df = _load_stats_table('er_sim2_species_statistics.csv')
noer_df = _load_stats_table('noer_species_statistics.csv')

def str_to_array(s):
    """Convert a string of comma-separated numbers into a 1-D float NumPy array."""
    values = []
    for token in s.split(','):
        values.append(float(token))
    return np.array(values)

# ODE species whose counts are summed to give the total G2 pool
g2_species = ['ODE_G2', 'ODE_G2GAE', 'ODE_G2GAI']


def sum_species_rows(stats_df, species_names):
    """Sum the mean trajectories of ``species_names`` found in ``stats_df``.

    Returns ``(time, total, total_std)`` or ``None`` when no listed species is
    present. ``time`` is taken from the first matching row. ``total_std`` is the
    square root of the summed squared per-species stds.
    NOTE(review): that combination treats the species as uncorrelated — confirm
    this is acceptable for species that interconvert.
    """
    rows = stats_df[stats_df['Species'].isin(species_names)]
    if rows.empty:
        return None

    time = str_to_array(rows.iloc[0]['Time'])
    total = np.zeros_like(str_to_array(rows.iloc[0]['Average']))
    std_squared = np.zeros_like(total)
    for _, row in rows.iterrows():
        total += str_to_array(row['Average'])
        std_squared += str_to_array(row['Std'])**2
    return time, total, np.sqrt(std_squared)


noer_color = '#0072B2'    # Blue for no ER
er_sim1_color = '#D55E00'  # Vermillion/Orange-red for ER sim1
er_sim2_color = '#009E73'  # Bluish green for ER sim2 (ERriboMarie)

# Each condition is summed from ITS OWN table. Previously the ER sim2 total was
# accumulated from the ER sim1 rows, so the two ER curves were identical.
g2_curves = [
    ('ER sim1', er_sim1_df, '-', er_sim1_color),
    ('ER sim2', er_sim2_df, '-', er_sim2_color),
    ('NOER', noer_df, '--', noer_color),
]

# Create the plot
fig, ax = plt.subplots(figsize=(10, 6))
for label, stats_df, linestyle, color in g2_curves:
    summed = sum_species_rows(stats_df, g2_species)
    if summed is None:
        # Skip a condition without G2 rows instead of failing on undefined totals
        logging.warning(f"No G2 species found for {label}; curve skipped")
        continue
    time, total, total_std = summed
    ax.plot(time, total, label=label, linestyle=linestyle, color=color)
    ax.fill_between(time, total - total_std, total + total_std, alpha=0.2, color=color)

ax.set_xlabel('Time (min)')
ax.set_ylabel('Counts')
ax.legend(framealpha=0.3, loc='upper right')
ax.grid(False)

# Save figure
fig.tight_layout()
fig_path = os.path.join(fig_dir, 'G2_membrane_comparison.png')
fig.savefig(fig_path, dpi=600, bbox_inches='tight')
logging.info(f"Saved plot for G2 membrane")
plt.close(fig)
# Flush every root handler rather than assuming which one sits at index 0
for handler in logging.getLogger().handlers:
    handler.flush()

2025-03-26 18:58:01,962 - INFO - Saved plot for G2 membrane


Total GAI: compare the combined concentration (mM) of GAI, G1GAI, G3i, and G2GAI across the three simulation sets.

In [10]:
def str_to_array(s):
    """Turn a comma-separated string of numbers into a 1-D float NumPy array."""
    return np.array([*map(float, s.split(','))])

# Define Nature-standard colors
noer_color = '#0072B2'    # Blue for no ER
er_sim1_color = '#D55E00'  # Vermillion for ER sim1
er_sim2_color = '#009E73'  # Bluish green for ER sim2 (ERriboMarie)

# List of species to combine (names without the "ODE_"/"RDME_" prefix)
gai_species = ['GAI', 'G1GAI', 'G3i', 'G2GAI']


def combine_species_concentration(stats_df, species_names, nav):
    """Sum the mean trajectories of ``species_names`` after scaling by ``1e3 / nav``.

    For each name the row whose 'Species' is exactly ``<prefix>_<name>`` is
    used, preferring an "ODE" row over any other prefix. Exact matching matters:
    a substring search for 'GAI' would also hit 'G1GAI'/'G2GAI', and 'G3i' would
    also hit 'G80d_G3i'.

    Returns ``(time, combined_avg, combined_std, species_used)``; the first three
    are ``None`` when no species is found. ``time`` comes from the first species
    used. ``combined_std`` is the square root of the summed variances.
    NOTE(review): summing variances treats the species as uncorrelated, and the
    element-wise sum assumes all rows share one time grid — confirm both.
    NOTE(review): the 1e3 / nav factor presumably converts counts to mM
    (volume in litres) — confirm the volume units.
    """
    # Species name with the leading "<prefix>_" removed; NaN when there is no '_'
    base_names = stats_df['Species'].str.split('_', n=1).str[1]

    time = combined_avg = combined_var = None
    species_used = []
    for name in species_names:
        candidates = stats_df[base_names == name]
        if candidates.empty:
            continue
        ode_rows = candidates[candidates['Species'].str.startswith('ODE')]
        row = (candidates if ode_rows.empty else ode_rows).iloc[0]
        species_used.append(row['Species'])

        curr_avg = str_to_array(row['Average']) / nav * 1e3
        curr_var = (str_to_array(row['Std']) / nav * 1e3) ** 2

        if combined_avg is None:
            time = str_to_array(row['Time'])
            combined_avg = curr_avg
            combined_var = curr_var
        else:
            combined_avg = combined_avg + curr_avg
            combined_var = combined_var + curr_var

    # Convert the combined variance back to a standard deviation
    combined_std = None if combined_var is None else np.sqrt(combined_var)
    return time, combined_avg, combined_std, species_used


er_sim1_gai = combine_species_concentration(er_sim1_df, gai_species, NAV_ER_SIM1)
er_sim2_gai = combine_species_concentration(er_sim2_df, gai_species, NAV_ER_SIM2)
noer_gai = combine_species_concentration(noer_df, gai_species, NAV_NOER)

# Log which species were actually used
logging.info("ER sim1 species used in GAI total: " + str(er_sim1_gai[3]))
logging.info("ER sim2 species used in GAI total: " + str(er_sim2_gai[3]))
logging.info("NOER species used in GAI total: " + str(noer_gai[3]))

# Create combined GAI species plot
fig, ax = plt.subplots(figsize=(10, 6))

gai_curves = [
    ('ER sim1', er_sim1_gai, '-', er_sim1_color),
    ('ER sim2', er_sim2_gai, '-', er_sim2_color),
    ('NOER', noer_gai, '--', noer_color),
]
first_time = None
for label, (time, combined_avg, combined_std, _), linestyle, color in gai_curves:
    if combined_avg is None:
        continue  # nothing to draw for this condition
    if first_time is None:
        first_time = time
    # Each condition is drawn against its own time axis
    ax.plot(time, combined_avg, label=label, linestyle=linestyle, color=color)
    ax.fill_between(time, combined_avg - combined_std,
                    combined_avg + combined_std,
                    alpha=0.2, color=color)

# Add horizontal line for GAE = 11.1mM
ax.axhline(y=11.1, color='gray', linestyle='-.', linewidth=1.5, label='GAE')
if first_time is not None:
    # The annotation is anchored at the start of the time axis; skipped when no curve exists
    ax.text(first_time[0]*1.05, 10.8, '11.1 mM', color='gray', fontsize=16, va='top', ha='left')

# Customize plot
ax.set_xlabel('Time (min)')
ax.set_ylabel('Concentration (mM)')
ax.legend(framealpha=0.3, loc='upper right')
ax.grid(False)

# Save figure
fig.tight_layout()
fig_path = os.path.join(fig_dir, 'GAI_total_comparison.png')
fig.savefig(fig_path, dpi=600, bbox_inches='tight')
logging.info(f"Saved plot for GAI total")
plt.close(fig)
# Flush every root handler rather than assuming which one sits at index 0
for handler in logging.getLogger().handlers:
    handler.flush()

2025-03-26 18:58:02,026 - INFO - ER sim1 species used in GAI total: ['ODE_GAI', 'ODE_G1GAI', 'RDME_G3i', 'ODE_G2GAI']
2025-03-26 18:58:02,027 - INFO - ER sim2 species used in GAI total: ['ODE_GAI', 'ODE_G1GAI', 'RDME_G3i', 'ODE_G2GAI']
2025-03-26 18:58:02,027 - INFO - NOER species used in GAI total: ['ODE_GAI', 'ODE_G1GAI', 'RDME_G3i', 'ODE_G2GAI']


2025-03-26 18:58:03,070 - INFO - Saved plot for GAI total
