# RDME/CME_compare
This is the code used for extracting the data from RDME-ODE simulation results and comparing various trajectories of species with and without ER.

In [1]:
%run env.ipynb
import pickle
import os
import numpy as np
from jLM.RDME import File as RDMEFile
import jLM
import json
import matplotlib.pyplot as plt
import seaborn as sns
from traj_analysis_rdme import *
from tqdm import tqdm
import pandas as pd
import logging
from pyLM import *
from pyLM.units import *
from pySTDLM import *
from pySTDLM import PostProcessing
cme_traj_dir = "/data2/2024_Yeast_GS/my_current_code/my_cme_ode/output/03232025/"
rdme_traj_dir = "/data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250310_wtnoer_60min"
fig_dir = os.path.join(rdme_traj_dir, 'figures_rdmecme_comparison/')


if not os.path.exists(fig_dir):
    os.makedirs(fig_dir)
# Configure logging
log_file = os.path.join(fig_dir, 'run_log.log')
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)


logging.info(f"This is the file to compare between RDME-ODE and CME-ODE data: {rdme_traj_dir} and {cme_traj_dir}")

2025-03-26 19:27:22,135 - INFO - This is the file to compare between RDME-ODE and CME-ODE data: /data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250310_wtnoer_60min and /data2/2024_Yeast_GS/my_current_code/my_cme_ode/output/03232025/


default, get data required

In [2]:
rdme_files = [f for f in os.listdir(rdme_traj_dir) if f.startswith('yeast') and f.endswith('.lm')]
cme_files = ['gal_cme_ode_gae11.1mM_11.1_gai0_rep50_delta1_time60.lm']
traj_suff = "_ode.jsonl"

logging.info(f"RDME-ODE files: {rdme_files}")
logging.info(f"CME-ODE files: {cme_files}")

# Initialize dictionaries to store data for each species
rdme_species_data = {}
rmde_species_region_data = {}
rdme_ode_data = {}
rdmeTs = None
odeTs = None
cmeTs = None

# Process RDME files
for traj_file in tqdm(rdme_files, desc="Processing RDME files", unit="file"):
    logging.info(f"Processing RDME file: {traj_file}")
    traj, odeTraj, region_traj = get_traj(rdme_traj_dir, traj_file, traj_suff,region_suff='_region.jsonl')

    curr_rdmeTs, rdmeYs, curr_odeTs, odeYs, regionTs, regionYs = get_data_for_plot(traj, odeTraj, region_traj=region_traj, sparse_factor=1)
    NAV = 6.022e23 * (traj.reg.cytoplasm.volume + traj.reg.nucleoplasm.volume + traj.reg.plasmaMembrane.volume)
    if rdmeTs is None:
        rdmeTs = curr_rdmeTs
        odeTs = curr_odeTs

    for species, data in rdmeYs.items():

        if species not in rdme_species_data:
            rdme_species_data[species] = []
        rdme_species_data[species].append(data)

    for species, data in odeYs.items():
        if species not in rdme_ode_data:
            rdme_ode_data[species] = []
        rdme_ode_data[species].append(data)
    # Process region data
    if regionYs is not None:
        regions = region_traj['regions']
     
         # Initialize the nested dictionary structure if needed
        for species, region_data in regionYs.items():
            if species not in rmde_species_region_data:
                rmde_species_region_data[species] = {}
            
            # Initialize lists for each region if they don't exist
            for region in regions:
                if region not in rmde_species_region_data[species]:
                    rmde_species_region_data[species][region] = []
            
            # Now append the data
            for i in range(len(regions)):
                rmde_species_region_data[species][regions[i]].append(regionYs[species][i])
                
                
                
logging.info(f"the length of rdme_species_data: {len(rdme_species_data)}")
logging.info(f"the length of rdme_ode_data: {len(rdme_ode_data)}")
logging.info(f"the length of rmde_species_region_data: {len(rmde_species_region_data)}")

2025-03-26 19:27:22,164 - INFO - RDME-ODE files: ['yeast1.14.3mt20250304_4_t60.0minGAE11.1mMnoERwt_gpu4.lm', 'yeast1.14.3mt20250304_5_t60.0minGAE11.1mMnoERwt_gpu4.lm', 'yeast1.14.3mt20250304_3_t60.0minGAE11.1mMnoERwt_gpu4.lm', 'yeast1.14.3mt20250303_1_t60.0minGAE11.1mMnoERwt_gpu4.lm', 'yeast1.14.3mt20250303_2_t60.0minGAE11.1mMnoERwt_gpu4.lm']
2025-03-26 19:27:22,165 - INFO - CME-ODE files: ['gal_cme_ode_gae11.1mM_11.1_gai0_rep50_delta1_time60.lm']
Processing RDME files:   0%|          | 0/5 [00:00<?, ?file/s]2025-03-26 19:27:22,168 - INFO - Processing RDME file: yeast1.14.3mt20250304_4_t60.0minGAE11.1mMnoERwt_gpu4.lm


loading region data


Processing RDME files:  20%|██        | 1/5 [00:02<00:10,  2.61s/file]2025-03-26 19:27:24,779 - INFO - Processing RDME file: yeast1.14.3mt20250304_5_t60.0minGAE11.1mMnoERwt_gpu4.lm


loading region data


Processing RDME files:  40%|████      | 2/5 [00:04<00:07,  2.44s/file]2025-03-26 19:27:27,098 - INFO - Processing RDME file: yeast1.14.3mt20250304_3_t60.0minGAE11.1mMnoERwt_gpu4.lm


loading region data


Processing RDME files:  60%|██████    | 3/5 [00:07<00:04,  2.41s/file]2025-03-26 19:27:29,480 - INFO - Processing RDME file: yeast1.14.3mt20250303_1_t60.0minGAE11.1mMnoERwt_gpu4.lm


loading region data


Processing RDME files:  80%|████████  | 4/5 [00:09<00:02,  2.47s/file]2025-03-26 19:27:32,042 - INFO - Processing RDME file: yeast1.14.3mt20250303_2_t60.0minGAE11.1mMnoERwt_gpu4.lm


loading region data


Processing RDME files: 100%|██████████| 5/5 [00:12<00:00,  2.47s/file]
2025-03-26 19:27:34,525 - INFO - the length of rdme_species_data: 39
2025-03-26 19:27:34,526 - INFO - the length of rdme_ode_data: 6
2025-03-26 19:27:34,527 - INFO - the length of rmde_species_region_data: 39


In [3]:
print(rdme_species_data.keys())

dict_keys(['DGrep', 'DGrep_G4d', 'DGrep_G4d_G80d', 'Rrep', 'Grep', 'DG1', 'DG1_G4d', 'DG1_G4d_G80d', 'R1', 'G1', 'DG2', 'DG2_G4d', 'DG2_G4d_G80d', 'R2', 'G2', 'DG3', 'DG3_G4d', 'DG3_G4d_G80d', 'R3', 'G3', 'G3i', 'DG4', 'R4', 'G4', 'G4d', 'DG80', 'DG80_G4d', 'DG80_G4d_G80d', 'R80', 'G80', 'G80d', 'G80d_G3i', 'ribosome', 'ribosomeR1', 'ribosomeR2', 'ribosomeR3', 'ribosomeR4', 'ribosomeR80', 'ribosomeGrep'])


In [4]:
rdme_results = []  # List to store overall species statistics (across all regions)
rdme_results_region = []  # List to store region-specific species statistics

# Process overall species data (summed across all regions)
for species, trajectories in rdme_species_data.items():
    # print(trajectories)
    trajectories_array = np.array(trajectories)  # Convert list of trajectories to numpy array
    avg = np.mean(trajectories_array, axis=0)    # Calculate mean across all trajectories for each timepoint
    std = np.std(trajectories_array, axis=0)     # Calculate standard deviation across trajectories
    
    # Store results as dictionary with comma-separated strings for time series data
    rdme_results.append({
        'Species': f"RDME_{species}",            # Prefix species name with RDME for identification
        'Time': ','.join(map(str, rdmeTs)),      # Convert time points to comma-separated string
        'Average': ','.join(map(str, avg)),      # Convert average values to comma-separated string
        'Std': ','.join(map(str, std))           # Convert standard deviation values to comma-separated string
    })
    
# Process region-specific species data
for species, regions in rmde_species_region_data.items():
    for region, trajectories in regions.items():
        trajectories_array = np.array(trajectories)  # Convert list of trajectories to numpy array
        avg = np.mean(trajectories_array, axis=0)    # Calculate mean across all trajectories for each timepoint
        std = np.std(trajectories_array, axis=0)     # Calculate standard deviation across trajectories
        
        # Store region-specific results as dictionary
        rdme_results_region.append({
            'Species': species,                       # Species name (without prefix)
            'Region': region,                         # Region name (e.g., cytoplasm, nucleus, etc.)
            'Time': ','.join(map(str, regionTs if regionTs is not None else rdmeTs)),  # Use region-specific time if available
            'Average': ','.join(map(str, avg)),       # Convert average values to comma-separated string
            'Std': ','.join(map(str, std))            # Convert standard deviation values to comma-separated string
        })
        # save a copy in rdme_results, with the species name as f"RDME_{species}_{region}"
        rdme_results.append({
        'Species': f"RDME_{species}_{region}",
        'Time': ','.join(map(str, regionTs if regionTs is not None else rdmeTs)),
        'Average': ','.join(map(str, avg)),
        'Std': ','.join(map(str, std))
    })
        
for species, trajectories in rdme_ode_data.items():
    trajectories_array = np.array(trajectories)
    avg = np.mean(trajectories_array, axis=0)
    std = np.std(trajectories_array, axis=0)
    
    rdme_results.append({
        'Species': f"ODE_{species}",
        'Time': ','.join(map(str, odeTs)),
        'Average': ','.join(map(str, avg)),
        'Std': ','.join(map(str, std))
    })

logging.info(f"the length of rdme_results: {len(rdme_results)}")
logging.info(f"the length of rdme_results_region: {len(rdme_results_region)}")


2025-03-26 19:27:36,748 - INFO - the length of rdme_results: 396
2025-03-26 19:27:36,749 - INFO - the length of rdme_results_region: 351


In [5]:
# Calculate and save CME statistics
cme_results = []
cme_traj = PostProcessing.openLMFile(os.path.join(cme_traj_dir + cme_files[0]))
cme_species_list = PostProcessing.getSpecies(cme_traj)


# Reorganize the species list based on the given criteria
# Reorganize the species list based on the given criteria
cme_species_list = sorted(cme_species_list, key=lambda x: (
    not x[0].startswith('DG'),                                # Sort DG species first
    not (x[0].startswith('R') or x[0] == 'reporter_rna'),     # Then R species and reporter_rna
    not (x[0].startswith('G') and not x[0].startswith('GA')   # Then G species (except GA)
        or x[0] == 'reporter'), 
    x[0].startswith('GA')                                     # GA species last
))
GA_species_list = ['GAI']
# build general species list based on the name of cme_species_list except GAI 
general_species_list = [species for species in cme_species_list if species not in GA_species_list]


# Compare species between RDME and CME models
# Extract species names from RDME data
rdme_species_names = set( species for species in rdme_species_data.keys())
print(rdme_species_names)
# Extract species names from CME data
cme_species_names = set(species[0] if isinstance(species, list) else species 
                        for species in general_species_list)

# Find species in RDME but not in CME
rdme_only_species = rdme_species_names - cme_species_names
if rdme_only_species:
    logging.info(f"Species in RDME but not in CME: {sorted(rdme_only_species)}")

# Find species in CME but not in RDME
cme_only_species = cme_species_names - rdme_species_names
if cme_only_species:
    logging.info(f"Species in CME but not in RDME: {sorted(cme_only_species)}")

# Note: Species naming may differ between models. Manual mapping might be needed.
# Example mapping dictionary for differently named species
species_mapping = {
    # 'rdme_name': 'cme_name',
    'G80d_G3i': 'G80G3i', 
    'Grep': 'reporter', 
    'Rrep': 'reporter_rna'
    # Add mappings as needed based on the differences found
}
logging.info("Note: Species may have different names in RDME vs CME models. Check for potential matches.")

# logging.info("CME species list:")
# logging.info(general_species_list)
# logging.info(GA_species_list)
# logging.info(f"total number of species: {len(GA_species_list)} + {len(general_species_list)}")


2025-03-26 19:27:36,778 - INFO - Species in RDME but not in CME: ['DG4', 'G80d_G3i', 'Grep', 'Rrep', 'ribosome', 'ribosomeGrep', 'ribosomeR1', 'ribosomeR2', 'ribosomeR3', 'ribosomeR4', 'ribosomeR80']
2025-03-26 19:27:36,779 - INFO - Species in CME but not in RDME: ['G1GAI', 'G2GAE', 'G2GAI', 'G80C', 'G80Cd', 'G80G3i', 'reporter', 'reporter_rna']
2025-03-26 19:27:36,780 - INFO - Note: Species may have different names in RDME vs CME models. Check for potential matches.


{'DG80_G4d_G80d', 'ribosomeR1', 'DG1_G4d_G80d', 'R4', 'DGrep_G4d', 'ribosomeR3', 'G80', 'R3', 'G80d_G3i', 'ribosomeR4', 'DG80_G4d', 'R1', 'DG2_G4d_G80d', 'DGrep', 'DG3_G4d', 'DG1_G4d', 'G80d', 'DG80', 'DG2', 'ribosomeR80', 'DG3_G4d_G80d', 'G3i', 'DG3', 'R80', 'DGrep_G4d_G80d', 'Grep', 'G1', 'DG1', 'DG2_G4d', 'G3', 'Rrep', 'DG4', 'G4', 'ribosomeGrep', 'ribosomeR2', 'G4d', 'G2', 'R2', 'ribosome'}


In [6]:
avg_list_general = []
var_list_general = []
time_list_general = []

avg_list_GA = []
var_list_GA = []
time_list_GA = []

for species in general_species_list:
    avg, var, times = PostProcessing.getAvgVarTrace(cme_traj, species)
    avg_list_general.append(avg)
    var_list_general.append(np.sqrt(var))
    time_list_general.append(times)
# this unit conversion somehow not working for GAI
if len(GA_species_list) == 1:
    species = GA_species_list[0]
    avg, var, times = PostProcessing.getAvgVarTrace(cme_traj, species)
    count2concentration = 4.65e-8  #molecule/cell to mM
    avg_list_GA.append(avg*count2concentration)
    var_list_GA.append(np.sqrt(var)*count2concentration)
    time_list_GA.append(times)
else:
    for species in GA_species_list:
        avg, var, times = PostProcessing.getAvgVarTrace(cme_traj, species)
        count2concentration = 4.65e-8  #molecule/cell to mM
        avg_list_GA.append(avg*count2concentration)
        var_list_GA.append(np.sqrt(var)*count2concentration)
        time_list_GA.append(times)
        
        
for species, avg, std, times in zip(general_species_list + GA_species_list, 
                                    avg_list_general + avg_list_GA, 
                                    var_list_general + var_list_GA, 
                                    time_list_general + time_list_GA):
    species_name = species[0] if isinstance(species, list) else species
    cme_results.append({
                        'Species': species_name,
                        'Time': ','.join(map(str, times)),
                        'Average': ','.join(map(str, avg)),
                        'Std': ','.join(map(str, std))})

2025-03-26 19:27:36,890 - INFO - names: ['R1', 'R2', 'R3', 'R4', 'reporter_rna', 'R80', 'G1', 'G2', 'G3', 'G3i', 'G4', 'G4d', 'reporter', 'G80', 'G80C', 'G80d', 'G80Cd', 'G80G3i', 'GAI', 'DG1', 'DG1_G4d', 'DG1_G4d_G80d', 'DG2', 'DG2_G4d', 'DG2_G4d_G80d', 'DG3', 'DG3_G4d', 'DG3_G4d_G80d', 'DGrep', 'DGrep_G4d', 'DGrep_G4d_G80d', 'DG80', 'DG80_G4d', 'DG80_G4d_G80d', 'G2GAI', 'G2GAE', 'G1GAI']
2025-03-26 19:27:36,894 - INFO - names: ['R1', 'R2', 'R3', 'R4', 'reporter_rna', 'R80', 'G1', 'G2', 'G3', 'G3i', 'G4', 'G4d', 'reporter', 'G80', 'G80C', 'G80d', 'G80Cd', 'G80G3i', 'GAI', 'DG1', 'DG1_G4d', 'DG1_G4d_G80d', 'DG2', 'DG2_G4d', 'DG2_G4d_G80d', 'DG3', 'DG3_G4d', 'DG3_G4d_G80d', 'DGrep', 'DGrep_G4d', 'DGrep_G4d_G80d', 'DG80', 'DG80_G4d', 'DG80_G4d_G80d', 'G2GAI', 'G2GAE', 'G1GAI']
2025-03-26 19:27:36,914 - INFO - names: ['R1', 'R2', 'R3', 'R4', 'reporter_rna', 'R80', 'G1', 'G2', 'G3', 'G3i', 'G4', 'G4d', 'reporter', 'G80', 'G80C', 'G80d', 'G80Cd', 'G80G3i', 'GAI', 'DG1', 'DG1_G4d', 'DG1_G4d_

In [7]:
# Save to CSV files
rdme_df = pd.DataFrame(rdme_results)
cme_df = pd.DataFrame(cme_results)

rdme_csv_path = os.path.join(fig_dir, 'rdme_species_statistics.csv')
cme_csv_path = os.path.join(fig_dir, 'cme_species_statistics.csv')

rdme_df.to_csv(rdme_csv_path, index=False)
cme_df.to_csv(cme_csv_path, index=False)

logging.info(f"RDME statistics saved to: {rdme_csv_path}")
logging.info(f"CME statistics saved to: {cme_csv_path}")

2025-03-26 19:27:40,426 - INFO - RDME statistics saved to: /data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250310_wtnoer_60min/figures_rdmecme_comparison/rdme_species_statistics.csv
2025-03-26 19:27:40,426 - INFO - CME statistics saved to: /data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250310_wtnoer_60min/figures_rdmecme_comparison/cme_species_statistics.csv


plot comparison graphs, this part can run separately

In [8]:
# Read the saved statistics
rdme_df = pd.read_csv(os.path.join(fig_dir, 'rdme_species_statistics.csv'))
cme_df = pd.read_csv(os.path.join(fig_dir, 'cme_species_statistics.csv'))

# Function to convert string of comma-separated values to numpy array
def str_to_array(s):
    return np.array([float(x) for x in s.split(',')])

# Debug: Print available species
print("Available species in RDME:", rdme_df['Species'].tolist())
print("Available species in CME:", cme_df['Species'].tolist())

# plot based on CME species
cme_species = set(cme_df['Species'].unique())


# Plot settings
plt.style.use('default')
plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams['figure.dpi'] = 600
plt.rcParams['font.size'] = 18  # Increase base font size
plt.rcParams['axes.titlesize'] = 28  # Increase title font size
plt.rcParams['axes.labelsize'] = 18  # Increase axis label font size
plt.rcParams['xtick.labelsize'] = 18  # Increase tick label font size
plt.rcParams['ytick.labelsize'] = 18  # Increase tick label font size
plt.rcParams['legend.fontsize'] = 18  # Increase legend font size

Available species in RDME: ['RDME_DGrep', 'RDME_DGrep_G4d', 'RDME_DGrep_G4d_G80d', 'RDME_Rrep', 'RDME_Grep', 'RDME_DG1', 'RDME_DG1_G4d', 'RDME_DG1_G4d_G80d', 'RDME_R1', 'RDME_G1', 'RDME_DG2', 'RDME_DG2_G4d', 'RDME_DG2_G4d_G80d', 'RDME_R2', 'RDME_G2', 'RDME_DG3', 'RDME_DG3_G4d', 'RDME_DG3_G4d_G80d', 'RDME_R3', 'RDME_G3', 'RDME_G3i', 'RDME_DG4', 'RDME_R4', 'RDME_G4', 'RDME_G4d', 'RDME_DG80', 'RDME_DG80_G4d', 'RDME_DG80_G4d_G80d', 'RDME_R80', 'RDME_G80', 'RDME_G80d', 'RDME_G80d_G3i', 'RDME_ribosome', 'RDME_ribosomeR1', 'RDME_ribosomeR2', 'RDME_ribosomeR3', 'RDME_ribosomeR4', 'RDME_ribosomeR80', 'RDME_ribosomeGrep', 'RDME_DG80_G4d_G80d_extracellular', 'RDME_DG80_G4d_G80d_cellWall', 'RDME_DG80_G4d_G80d_nuclearEnvelope', 'RDME_DG80_G4d_G80d_mitochondria', 'RDME_DG80_G4d_G80d_vacuole', 'RDME_DG80_G4d_G80d_plasmaMembrane', 'RDME_DG80_G4d_G80d_cytoplasm', 'RDME_DG80_G4d_G80d_nucleoplasm', 'RDME_DG80_G4d_G80d_ribosomes', 'RDME_ribosomeR1_extracellular', 'RDME_ribosomeR1_cellWall', 'RDME_ribosome

In [9]:
print(cme_species)

{'DG80_G4d_G80d', 'DG1_G4d_G80d', 'R4', 'reporter_rna', 'G80Cd', 'DGrep_G4d', 'G80', 'R3', 'DG80_G4d', 'R1', 'DG2_G4d_G80d', 'DGrep', 'reporter', 'DG3_G4d', 'G1GAI', 'DG1_G4d', 'G80d', 'DG80', 'DG2', 'G80C', 'DG3_G4d_G80d', 'G3i', 'R80', 'DG3', 'DGrep_G4d_G80d', 'GAI', 'G2GAI', 'G80G3i', 'G1', 'DG1', 'DG2_G4d', 'G3', 'G4', 'G2GAE', 'G4d', 'G2', 'R2'}


In [10]:
for species_name in cme_species:
    species_mapping = {
        # 'cme_name': ['rdme_name1', 'rdme_name2'] - map to multiple RDME names for separate graphs
        'G80G3i': ['G80d_G3i'], 
        'reporter': ['Grep'], 
        'reporter_rna': ['Rrep'],
        'G80': ['G80_nucleoplasm'],  # Create separate graphs for each mapping
        'G80C': ['G80_cytoplasm'],
        'G80d': ['G80d_nucleoplasm'],
        'G80Cd': ['G80d_cytoplasm'],
        'G2': ['G2', 'G2_plasmaMembrane', 'G2_cytoplasm'],  # Create separate graphs for each mapping
        'G4': ['G4_nucleoplasm', 'G4_cytoplasm'],
        'G4d': ['G4d_nucleoplasm', 'G4d_cytoplasm']
        # Add mappings as needed based on the differences found
    }
    
    # Get the list of RDME species to check for this CME species
    rdme_species_list = [species_name]  # Default to the same name
    if species_name in species_mapping:
        rdme_species_list = species_mapping[species_name]
    
    # Function to extract the actual species name after RDME_ or ODE_ prefix
    def extract_species_name(full_name):
        if full_name.startswith('RDME_') or full_name.startswith('ODE_'):
            return full_name.split('_', 1)[1]  # Split only on the first underscore
        return full_name
    
    # Create a separate plot for each RDME mapping
    for rdme_species_to_check in rdme_species_list:
        fig, ax = plt.subplots()
        
        # Match species after the RDME_ or ODE_ prefix
        matching_rows = rdme_df[rdme_df['Species'].apply(
            lambda x: extract_species_name(x) == rdme_species_to_check
        )]

        # If multiple rows are found, prioritize the one starting with 'RDME'
        if not matching_rows.empty:
            rdme_species_rows = matching_rows[matching_rows['Species'].str.startswith('RDME')]
            # If no match starts with 'RDME', default to the full list of matches
            if rdme_species_rows.empty:
                rdme_species_rows = matching_rows
        else:
            rdme_species_rows = pd.DataFrame()  # Empty DataFrame if no matches
        
        cme_species_rows = cme_df[cme_df['Species'] == species_name]
        
        if len(rdme_species_rows) == 0 or len(cme_species_rows) == 0:
            print(f"Skipping {species_name} -> {rdme_species_to_check} - data not found")
            plt.close()
            continue
            
        er_data = rdme_species_rows.iloc[0]
        noer_data = cme_species_rows.iloc[0]
        
        time = str_to_array(er_data['Time'])
        er_avg = str_to_array(er_data['Average'])
        er_std = str_to_array(er_data['Std'])
        noer_avg = str_to_array(noer_data['Average'])
        noer_std = str_to_array(noer_data['Std'])
        
        # Extract the part after underscore for display
        display_name = species_name
        if species_name == 'GAI':
            er_avg = er_avg / NAV * 1e3
            er_std = er_std / NAV * 1e3
        # Plot ER
        ax.plot(time, er_avg, label=f'RDME-ODE', linestyle='-')
        ax.fill_between(time, er_avg - er_std, er_avg + er_std, alpha=0.2)
        
        # Plot NOER
        ax.plot(time, noer_avg, label=f'CME-ODE', linestyle='--')
        ax.fill_between(time, noer_avg - noer_std, noer_avg + noer_std, alpha=0.2)
        
        # Customize plot
        ax.set_xlabel('Time (min)')
        if species_name == 'GAI':
            ax.set_ylabel('Concentration (mM)')
        elif "DG" in species_name:
            ax.set_ylabel('Probability')
        else:
            ax.set_ylabel('Counts')
        # ax.set_title(f'{display_name} Comparison')
        ax.legend(framealpha=0.3, loc='upper right')
        ax.grid(False)
        
        # Save figure with specific RDME mapping in filename
        fig_path = os.path.join(fig_dir, f'{species_name}_vs_{rdme_species_to_check}_comparison.png')
        plt.savefig(fig_path, dpi=600, bbox_inches='tight')
        print(f"Saved plot for {display_name} vs {rdme_species_to_check}")
        plt.close()

print(f"\nPlots saved in: {fig_dir}")

Saved plot for DG80_G4d_G80d vs DG80_G4d_G80d
Saved plot for DG1_G4d_G80d vs DG1_G4d_G80d
Saved plot for R4 vs R4
Saved plot for reporter_rna vs Rrep
Saved plot for G80Cd vs G80d_cytoplasm
Saved plot for DGrep_G4d vs DGrep_G4d
Saved plot for G80 vs G80_nucleoplasm
Saved plot for R3 vs R3
Saved plot for DG80_G4d vs DG80_G4d
Saved plot for R1 vs R1
Saved plot for DG2_G4d_G80d vs DG2_G4d_G80d
Saved plot for DGrep vs DGrep
Saved plot for reporter vs Grep
Saved plot for DG3_G4d vs DG3_G4d
Saved plot for G1GAI vs G1GAI
Saved plot for DG1_G4d vs DG1_G4d
Saved plot for G80d vs G80d_nucleoplasm
Saved plot for DG80 vs DG80
Saved plot for DG2 vs DG2
Saved plot for G80C vs G80_cytoplasm
Saved plot for DG3_G4d_G80d vs DG3_G4d_G80d
Saved plot for G3i vs G3i
Saved plot for R80 vs R80
Saved plot for DG3 vs DG3
Saved plot for DGrep_G4d_G80d vs DGrep_G4d_G80d
Saved plot for GAI vs GAI
Saved plot for G2GAI vs G2GAI
Saved plot for G80G3i vs G80d_G3i
Saved plot for G1 vs G1
Saved plot for DG1 vs DG1
Saved 

This is for G2 total

In [11]:
# Create combined G2 species plot
fig, ax = plt.subplots()

# List of species to combine
g2_species = ['G2', 'G2GAE', 'G2GAI']

# Initialize arrays for RDME and CME data
rdme_combined_avg = None
rdme_combined_var = None
cme_combined_avg = None
cme_combined_var = None
time = None

# For tracking which species are actually used
rdme_species_used = []
cme_species_used = []

# Combine RDME data
for species_name in g2_species:
    matching_rows = rdme_df[rdme_df['Species'].str.contains(species_name)]
    if not matching_rows.empty:
        # Modified to use ODE data instead of RDME data
        rdme_species_data = matching_rows[matching_rows['Species'].str.startswith('ODE')]
        
        if len(rdme_species_data) > 0:
            er_data = rdme_species_data.iloc[0]
            # Track which species are being used
            rdme_species_used.append(er_data['Species'])
            
            curr_avg = str_to_array(er_data['Average'])
            curr_std = str_to_array(er_data['Std'])
            curr_var = curr_std ** 2  # Convert std to variance
            
            if rdme_combined_avg is None:
                time = str_to_array(er_data['Time'])
                rdme_combined_avg = curr_avg
                rdme_combined_var = curr_var
            else:
                rdme_combined_avg += curr_avg
                rdme_combined_var += curr_var  # Variances add for independent variables

# Combine CME data
for species_name in g2_species:
    cme_species_data = cme_df[cme_df['Species'] == species_name]
    
    if len(cme_species_data) > 0:
        noer_data = cme_species_data.iloc[0]
        # Track which species are being used
        cme_species_used.append(noer_data['Species'])
        
        curr_avg = str_to_array(noer_data['Average'])
        curr_std = str_to_array(noer_data['Std'])
        curr_var = curr_std ** 2  # Convert std to variance
        
        if cme_combined_avg is None:
            cme_combined_avg = curr_avg
            cme_combined_var = curr_var
        else:
            cme_combined_avg += curr_avg
            cme_combined_var += curr_var  # Variances add for independent variables

# Print which species were actually used
print("RDME species used in G2 total:", rdme_species_used)
print("CME species used in G2 total:", cme_species_used)

# Convert combined variances back to standard deviations
rdme_combined_std = np.sqrt(rdme_combined_var)
cme_combined_std = np.sqrt(cme_combined_var)

# Plot RDME
ax.plot(time, rdme_combined_avg, label='RDME-ODE', linestyle='-')
ax.fill_between(time, rdme_combined_avg - rdme_combined_std, 
                rdme_combined_avg + rdme_combined_std, alpha=0.2)

# Plot CME
ax.plot(time, cme_combined_avg, label='CME-ODE', linestyle='--')
ax.fill_between(time, cme_combined_avg - cme_combined_std, 
                cme_combined_avg + cme_combined_std, alpha=0.2)

# Customize plot
ax.set_xlabel('Time (min)')
ax.set_ylabel('Counts')
# ax.set_title('Total G2 Species Comparison (G2 + G2GAE + G2GAI)')
ax.legend(framealpha=0.3, loc='upper right')
ax.grid(False)

# Save figure
plt.tight_layout()
fig_path = os.path.join(fig_dir, 'G2_total_comparison.png')
plt.savefig(fig_path, dpi=600, bbox_inches='tight')
print(f"Saved combined G2 total plot")
plt.close()

RDME species used in G2 total: ['ODE_G2GAI', 'ODE_G2GAE', 'ODE_G2GAI']
CME species used in G2 total: ['G2', 'G2GAE', 'G2GAI']
Saved combined G2 total plot


Save GAI total plot 

In [12]:
# Create combined GAI species plot
fig, ax = plt.subplots()

# List of species to combine
gai_species = ['GAI', 'G1GAI', 'G3i', 'G2GAI']

# Initialize arrays for RDME and CME data
rdme_combined_avg = None
rdme_combined_var = None
cme_combined_avg = None
cme_combined_var = None
time = None

# For tracking which species are actually used
rdme_species_used = []
cme_species_used = []

# Combine RDME data
for species_name in gai_species:
    # Match species after the RDME_ or ODE_ prefix
    matching_rows = rdme_df[rdme_df['Species'].apply(
        lambda x: extract_species_name(x) == species_name
    )]
  
    if not matching_rows.empty:
        
        
        rdme_species_data = matching_rows
            
        if len(rdme_species_data) > 0:
            er_data = rdme_species_data.iloc[0]
            # Track which species are being used
            rdme_species_used.append(er_data['Species'])
            
            curr_avg = str_to_array(er_data['Average'])
            curr_std = str_to_array(er_data['Std'])
            # Convert counts to mM
            curr_avg = curr_avg / NAV * 1e3  # NAV*1e3 for RDME conversion
            curr_std = curr_std / NAV * 1e3
            curr_var = curr_std ** 2  # Convert std to variance
            
            if rdme_combined_avg is None:
                time = str_to_array(er_data['Time'])
                rdme_combined_avg = curr_avg
                rdme_combined_var = curr_var
            else:
                rdme_combined_avg += curr_avg
                rdme_combined_var += curr_var  # Variances add for independent variables

# Combine CME data
for species_name in gai_species:
    cme_species_data = cme_df[cme_df['Species'] == species_name]
    
    if len(cme_species_data) > 0:
        noer_data = cme_species_data.iloc[0]
        # Track which species are being used
        cme_species_used.append(noer_data['Species'])
        
        curr_avg = str_to_array(noer_data['Average'])
        curr_std = str_to_array(noer_data['Std'])
        # Convert counts to mM if not already converted
        # if species_name != 'GAI':  # GAI is already converted in the CME processing
        if species_name != 'GAI':
            count2concentration = 4.65e-8  # molecule/cell to mM
            curr_avg = curr_avg * count2concentration
            curr_std = curr_std * count2concentration
            curr_var = curr_std ** 2  # Convert std to variance
        else:
            curr_avg = curr_avg 
            curr_std = curr_std 
            curr_var = curr_std ** 2  # Convert std to variance
        
        if cme_combined_avg is None:
            cme_time = str_to_array(noer_data['Time'])
            cme_combined_avg = curr_avg
            cme_combined_var = curr_var
        else:
            cme_combined_avg += curr_avg
            cme_combined_var += curr_var  # Variances add for independent variables

# Print which species were actually used
print("RDME species used in GAI total:", rdme_species_used)
print("CME species used in GAI total:", cme_species_used)

# Convert combined variances back to standard deviations
if rdme_combined_var is not None:
    rdme_combined_std = np.sqrt(rdme_combined_var)
if cme_combined_var is not None:
    cme_combined_std = np.sqrt(cme_combined_var)

# Plot RDME if data exists
if rdme_combined_avg is not None and time is not None:
    ax.plot(time, rdme_combined_avg, label='RDME-ODE', linestyle='-')
    ax.fill_between(time, rdme_combined_avg - rdme_combined_std, 
                    rdme_combined_avg + rdme_combined_std, alpha=0.2)

# Plot CME if data exists
if cme_combined_avg is not None and 'cme_time' in locals():
    ax.plot(cme_time, cme_combined_avg, label='CME-ODE', linestyle='--')
    ax.fill_between(cme_time, cme_combined_avg - cme_combined_std, 
                    cme_combined_avg + cme_combined_std, alpha=0.2)

# Add horizontal line for GAE = 11.1mM with a more fitting color
ax.axhline(y=11.1, color='gray', linestyle='-.', linewidth=2, label='GAE')
ax.text(time[0]*1.05, 10.8, '11.1 mM', color='gray', fontsize=16, va='top', ha='left')
# Customize plot
ax.set_xlabel('Time (min)')
ax.set_ylabel('Concentration (mM)')
# ax.set_title('Total GAI Species Comparison (GAI + G1GAI + G3i + G2GAI)')
ax.legend(framealpha=0.3, loc='upper right')
ax.grid(False)

# Save figure
# plt.tight_layout()
fig_path = os.path.join(fig_dir, 'GAI_total_comparison.png')
plt.savefig(fig_path, dpi=600, bbox_inches='tight')
print(f"Saved combined GAI total plot")
plt.close()

RDME species used in GAI total: ['ODE_GAI', 'ODE_G1GAI', 'RDME_G3i', 'ODE_G2GAI']
CME species used in GAI total: ['GAI', 'G1GAI', 'G3i', 'G2GAI']
Saved combined GAI total plot
