# ER_compare
This is the code used for extracting the data from RDME-ODE simulation results and comparing various trajectories of species with and without ER.

In [1]:
%run env.ipynb
import pickle
import os
import numpy as np
from jLM.RDME import File as RDMEFile
import jLM
import json
import matplotlib.pyplot as plt
import seaborn as sns
from traj_analysis_rdme import *
from tqdm import tqdm
import pandas as pd
import logging

er_traj_dir = "/data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250224_ER_pmaside_ribo"
noer_traj_dir = "/data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250120_11_1_60_Normal_newtomo"
fig_dir = os.path.join(er_traj_dir, 'figures_ernoer_comparison/')

if not os.path.exists(fig_dir):
    os.makedirs(fig_dir)
# Configure logging
log_file = os.path.join(fig_dir, 'run_log.log')
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)


logging.info(f"This is the file to compare between ER and nonER data: {er_traj_dir} and {noer_traj_dir}")



2025-02-27 19:37:10,256 - INFO - This is the file to compare between ER and nonER data: /data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250224_ER_pmaside_ribo and /data2/2024_Yeast_GS/my_current_code/rdme_ode_results/20250120_11_1_60_Normal_newtomo


default, get data required

In [2]:


er_files = [f for f in os.listdir(er_traj_dir) if f.startswith('yeastER') and f.endswith('.lm')]
noer_files = [f for f in os.listdir(noer_traj_dir) if f.startswith('yeast1.1') and f.endswith('.lm')]
traj_suff = "_ode.jsonl"

logging.info(f"ER files: {er_files}")
logging.info(f"NOER files: {noer_files}")

# Initialize dictionaries to store data for each species
er_species_data = {}
noer_species_data = {}
er_ode_data = {}
noer_ode_data = {}
rdmeTs = None
odeTs = None

# Process ER files
for traj_file in tqdm(er_files, desc="Processing ER files", unit="file"):
    logging.info(f"Processing ER file: {traj_file}")
    traj, odeTraj, region_traj = get_traj(er_traj_dir, traj_file, traj_suff)
    curr_rdmeTs, rdmeYs, curr_odeTs, odeYs, _, _ = get_data_for_plot(traj, odeTraj, region_traj=None, sparse_factor=1)
    NAV_ER = 6.022e23 * (traj.reg.cytoplasm.volume + traj.reg.nucleoplasm.volume + traj.reg.plasmaMembrane.volume)
    if rdmeTs is None:
        rdmeTs = curr_rdmeTs
        odeTs = curr_odeTs

    for species, data in rdmeYs.items():
        if species not in er_species_data:
            er_species_data[species] = []
        er_species_data[species].append(data)

    for species, data in odeYs.items():
        if species not in er_ode_data:
            er_ode_data[species] = []
        er_ode_data[species].append(data)

# Process NOER files
for traj_file in tqdm(noer_files, desc="Processing NOER files", unit="file"):
    logging.info(f"Processing NOER file: {traj_file}")
    traj, odeTraj, _ = get_traj(noer_traj_dir, traj_file, traj_suff)
    _, rdmeYs, _, odeYs, _, _ = get_data_for_plot(traj, odeTraj, region_traj=None, sparse_factor=1)
    NAV_NOER = 6.022e23 * (traj.reg.cytoplasm.volume + traj.reg.nucleoplasm.volume)
    for species, data in rdmeYs.items():
        if species not in noer_species_data:
            noer_species_data[species] = []
        noer_species_data[species].append(data)

    for species, data in odeYs.items():
        if species not in noer_ode_data:
            noer_ode_data[species] = []
        noer_ode_data[species].append(data)

# Calculate and save ER statistics
er_results = []
for species, trajectories in er_species_data.items():
    trajectories_array = np.array(trajectories)
    avg = np.mean(trajectories_array, axis=0)
    std = np.std(trajectories_array, axis=0)
    
    er_results.append({
        'Species': f"RDME_{species}",
        'Time': ','.join(map(str, rdmeTs)),
        'Average': ','.join(map(str, avg)),
        'Std': ','.join(map(str, std))
    })

for species, trajectories in er_ode_data.items():
    trajectories_array = np.array(trajectories)
    avg = np.mean(trajectories_array, axis=0)
    std = np.std(trajectories_array, axis=0)
    
    er_results.append({
        'Species': f"ODE_{species}",
        'Time': ','.join(map(str, odeTs)),
        'Average': ','.join(map(str, avg)),
        'Std': ','.join(map(str, std))
    })

# Calculate and save NOER statistics
noer_results = []
for species, trajectories in noer_species_data.items():
    trajectories_array = np.array(trajectories)
    avg = np.mean(trajectories_array, axis=0)
    std = np.std(trajectories_array, axis=0)
    
    noer_results.append({
        'Species': f"RDME_{species}",
        'Time': ','.join(map(str, rdmeTs)),
        'Average': ','.join(map(str, avg)),
        'Std': ','.join(map(str, std))
    })

for species, trajectories in noer_ode_data.items():
    trajectories_array = np.array(trajectories)
    avg = np.mean(trajectories_array, axis=0)
    std = np.std(trajectories_array, axis=0)
    
    noer_results.append({
        'Species': f"ODE_{species}",
        'Time': ','.join(map(str, odeTs)),
        'Average': ','.join(map(str, avg)),
        'Std': ','.join(map(str, std))
    })

# Save to CSV files
er_df = pd.DataFrame(er_results)
noer_df = pd.DataFrame(noer_results)

er_csv_path = os.path.join(fig_dir, 'er_species_statistics.csv')
noer_csv_path = os.path.join(fig_dir, 'noer_species_statistics.csv')

er_df.to_csv(er_csv_path, index=False)
noer_df.to_csv(noer_csv_path, index=False)

logging.info(f"ER statistics saved to: {er_csv_path}")
logging.info(f"NOER statistics saved to: {noer_csv_path}")


2025-02-27 19:37:10,272 - INFO - ER files: ['yeastER1.12er_mt_multi_20250222_13_t60.0minGAE11.1mMpmaER_topm_no_ribo_gpu4.lm', 'yeastER1.12er_mt_multi_20250222_12_t60.0minGAE11.1mMpmaER_topm_no_ribo_gpu4.lm', 'yeastER1.12er_mt_multi_20250222_11_t60.0minGAE11.1mMpmaER_topm_no_ribo_gpu4.lm']
2025-02-27 19:37:10,273 - INFO - NOER files: ['yeast1.13.1mt_multi_20250105_2_t60.0minGAE11.1mMdt_4gpu_gpu4.lm', 'yeast1.13.1mt_multi_20250115_3_t60.0minGAE11.1mMdt_4gpu_gpu4.lm', 'yeast1.13.1mt_multi_20241229_1_t60.0minGAE11.1mMdt_4gpu_gpu4.lm']
Processing ER files:   0%|          | 0/3 [00:00<?, ?file/s]2025-02-27 19:37:10,275 - INFO - Processing ER file: yeastER1.12er_mt_multi_20250222_13_t60.0minGAE11.1mMpmaER_topm_no_ribo_gpu4.lm
Processing ER files:  33%|███▎      | 1/3 [00:01<00:03,  1.75s/file]2025-02-27 19:37:12,021 - INFO - Processing ER file: yeastER1.12er_mt_multi_20250222_12_t60.0minGAE11.1mMpmaER_topm_no_ribo_gpu4.lm
Processing ER files:  67%|██████▋   | 2/3 [00:03<00:01,  1.73s/file]202

plot comparison graphs

In [3]:
# Read the saved statistics
er_df = pd.read_csv(os.path.join(fig_dir, 'er_species_statistics.csv'))
noer_df = pd.read_csv(os.path.join(fig_dir, 'noer_species_statistics.csv'))

# Function to convert string of comma-separated values to numpy array
def str_to_array(s):
    return np.array([float(x) for x in s.split(',')])

# Debug: logging.info available species
logging.info(f"Available species in ER:  {er_df['Species'].tolist()}")
logging.info(f"Available species in NOER:  {noer_df['Species'].tolist()}")

# Get unique species names directly from the CSV
unique_species = set(er_df['Species'].unique()) | set(noer_df['Species'].unique())
logging.info(f"\nUnique species:  {unique_species}")

# Plot settings
# Common plot settings for poster presentation
plt.style.use('default')
plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams['figure.dpi'] = 600
plt.rcParams['font.size'] = 18  # Increase base font size
plt.rcParams['axes.titlesize'] = 28  # Increase title font size
plt.rcParams['axes.labelsize'] = 18  # Increase axis label font size
plt.rcParams['xtick.labelsize'] = 18  # Increase tick label font size
plt.rcParams['ytick.labelsize'] = 18  # Increase tick label font size
plt.rcParams['legend.fontsize'] = 18  # Increase legend font size

# Create plots for each species
for species_name in unique_species:
    fig, ax = plt.subplots()
    
    # Safely get data
    er_species_data = er_df[er_df['Species'] == species_name]
    noer_species_data = noer_df[noer_df['Species'] == species_name]
    
    if len(er_species_data) == 0 or len(noer_species_data) == 0:
        logging.info(f"Skipping {species_name} - data not found")
        continue
        
    er_data = er_species_data.iloc[0]
    noer_data = noer_species_data.iloc[0]
    
    time = str_to_array(er_data['Time'])
    er_avg = str_to_array(er_data['Average'])
    er_std = str_to_array(er_data['Std'])
    noer_avg = str_to_array(noer_data['Average'])
    noer_std = str_to_array(noer_data['Std'])
    
    # display name
    display_name = species_name.split('_', 1)[1] if '_' in species_name else species_name
    # Replace any subsequent underscores with colons
    display_name = display_name.replace('_', ':')
    # Plot ER
    ax.plot(time, er_avg, label=f'ER', linestyle='-')
    ax.fill_between(time, er_avg - er_std, er_avg + er_std, alpha=0.2)
    
    # Plot NOER
    ax.plot(time, noer_avg, label=f'NOER', linestyle='--')
    ax.fill_between(time, noer_avg - noer_std, noer_avg + noer_std, alpha=0.2)
    
    # Customize plot
    ax.set_xlabel('Time (min)')
    ax.set_ylabel('Counts')
    # ax.set_title(f'{species_name} Comparison')
    ax.legend(framealpha=0.3, loc='upper right')
    ax.grid(False)
    
    # Save figure
    plt.tight_layout()
    fig_path = os.path.join(fig_dir, f'{species_name}_comparison.png')
    plt.savefig(fig_path, dpi=600, bbox_inches='tight')
    logging.info(f"Saved plot for {display_name}")
    plt.close()

logging.info(f"\nPlots saved in: {fig_dir}")
logging.getLogger().handlers[0].flush() 

2025-02-27 19:37:21,182 - INFO - Available species in ER:  ['RDME_DGrep', 'RDME_DGrep_G4d', 'RDME_DGrep_G4d_G80d', 'RDME_Rrep', 'RDME_Grep', 'RDME_DG1', 'RDME_DG1_G4d', 'RDME_DG1_G4d_G80d', 'RDME_R1', 'RDME_G1', 'RDME_DG2', 'RDME_DG2_G4d', 'RDME_DG2_G4d_G80d', 'RDME_R2', 'RDME_G2', 'RDME_DG3', 'RDME_DG3_G4d', 'RDME_DG3_G4d_G80d', 'RDME_R3', 'RDME_G3', 'RDME_G3i', 'RDME_DG4', 'RDME_R4', 'RDME_G4', 'RDME_G4d', 'RDME_DG80', 'RDME_DG80_G4d', 'RDME_DG80_G4d_G80d', 'RDME_R80', 'RDME_G80', 'RDME_G80d', 'RDME_G80d_G3i', 'RDME_ribosome', 'RDME_ribosomeR1', 'RDME_ribosomeR2', 'RDME_ribosomeR3', 'RDME_ribosomeR4', 'RDME_ribosomeR80', 'RDME_ribosomeGrep', 'ODE_GAI', 'ODE_G1', 'ODE_G1GAI', 'ODE_G2GAI', 'ODE_G2GAE', 'ODE_G2']
2025-02-27 19:37:21,183 - INFO - Available species in NOER:  ['RDME_DGrep', 'RDME_DGrep_G4d', 'RDME_DGrep_G4d_G80d', 'RDME_Rrep', 'RDME_Grep', 'RDME_DG1', 'RDME_DG1_G4d', 'RDME_DG1_G4d_G80d', 'RDME_R1', 'RDME_G1', 'RDME_DG2', 'RDME_DG2_G4d', 'RDME_DG2_G4d_G80d', 'RDME_R2', 'RDM

This is for G2 in the membrane


In [4]:
# Read the saved statistics
er_df = pd.read_csv(os.path.join(fig_dir, 'er_species_statistics.csv'))
noer_df = pd.read_csv(os.path.join(fig_dir, 'noer_species_statistics.csv'))

def str_to_array(s):
    return np.array([float(x) for x in s.split(',')])

# Calculate G2 totals for ER data
er_g2_data = er_df[er_df['Species'].isin(['ODE_G2', 'ODE_G2GAE', 'ODE_G2GAI'])].copy()
if len(er_g2_data) > 0:
    time = str_to_array(er_g2_data.iloc[0]['Time'])
    er_total = np.zeros_like(str_to_array(er_g2_data.iloc[0]['Average']))
    er_std_squared = np.zeros_like(er_total)
    
    for _, row in er_g2_data.iterrows():
        er_total += str_to_array(row['Average'])
        er_std_squared += str_to_array(row['Std'])**2
    er_total_std = np.sqrt(er_std_squared)

# Calculate G2 totals for NOER data
noer_g2_data = noer_df[noer_df['Species'].isin(['ODE_G2', 'ODE_G2GAE', 'ODE_G2GAI'])].copy()
if len(noer_g2_data) > 0:
    noer_total = np.zeros_like(str_to_array(noer_g2_data.iloc[0]['Average']))
    noer_std_squared = np.zeros_like(noer_total)
    
    for _, row in noer_g2_data.iterrows():
        noer_total += str_to_array(row['Average'])
        noer_std_squared += str_to_array(row['Std'])**2
    noer_total_std = np.sqrt(noer_std_squared)

# Create the plot
plt.figure(figsize=(10, 6))
plt.plot(time, er_total, label='ER', linestyle='-')
plt.fill_between(time, er_total - er_total_std, er_total + er_total_std, alpha=0.2)

plt.plot(time, noer_total, label='NOER', linestyle='--')
plt.fill_between(time, noer_total - noer_total_std, noer_total + noer_total_std, alpha=0.2)

plt.xlabel('Time (min)')
plt.ylabel('Counts')
# plt.title('Total G2 Comparison (G2 + G2GAE + G2GAI)')
plt.legend(framealpha=0.3, loc='upper right')
plt.grid(False)

# Save figure
plt.tight_layout()
fig_path = os.path.join(fig_dir, 'G2_membrane_comparison.png')
plt.savefig(fig_path, dpi=600, bbox_inches='tight')
logging.info(f"Saved plot for G2 membrane")
plt.close()
logging.getLogger().handlers[0].flush() 

2025-02-27 19:38:10,698 - INFO - Saved plot for G2 membrane


This is for GAI total

In [5]:

def str_to_array(s):
    return np.array([float(x) for x in s.split(',')])

# Create combined GAI species plot
fig, ax = plt.subplots(figsize=(10, 6))

# List of species to combine
gai_species = ['GAI', 'G1GAI', 'G3i', 'G2GAI']

# Initialize arrays for ER and NOER data
er_combined_avg = None
er_combined_var = None
noer_combined_avg = None
noer_combined_var = None
time = None

# For tracking which species are actually used
er_species_used = []
noer_species_used = []

# Combine ER data
for species_name in gai_species:
    # Look for both ODE and RDME versions of the species
    matching_rows = er_df[er_df['Species'].str.contains(species_name)]
    
    if not matching_rows.empty:
        # Prefer ODE data if available
        er_species_data = matching_rows[matching_rows['Species'].str.startswith('ODE')]
        if er_species_data.empty:
            er_species_data = matching_rows
            
        if len(er_species_data) > 0:
            er_data = er_species_data.iloc[0]
            # Track which species are being used
            er_species_used.append(er_data['Species'])
            
            curr_avg = str_to_array(er_data['Average']) / NAV_ER * 1e3
            curr_std = str_to_array(er_data['Std']) / NAV_ER * 1e3
            curr_var = curr_std ** 2  # Convert std to variance
            
            if er_combined_avg is None:
                time = str_to_array(er_data['Time'])
                er_combined_avg = curr_avg
                er_combined_var = curr_var
            else:
                er_combined_avg += curr_avg
                er_combined_var += curr_var  # Variances add for independent variables

# Combine NOER data
for species_name in gai_species:
    # Look for both ODE and RDME versions of the species
    matching_rows = noer_df[noer_df['Species'].str.contains(species_name)]
    
    if not matching_rows.empty:
        # Prefer ODE data if available
        noer_species_data = matching_rows[matching_rows['Species'].str.startswith('ODE')]
        if noer_species_data.empty:
            noer_species_data = matching_rows
            
        if len(noer_species_data) > 0:
            noer_data = noer_species_data.iloc[0]
            # Track which species are being used
            noer_species_used.append(noer_data['Species'])
            
            curr_avg = str_to_array(noer_data['Average']) / NAV_NOER * 1e3
            curr_std = str_to_array(noer_data['Std']) / NAV_NOER * 1e3
            curr_var = curr_std ** 2  # Convert std to variance
            
            if noer_combined_avg is None:
                noer_combined_avg = curr_avg
                noer_combined_var = curr_var
            else:
                noer_combined_avg += curr_avg
                noer_combined_var += curr_var  # Variances add for independent variables

# Print which species were actually used
logging.info("ER species used in GAI total: " + str(er_species_used))
logging.info("NOER species used in GAI total: " + str(noer_species_used))

# Convert combined variances back to standard deviations
if er_combined_var is not None:
    er_combined_std = np.sqrt(er_combined_var)
if noer_combined_var is not None:
    noer_combined_std = np.sqrt(noer_combined_var)

# Plot ER if data exists
if er_combined_avg is not None and time is not None:
    ax.plot(time, er_combined_avg, label='ER', linestyle='-')
    ax.fill_between(time, er_combined_avg - er_combined_std, 
                    er_combined_avg + er_combined_std, alpha=0.2)

# Plot NOER if data exists
if noer_combined_avg is not None:
    ax.plot(time, noer_combined_avg, label='NOER', linestyle='--')
    ax.fill_between(time, noer_combined_avg - noer_combined_std, 
                    noer_combined_avg + noer_combined_std, alpha=0.2)

# Add horizontal line for GAE = 11.1mM with a more fitting color
ax.axhline(y=11.1, color='gray', linestyle='-.', linewidth=1.5, label='GAE')
ax.text(time[0]*1.05, 10.8, '11.1 mM', color='gray', fontsize=16, va='top', ha='left')

# Customize plot
ax.set_xlabel('Time (min)')
ax.set_ylabel('Counts')
# ax.set_title('Total GAI Species Comparison (GAI + G1GAI + G3i + G2GAI)')
ax.legend(framealpha=0.3, loc='upper right')
ax.grid(False)

# Save figure
plt.tight_layout()
fig_path = os.path.join(fig_dir, 'GAI_total_comparison.png')
plt.savefig(fig_path, dpi=600, bbox_inches='tight')
logging.info(f"Saved plot for GAI total")
plt.close()
logging.getLogger().handlers[0].flush()

2025-02-27 19:38:10,750 - INFO - ER species used in GAI total: ['ODE_GAI', 'ODE_G1GAI', 'RDME_G3i', 'ODE_G2GAI']
2025-02-27 19:38:10,751 - INFO - NOER species used in GAI total: ['ODE_GAI', 'ODE_G1GAI', 'RDME_G3i', 'ODE_G2GAI']
2025-02-27 19:38:11,726 - INFO - Saved plot for GAI total
