### README
Jupyter Notebook to:

1. Plot the time-dependent traces of all metabolites and fluxes into PDFs
2. Write time-averaged concentrations to an Excel sheet
3. Write statistics on genetic information processing (GIP) to an Excel sheet

### Set up analysis environment

#### Import Packages

In [None]:
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np

# Custom Python Script
import WCM_analysis_4DWCM
import WCM_gene as gene
import WCM_diagnosis as diagnosis
import WCM_metabolites as metabolites
import WCM_math as math

#### Set up paths

In [None]:
# Label of the simulation batch; it also names the data sub-directory.
simulationdate = "April2025"

# Directory and base label of the ensemble pkl file to read in
pkl_dir = f'/data/enguang/4DWCM/{simulationdate}/'
pkl_label = f'4DWCMensemble_{simulationdate}'

# Create the WCM_analysis ensemble object and load the merged ensemble into it
w = WCM_analysis_4DWCM.WCM_ensemble()
w.read_merged_ensemble(pkl_dir, pkl_label)

#### Initialize Parameters

In [None]:
# GenBank file that the project helpers below parse.
gbfile_path = '/data/enguang/CMEODE/input_data/syn3A.gb'
# genomeDict is later indexed by locus tag ('JCVISYN3A_<locusNum>');
# TypetoLocusNums is later indexed by gene type (e.g. 'protein').
genomeDict = gene.mapDNA(gbfile_path)
TypetoLocusNums, geneTypes = gene.categorizeGenes(gbfile_path)
w.read_genome(gbfile_path)

# All replicates are selected: 0-based indices and the matching 1-based replicate numbers.
healthy_indices = np.arange(0,w.N_reps)
healthy_list = [index +1 for index in healthy_indices]
healthyTag = 'healthy'

# Cell Volume Doubling time
volume_doubling_times, not_doubled_volume_reps = diagnosis.get_volume_doubling_times(w, healthy_list)
# Cell Surface Doubling time
# NOTE(review): later cells use int(surface_doubling_times[i]) as a row index into traces,
# i.e. they assume one trace row per time unit - confirm against WCM_diagnosis.
surface_doubling_times, not_doubled_surface_reps = diagnosis.get_surface_doubling_times(w, healthy_list)

### Plot Concentrations and Fluxes

In [None]:
# Set up the working folder that receives the concentration / flux PDFs.
work_dir = f'/data/enguang/4DWCM/{simulationdate}/SI_concs_Fluxes/'
# exist_ok=True replaces the check-then-create pattern (no race, no branch).
os.makedirs(work_dir, exist_ok=True)

# All species whose name starts with 'M_', plus the pts* species and their '_P' forms.
# A new list is built so the list returned by `w` is never modified in place.
intra_names = list(w.get_species_names_starts('M_'))
intra_names += ['ptsi', 'ptsh', 'crr', 'ptsg', 'ptsi_P', 'ptsh_P', 'crr_P', 'ptsg_P']

# Exclude bookkeeping species such as M_atp_shortage, then sort alphabetically.
intra_names = sorted(name for name in intra_names if 'shortage' not in name)

# Copied from CMEODE output.
# Reference list of species IDs: compared against the 4DWCM species names below,
# and (after sorting) used as the list of species whose traces are plotted.
CMEODE =  ['M_10fthfglu3_c', 'M_12dgr_c', 'M_13dpg_c', 'M_1ag3p_c', 'M_2dr1p_c', 'M_2dr5p_c', 'M_2pg_c', 'M_3pg_c', 'M_5fthf_c', 'M_5fthfglu3_c', 'M_ACP_R_c', 'M_ACP_c', 'M_ac_c', 'M_acald_c', 'M_accoa_c', 'M_actp_c', 'M_ade_c', 'M_adn_c', 'M_adp_c', 'M_ala__L_c', 'M_amp_c', 'M_ap_c', 'M_apoACP_c', 'M_arg__L_c', 'M_asn__L_c', 'M_asp__L_c', 'M_atp_c', 'M_ca2_c', 'M_cdp_c', 'M_cdpdag_c', 'M_chsterol_c', 'M_clpn_c', 'M_cmp_c', 'M_coa_c', 'M_ctp_c', 'M_cys__L_c', 'M_dad_2_c', 'M_dadp_c', 'M_damp_c', 'M_datp_c', 'M_dcdp_c', 'M_dcmp_c', 'M_dctp_c', 'M_dcyt_c', 'M_dgdp_c', 'M_dgmp_c', 'M_dgsn_c', 'M_dgtp_c', 'M_dhap_c', 'M_dhlpl_PdhC_c', 'M_dnad_c', 'M_dtdp_c', 'M_dtmp_c', 'M_dttp_c', 'M_dudp_c', 'M_dump_c', 'M_duri_c', 'M_dutp_c', 'M_e4p_c', 'M_f6p_c', 'M_fa_c', 'M_fad_c', 'M_fdp_c', 'M_fmettrna_c', 'M_fmn_c', 'M_g1p_c', 'M_g3p_c', 'M_g6p_c', 'M_galfur12dgr_c', 'M_gdp_c', 'M_gln__L_c', 'M_glu__L_c', 'M_gly_c', 'M_glyc3p_c', 'M_glyc_c', 'M_gmp_c', 'M_gsn_c', 'M_gtp_c', 'M_gua_c', 'M_his__L_c', 'M_ile__L_c', 'M_k_c', 'M_lac__L_c', 'M_leu__L_c', 'M_lpl_PdhC_c', 'M_lys__L_c', 'M_met__L_c', 'M_methfglu3_c', 'M_mettrna_c', 'M_mg2_c', 'M_mlthfglu3_c', 'M_na1_c', 'M_nac_c', 'M_nad_c', 'M_nadh_c', 'M_nadp_c', 'M_nadph_c', 'M_nh3_c', 'M_nicrnt_c', 'M_o2_c', 'M_pa_c', 'M_pap_c', 'M_pc_c', 'M_pep_c', 'M_pg3p_c', 'M_pg_c', 'M_phe__L_c', 'M_pi_c', 'M_ppi_c', 'M_pro__L_c', 'M_prpp_c', 'M_pydx5p_c', 'M_pyr_c', 'M_r1p_c', 'M_r5p_c', 'M_ribflv_c', 'M_ru5p__D_c', 'M_s7p_c', 'M_ser__L_c', 'M_sm_c', 'M_sprm_c', 'M_tag_c', 'M_thfglu3_c', 'M_thmpp_c', 'M_thr__L_c', 'M_thymd_c', 'M_trdox_c', 'M_trdrd_c', 'M_trp__L_c', 'M_tyr__L_c', 'M_udp_c', 'M_udpg_c', 'M_udpgal_c', 'M_udpgalfur_c', 'M_ump_c', 'M_ura_c', 'M_uri_c', 'M_utp_c', 'M_val__L_c', 'M_xu5p__D_c', 'crr', 'crr_P', 'ptsg', 'ptsg_P', 'ptsh', 'ptsh_P', 'ptsi', 'ptsi_P']

# Species that appear in only one of the two species lists.
diff = set(intra_names) - set(CMEODE)
diff2 = set(CMEODE) - set(intra_names)

print(f"In 4DWCM but not in CMEODE: {diff}")
print(f"In CMEODE but not in 4DWCM: {diff2}")

# tRNA-charging species are recognised by the substring 'trna' in their name.
tRNA_species = [name for name in intra_names if 'trna' in name]
print(f"tRNA Charging related species: {tRNA_species}")
print(f"Apart from tRNA charging related species: {diff - set(tRNA_species)}")

# 49 simulated extracellular metabolites, kept in alphabetical order.
extra_names = sorted([
    'glc__D_e', 'lac__L_e', 'pyr_e', 'ac_e', 'glyc_e', 'fa_e',
    'chsterol_e', 'tag_e', 'ribflv_e', 'pydx5p_e', '5fthf_e', 'nac_e',
    'coa_e', 'thmpp_e', 'sprm_e', 'dcyt_e', 'uri_e', 'thymd_e', 'adn_e',
    'dad_2_e', 'gsn_e', 'dgsn_e', 'na1_e', 'k_e', 'mg2_e', 'ca2_e', 'pi_e',
    'arg__L_e', 'asp__L_e', 'cys__L_e', 'glu__L_e', 'gly_e', 'ile__L_e',
    'ala__L_e', 'asn__L_e', 'leu__L_e',
    'gln__L_e', 'his__L_e', 'lys__L_e', 'pro__L_e', 'phe__L_e', 'thr__L_e',
    'trp__L_e', 'tyr__L_e', 'val__L_e', 'ser__L_e', 'met__L_e', 'sm_e', 'pc_e',
])

# Disabled alternative: metabolites_names = intra_names + extra_names
# The CMEODE species list (sorted in place) is what gets plotted.
CMEODE.sort()
metabolites_names = CMEODE

print(f"Total {len(metabolites_names)} species will be plotted: \n", metabolites_names)

# Medium composition; get_trace reads its 'Met ID' and 'Conc (mM)' columns.
extra_concs_df = pd.read_excel(
    '/data/enguang/CMEODE/input_data/initial_concentrations.xlsx',
    sheet_name='Simulation Medium',
)

In [None]:
def get_trace(met_ID, w, extra_concs_df):
    """Return the label and the concentration trace (mM) of one species.

    Parameters
    ----------
    met_ID : species ID. IDs ending in '_e' are treated as extracellular.
    w : ensemble object providing `t`, `N_reps`, `conc_factors` and
        `get_specie_trace`.
    extra_concs_df : DataFrame with 'Met ID' and 'Conc (mM)' columns.

    Returns
    -------
    (met_ID, trace)
        For '_e' IDs the trace is constant with shape (len(w.t), w.N_reps),
        filled with the first matching 'Conc (mM)' value (0 if the ID is not
        listed). Otherwise the count trace is multiplied by
        `w.conc_factors * 1e3`. For 'M_mg2_c' the free-Mg count from
        `metabolites.cal_bound_Mg` is used and ' (Corrected)' is appended to
        the returned ID.
    """
    # Extracellular species: constant medium concentration for every replicate.
    if met_ID.endswith('_e'):
        is_match = extra_concs_df['Met ID'] == met_ID
        matches = extra_concs_df.loc[is_match, 'Conc (mM)'].values
        medium_conc = matches[0] if matches.size > 0 else 0  # 0 when not listed
        return met_ID, medium_conc * np.ones((len(w.t), w.N_reps))

    # Everything else is read from the simulation as particle counts.
    if met_ID == 'M_mg2_c':
        _bound_Mg, _extracted_Mg, free_Mg_count = metabolites.cal_bound_Mg(w, ribo_init=500)
        count = free_Mg_count
        met_ID = met_ID + ' (Corrected)'
    else:
        count = w.get_specie_trace(met_ID)

    # count to mM
    return met_ID, count*w.conc_factors*1e3

In [None]:
# Page layout: 6 rows x 3 columns = 18 plots per page
nrows, ncols = 6, 3
plots_per_page = nrows * ncols

time_interval = 20 # minute
figsize = (8.27, 11.69) # A4 paper

pdf_filename = work_dir+'concs_time_traces_4DWCM.pdf'

ylabel = 'mM'
xlabel = 'Time / Minute'

# Loop invariants: x values and tick positions in minutes
# (w.t and w.cell_cycle are divided by 60, i.e. presumably given in seconds).
time_minutes = w.t/60
xtick_positions = np.arange(0, w.cell_cycle/60, time_interval)
n_species = len(metabolites_names)

with PdfPages(pdf_filename) as pdf:
    for i in range(0, n_species, plots_per_page):
        fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
        axes = axes.flatten()  # Flatten to iterate easily

        for j, ax in enumerate(axes):
            idx = i + j

            if idx >= n_species:
                ax.axis('off')  # Hide unused subplot
                continue

            met_ID, trace = get_trace(metabolites_names[idx], w, extra_concs_df)

            # Ensemble mean over replicates (axis 1)
            ax.plot(time_minutes, np.mean(trace, axis=1))

            # Raw string: '\_' is an invalid escape sequence in a normal literal.
            ax.set_title(met_ID.replace('_', r'\_'))
            ax.set_ylabel(ylabel)
            ax.set_xticks(xtick_positions)

            # Shade the full range (min to max) across replicates, only if there is variation
            low = np.min(trace, axis=1)
            high = np.max(trace, axis=1)
            if not np.allclose(low, high):
                ax.fill_between(time_minutes, low, high, color='green', alpha=0.25, rasterized=True)

            # Only show the x axis label on the lowest visible subplot of each column:
            # the bottom row of the page, or the last used row on a partially filled page.
            if j >= plots_per_page - ncols or idx + ncols >= n_species:
                ax.set_xlabel(xlabel)

        plt.tight_layout()
        pdf.savefig(fig, dpi=300)
        plt.close(fig)

print(f"Saved {len(metabolites_names)} time-dependent traces into '{pdf_filename}' with {nrows}×{ncols} plots per page.")


In [None]:
# Page layout: 6 rows x 3 columns = 18 plots per page
nrows, ncols = 6, 3
plots_per_page = nrows * ncols

time_interval = 20 # minute
figsize = (8.27, 11.69) # A4 paper

pdf_filename = work_dir+'fluxes_time_traces_4DWCM.pdf'

ylabel = 'mM/s'
xlabel = 'Time / Minute'

# Sorted copy: sorting w.rxns in place would reorder the ensemble object's own list,
# which could break any lookup inside `w` that relies on the original order.
rxns = sorted(w.rxns)
print(f"Total {len(rxns)} will be plotted: \n", rxns)

# Loop invariants: x values and tick positions in minutes
# (w.t and w.cell_cycle are divided by 60, i.e. presumably given in seconds).
time_minutes = w.t/60
xtick_positions = np.arange(0, w.cell_cycle/60, time_interval)
n_rxns = len(rxns)

with PdfPages(pdf_filename) as pdf:
    for i in range(0, n_rxns, plots_per_page):
        fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
        axes = axes.flatten()  # Flatten to iterate easily

        for j, ax in enumerate(axes):
            idx = i + j

            if idx >= n_rxns:
                ax.axis('off')  # Hide unused subplot
                continue

            rxn = rxns[idx]
            trace = w.get_rxn_trace(rxn)

            # Ensemble mean over replicates (axis 1)
            ax.plot(time_minutes, np.mean(trace, axis=1))

            # Raw string: '\_' is an invalid escape sequence in a normal literal.
            ax.set_title(rxn.replace('_', r'\_'))

            ax.set_ylabel(ylabel)
            # Scientific notation with one decimal; one formatter instance per axis.
            ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{x:.1E}'))

            ax.set_xticks(xtick_positions)

            # Shade the full range (min to max) across replicates, only if there is variation
            low = np.min(trace, axis=1)
            high = np.max(trace, axis=1)
            if not np.allclose(low, high):
                ax.fill_between(time_minutes, low, high, color='green', alpha=0.25, rasterized=True)

            # Only show the x axis label on the lowest visible subplot of each column:
            # the bottom row of the page, or the last used row on a partially filled page.
            if j >= plots_per_page - ncols or idx + ncols >= n_rxns:
                ax.set_xlabel(xlabel)

        plt.tight_layout()
        pdf.savefig(fig, dpi=300)
        plt.close(fig)

print(f"Saved {len(rxns)} time-dependent fluxes into '{pdf_filename}' with {nrows}×{ncols} plots per page.")


### Add Average Concentration to Excel Sheet

In [None]:
# SI folder of the analysed run. Built from `simulationdate` so that changing the
# run never reads or overwrites the files of another run.
si_files_dir = f'/data/enguang/4DWCM/{simulationdate}/SI_files/'
excel_path = si_files_dir + 'Medium.xlsx'

intra_concs_df = pd.read_excel(excel_path, sheet_name='Intracellular Metabolites')

# First row included in the time average.
# NOTE(review): used as a row index, so "1 minute" assumes one trace row per second - confirm.
start_time = 1*60 # 1 minute

rep_avg_concs = []

for short_ID in intra_concs_df['Met ID']:

    # The sheet lists IDs without the 'M_' prefix used by the simulation species names.
    met_ID, trace = get_trace('M_' + short_ID, w, extra_concs_df)

    # Time average per replicate, from start_time up to that replicate's surface doubling time
    time_avg_concs = [
        np.mean(trace[start_time:int(surface_doubling_times[i_rep]), i_rep])
        for i_rep in range(w.N_reps)
    ]

    # then average over replicates
    rep_avg_concs.append(np.mean(time_avg_concs))

intra_concs_df['Avg Conc (mM)'] = rep_avg_concs

intra_concs_df.to_excel(si_files_dir + 'Avg_concs.xlsx', index=False)

### Output Statistics in GIP

In [None]:
# Locus numbers of all genes categorised as 'protein'; every statistic below is computed for these.
entire_ptn_locusNums = TypetoLocusNums['protein']

# Workbook whose 'Comparative Proteomics' sheet is read by the get_ptns_* helpers.
init_conc_path = '/data/enguang/4DWCM/mc4d-main/input_data/initial_concentrations.xlsx'

In [None]:
from math import floor
from math import isfinite
from math import log10
def round_sig(x, sig=2):
    """Round `x` to `sig` significant figures.

    Parameters
    ----------
    x : real number (Python or NumPy scalar).
    sig : number of significant figures to keep (default 2).

    Returns
    -------
    The rounded value with the sign of `x`. Zero returns 0.0. NaN and
    +/-infinity are returned unchanged: they have no order of magnitude, and
    `floor(log10(x))` would raise ValueError / OverflowError for them.
    """
    if not isfinite(x):
        return x
    if x == 0:
        return 0.0

    magnitude = abs(x)
    # Position of the leading digit decides how many decimals to keep.
    decimals = sig - int(floor(log10(magnitude))) - 1
    rounded = round(magnitude, decimals)

    return -1*rounded if x < 0 else rounded
    

In [None]:
def get_ptns_made(w, locusNums, surface_doubling_times):
    """Return, per locus, the ensemble-mean value of 'PM_<locusNum>' at surface doubling.

    For each locus number the 'PM_<locusNum>' trace is read from `w`, reduced
    with `math.get_doubling_moments_value` (here `math` is the project module
    WCM_math, not the standard library), averaged with `np.mean` and truncated
    to an int.

    Returns a list of ints in the order of `locusNums`.
    """
    def _mean_made_at_doubling(locusNum):
        made_trace = w.get_specie_trace('PM_' + locusNum)
        at_doubling = math.get_doubling_moments_value(surface_doubling_times, made_trace)
        return int(np.mean(at_doubling))

    return [_mean_made_at_doubling(locusNum) for locusNum in locusNums]

def _lookup_proteomics_column(locusNums, init_conc_path, column):
    """Return the values of one 'Comparative Proteomics' column for the given loci.

    Reads the 'Comparative Proteomics' sheet of `init_conc_path` (skipping the
    second row of the sheet), indexes it by 'Locus Tag' and selects the rows
    'JCVISYN3A_<locusNum>' in the order of `locusNums`.

    Raises KeyError (from pandas) if a locus tag is not present in the sheet.
    """
    proteomics = pd.read_excel(init_conc_path, sheet_name='Comparative Proteomics', skiprows=[1])

    locusTags = ['JCVISYN3A_' + locusNum for locusNum in locusNums]

    return proteomics.set_index('Locus Tag').loc[locusTags, column].values


def get_ptns_inits(locusNums, init_conc_path):
    """Return the 'Sim. Initial Ptn Cnt' value of each locus as an array."""
    return _lookup_proteomics_column(locusNums, init_conc_path, 'Sim. Initial Ptn Cnt')


def get_ptns_geneNames(locusNums, init_conc_path):
    """Return the 'Gene Name' entry of each locus as an array."""
    return _lookup_proteomics_column(locusNums, init_conc_path, 'Gene Name')


def get_ptns_geneProducts(locusNums, init_conc_path):
    """Return the 'Gene Product' entry of each locus as an array."""
    return _lookup_proteomics_column(locusNums, init_conc_path, 'Gene Product')

In [None]:
# Per-protein columns of the statistics table: three are looked up in the proteomics sheet,
# the generated-protein count comes from the simulation ensemble.
ptn_init = get_ptns_inits(entire_ptn_locusNums, init_conc_path)
gene_names = get_ptns_geneNames(entire_ptn_locusNums, init_conc_path)
gene_products = get_ptns_geneProducts(entire_ptn_locusNums, init_conc_path)
avg_ptns_made = get_ptns_made(w, entire_ptn_locusNums, surface_doubling_times)

In [None]:
def get_avg_mRNA(w, locusNums, surface_doubling_times):
    """Return the average mRNA count of each locus, rounded to 3 significant figures.

    The count of a locus is the sum of its 'RB_<locusNum>' and 'R_<locusNum>'
    traces. Each trace is first time-averaged with
    `math.get_mean_upto_moments` (project module WCM_math) using
    `start=10*60`; the summed result is then averaged with `np.average`.

    Returns a list in the order of `locusNums`.
    """
    averaging_start = 10*60

    def _time_average(trace):
        return math.get_mean_upto_moments(surface_doubling_times,
                                          trace,
                                          start=averaging_start)

    mrna_counts = []

    for locusNum in locusNums:
        ribosome_bound = w.get_specie_trace(f'RB_{locusNum}')  # 2D
        free_rna = w.get_specie_trace(f'R_{locusNum}')

        # Time average first ...
        total_time_avg = np.add(_time_average(ribosome_bound), _time_average(free_rna))

        # ... then ensemble average
        mrna_counts.append(round_sig(np.average(total_time_avg), 3))

    return mrna_counts


def get_mRNA_halflives(w, locusNums, surface_doubling_times):
    """Return an estimated mRNA half-life per locus, rounded to 3 significant figures.

    Per locus:
      * degradation rate = 'DM_<locusNum>' trace divided by `w.t`, reduced with
        `math.get_doubling_moments_value` (project module WCM_math) and
        averaged over the result;
      * average mRNA count = sum of the time-averaged 'RB_<locusNum>' and
        'R_<locusNum>' traces (`math.get_mean_upto_moments`, `start=10*60`),
        averaged over the result;
      * half-life = ln(2) / (rate / count) / 60.

    The final division by 60 matches the 'Halflive (Min)' column this feeds,
    assuming `w.t` is in seconds - TODO confirm.

    A non-finite half-life (zero degradation rate and/or zero mRNA count) is
    appended as is (inf or NaN), because rounding it to significant figures
    is undefined.

    Returns a list in the order of `locusNums`.
    """
    half_lives = []

    for locusNum in locusNums:

        Degradated_mRNA = 'DM_' + locusNum

        degTrace = w.get_specie_trace(Degradated_mRNA)

        # Dividing by w.t yields inf/NaN in any row where t == 0 (presumably the first);
        # silence the resulting warnings instead of flooding the notebook output.
        with np.errstate(divide='ignore', invalid='ignore'):
            degRateTrace = degTrace/w.t[:, np.newaxis]

        degRates = math.get_doubling_moments_value(surface_doubling_times,
                                                   degRateTrace)

        degRate = np.average(degRates)

        riboTrace = w.get_specie_trace(f'RB_{locusNum}') #2D
        rnaTrace = w.get_specie_trace(f'R_{locusNum}')

        # First take time average
        avg_riboTrace = math.get_mean_upto_moments(surface_doubling_times,
                                                   riboTrace,
                                                   start=10*60)

        avg_rnaTrace = math.get_mean_upto_moments(surface_doubling_times,
                                                  rnaTrace,
                                                  start=10*60)

        rnaTotTrace = np.add(avg_riboTrace, avg_rnaTrace)

        # then ensemble average
        rna_avg_count = np.average(rnaTotTrace)

        with np.errstate(divide='ignore', invalid='ignore'):
            half_life = np.log(2)/(degRate/rna_avg_count)/60

        if np.isfinite(half_life):
            half_lives.append(round_sig(half_life, 3))
        else:
            # No degradation events and/or no mRNA: keep inf/NaN rather than crash while rounding
            half_lives.append(half_life)

    return half_lives


def get_translation_per_mRNA(w, locusNums, surface_doubling_times):
    """Return the mean ratio 'PM_<locusNum>' / 'RPM_<locusNum>' at surface doubling.

    For each locus and each replicate in the module-level `healthy_list`
    (1-based replicate numbers, paired positionally with
    `surface_doubling_times`), both traces are read at row
    `int(surface doubling time)`. Replicates whose 'RPM_' value is zero are
    skipped and the locus is recorded. The remaining ratios are averaged and
    rounded to 3 significant figures.

    Returns
    -------
    (ratios, zero_mRNA_loci)
        ratios : np.ndarray in the order of `locusNums`; NaN for a locus whose
            'RPM_' value was zero in every replicate.
        zero_mRNA_loci : list of locus numbers with at least one replicate
            whose 'RPM_' value was zero (unordered, no duplicates).
    """
    trans_per_mRNA = []
    zero_mRNA_loci = set()

    produced_mRNA = 'RPM_'
    produced_ptn = 'PM_'

    for locusNum in locusNums:

        # Fetch each trace once per locus instead of once per replicate
        mRNA_made = w.get_specie_trace(produced_mRNA+locusNum)
        ptn_made = w.get_specie_trace(produced_ptn+locusNum)

        trans_reps = []
        for rep, SA_time in zip(healthy_list, surface_doubling_times):

            P_R = mRNA_made[int(SA_time), rep-1]
            P_P = ptn_made[int(SA_time), rep-1]

            if P_R != 0:
                trans_reps.append(P_P/P_R)
            else:
                zero_mRNA_loci.add(locusNum)

        if trans_reps:
            trans_per_mRNA.append(round_sig(np.mean(trans_reps), 3))
        else:
            # np.mean([]) is NaN, which cannot be rounded to significant figures
            trans_per_mRNA.append(np.nan)

    return np.array(trans_per_mRNA), list(zero_mRNA_loci)



# def get_translation_per_mRNA(w, locusNums):

#     protPerMrna = []

#     for locusNum in locusNums:
#         protID = 'PM_' + locusNum
#         RPMID = 'RPM_' + locusNum

#         protTrace = w.get_avg_species_traces([protID])
#         RPMTrace = w.get_avg_species_traces([RPMID])
        
#         protPerMrna.append(round_sig(protTrace[0][-1]/RPMTrace[0][-1],3))
        

#     return protPerMrna


In [None]:
# mRNA statistics for every protein-coding locus
avg_mrna_counts = get_avg_mRNA(w, entire_ptn_locusNums, surface_doubling_times)
mrna_halflives  = get_mRNA_halflives(w, entire_ptn_locusNums, surface_doubling_times)

# zero_mRNA_list: loci for which at least one replicate had a zero 'RPM_' value at surface doubling
trans_per_mRNA, zero_mRNA_list = get_translation_per_mRNA(w, entire_ptn_locusNums, surface_doubling_times)

In [None]:
def _count_trace_increases(trace, start, end):
    """Count the indices ct in [start, end) where trace[ct] > trace[ct-1]."""
    start = max(start, 1)  # index 0 has no predecessor; never wrap around to trace[-1]
    window = trace[start - 1:end]
    return int(np.count_nonzero(window[1:] > window[:-1]))


def get_gene_reads(w, locusNums, genomeDict, surface_doubling_times ):
    """Count increases of the RP_<locus>_C1 / _C2 traces after the gene reaches two copies.

    For every locus and every replicate position in the module-level
    `healthy_list`:
      * the G_<locus>_C1, G_<locus>_C2, RP_<locus>_C1 and RP_<locus>_C2 traces
        are summed; the first time index where the sum exceeds 1.99 is taken
        as the start of the counting window;
      * the window ends (exclusive) at `int(surface_doubling_times[rep])`;
      * within the window, the number of time steps at which each RP trace is
        larger than at the previous step is counted.

    If the summed trace never exceeds 1.99 the window is empty and both counts
    are 0.

    Parameters
    ----------
    w : ensemble object providing `get_species_traces`.
    locusNums : iterable of locus number strings.
    genomeDict : unused; kept so existing callers keep working.
    surface_doubling_times : per-replicate values, indexed by replicate position.

    Returns
    -------
    dict mapping 'JCVISYN3A_<locusNum>' to {'rp1': [...], 'rp2': [...]}, each a
    list with one int count per replicate.
    """
    geneReads = {}

    for locusNum in locusNums:

        locusTag = 'JCVISYN3A_' + locusNum

        # Transposed so that the first index selects the replicate
        g1Traces = w.get_species_traces([f'G_{locusNum}_C1'])[0].T
        g2Traces = w.get_species_traces([f'G_{locusNum}_C2'])[0].T

        rp1Traces = w.get_species_traces([f'RP_{locusNum}_C1'])[0].T
        rp2Traces = w.get_species_traces([f'RP_{locusNum}_C2'])[0].T

        rp1Counts = []
        rp2Counts = []

        for rep in range(len(healthy_list)):

            totG = np.array([g1Traces[rep], g2Traces[rep], rp1Traces[rep], rp2Traces[rep]]).sum(axis=0)

            window_end = int(surface_doubling_times[rep])

            # flatnonzero gives plain indices, avoiding int() on a 1-element array
            two_copy_indices = np.flatnonzero(totG > 1.99)
            if two_copy_indices.size:
                window_start = int(two_copy_indices[0])
            else:
                window_start = window_end  # second copy never appears: empty window

            rp1Counts.append(_count_trace_increases(rp1Traces[rep], window_start, window_end))
            rp2Counts.append(_count_trace_increases(rp2Traces[rep], window_start, window_end))

        geneReads[locusTag] = {'rp1': rp1Counts, 'rp2': rp2Counts}

    return geneReads

def get_trsc_event_diff(geneReads):
    """Return, per gene, the replicate-averaged difference rp1 - rp2.

    Parameters
    ----------
    geneReads : dict mapping a gene label to {'rp1': [...], 'rp2': [...]},
        two equally long lists with one count per replicate.

    Returns
    -------
    list of floats in the iteration order of `geneReads`. Genes whose average
    difference is above 2 or below -2 are also printed. A gene with no
    replicates yields NaN.

    The number of replicates is taken from the lists themselves, and no
    per-event fraction is computed, so genes without any recorded event no
    longer trigger a division by zero.
    """
    gene_trsc_diffs = []

    for locusTag, reads in geneReads.items():

        per_rep_diffs = [rp1 - rp2 for rp1, rp2 in zip(reads['rp1'], reads['rp2'])]

        if not per_rep_diffs:
            gene_trsc_diffs.append(float('nan'))
            continue

        avgDiff = sum(per_rep_diffs)/len(per_rep_diffs)

        if avgDiff > 2 or avgDiff < -2:
            print(locusTag, avgDiff)

        gene_trsc_diffs.append(avgDiff)

    return gene_trsc_diffs


In [None]:
# Per-gene, per-replicate counts for the two RP_<locus>_C1 / _C2 traces, then their averaged difference
geneReads = get_gene_reads(w, entire_ptn_locusNums, genomeDict, surface_doubling_times)
gene_trsc_diffs = get_trsc_event_diff(geneReads)

In [None]:
from collections import defaultdict, OrderedDict

def RNA_diff_coeff(rnasequence):
    """
    Inputs:
    rnasequence - (string) nucleotide sequence of an RNA; only its length is used

    Returns:
    mrna_diff_coeff - estimated diffusion coefficient of the RNA in m^2/s
                      (all constants below are in SI units)

    Description:
    Models the RNA as a sphere: mass = 337 g/mol per nucleotide, density
    1.75e6 g/m^3, which gives the hydrodynamic radius R_H in metres. The
    diffusion coefficient then follows from the Stokes-Einstein relation
    D = kB*T / (6*pi*viscosity*R_H).

    An empty sequence gives R_H = 0 and raises ZeroDivisionError.
    """

    # Number of nucleotides
    n_tot = len(rnasequence)

    molec_mass = 337 #309 #g/mol/nucleotide
    density = 1.75*1000000 #g/m^3
    avogadro = 6.023e23 #mol^-1 (value kept as in the original model)

    # Radius of a sphere holding n_tot nucleotides at the given density (m)
    R_H = ((3*molec_mass*n_tot)/(4*np.pi*avogadro*density))**(1/3)

    visc = 1.17 #0.15 #7.1 #17.5 #0.05 #0.001 #Pa*s
    kB = 1.380e-23 # J/K
    Temp = 310 # K

    mrna_diff_coeff = kB*Temp/(6*np.pi*visc*R_H)

    return mrna_diff_coeff

def get_mRNA_diff(locusNums, genomeDict):
    """Return the estimated mRNA diffusion coefficient of each locus.

    For every locus number the 'RNAsequence' entry of
    genomeDict['JCVISYN3A_<locusNum>'] is passed to RNA_diff_coeff and the
    result is rounded to 3 significant figures.

    Returns a list in the order of `locusNums`.
    """
    return [
        round_sig(RNA_diff_coeff(genomeDict['JCVISYN3A_'+locusNum]['RNAsequence']), 3)
        for locusNum in locusNums
    ]

In [None]:
# Estimated diffusion coefficient of the mRNA of every protein-coding locus
mRNA_diff = get_mRNA_diff(entire_ptn_locusNums, genomeDict)

In [None]:
# Assemble the per-gene statistics table; dict order fixes the column order.
df = pd.DataFrame({
    'Locus Tag': ['JCVISYN3A_' + locusNum for locusNum in entire_ptn_locusNums],
    'Gene Name': gene_names,
    'Gene Product': gene_products,
    'Sim. Initial Ptn Cnt': ptn_init,
    'Avg Generated Ptn Cnt': avg_ptns_made,
    'Avg mRNA Cnt': avg_mrna_counts,
    'Avg mRNA Halflive (Min)': mrna_halflives,
    'Avg Translation per mRNA': trans_per_mRNA,
    'Trsc Event Diff': gene_trsc_diffs,
    'mRNA Diffusion Coeff. (m^2/s)': mRNA_diff,
})

In [None]:
# Show the assembled statistics table as the cell output
df

In [None]:
# Output folder for the GIP statistics.
# NOTE: this rebinds work_dir, which earlier cells use for the PDF output folder.
work_dir = f'/data/enguang/4DWCM/{simulationdate}/SI_GIP/'
# exist_ok=True replaces the check-then-create pattern (no race, no branch).
os.makedirs(work_dir, exist_ok=True)

excel_path = work_dir + '4DWCM_GIP_statistics.xlsx'
df.to_excel(excel_path, index=False)

#### Extracellular 
Extracellular metabolites are not plotted in the PDFs because their concentrations are assumed constant during the simulation.
The concentrations of the medium are listed in the Medium Excel sheet.

In [None]:
# IDs of the 49 extracellular ('_e') metabolites, in the original unsorted order
extra_names = ['glc__D_e', 'lac__L_e', 'pyr_e', 'ac_e', 'glyc_e', 'fa_e', 
                  'chsterol_e', 'tag_e', 'ribflv_e', 'pydx5p_e', '5fthf_e', 'nac_e', 
                  'coa_e', 'thmpp_e', 'sprm_e', 'dcyt_e', 'uri_e', 'thymd_e', 'adn_e', 
                  'dad_2_e', 'gsn_e', 'dgsn_e', 'na1_e', 'k_e', 'mg2_e', 'ca2_e', 'pi_e', 
                  'arg__L_e', 'asp__L_e', 'cys__L_e', 'glu__L_e', 'gly_e', 'ile__L_e', 'ala__L_e', 'asn__L_e', 'leu__L_e',
                   'gln__L_e', 'his__L_e', 'lys__L_e', 'pro__L_e', 'phe__L_e', 'thr__L_e', 
                   'trp__L_e', 'tyr__L_e', 'val__L_e', 'ser__L_e', 'met__L_e', 'sm_e', 'pc_e']       