In [None]:
# User Inputs
# These values are read by the cells below; edit them here only.

simulation_folder_names =  '../simulations/new_folder_' # Path prefix of the per-replicate output directories; later cells append the replicate number (1, 2, ...) to it

configs = 1 # number of replicate cells (one numbered directory per replicate)

simtime = 1200 # total time simulated, equal to simulation_time in the program. Must be an integer: it sizes arrays (simtime+1 samples) and is divided by 60 for the 'Time (min)' axes.

## WARNING: Some pieces of code can take several minutes to evaluate; the data files are large and processing
##          genome-wide data takes some time.

In [None]:
### General Naming Scheme ###

# Proteins: P_<gene number>. Examples: for gene JCVISYN3A_0002 the protein is P_2; for gene JCVISYN3A_0131 the protein is P_131.
        
# mRNA: R_gene number
    
# RNAP bound to gene: RP_gene number
    
# mRNA bound to degradosome: D_gene number
    
# mRNA bound to ribosomes:
    
#     to single ribosome: RB_gene number
        
#     to start of a polysome: PS_gene number
        
#     to middle of a polysome: PM_gene number
        
#     to end of a polysome: PE_gene number
        
#     If mRNA is too short for a polysome:
        
#         RBS_, RBM_ and RBE_ instead of PS_, PM_, and PE_
        
# For special particle names, see files in model data:
    
#     protein_metabolites_frac.csv
    
#     ribo_protein_metabolites.csv
    
#     trna_metabolites_synthase.csv

In [None]:
from jLM.RegionBuilder import RegionBuilder
from jLM.RDME import Sim as RDMESim
from jLM.RDME import File as RDMEFile
import jLM

from pyLM.units import *

import lm

from lm import MpdRdmeSolver
from lm import IntMpdRdmeSolver

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy  as np

import os

import scipy.ndimage as spnd
import ipywidgets as ipw
import h5py
import itertools
import random
import copy

# import ipyvolume
# from sidecar import Sidecar
# import numpy as np
# from ipywebrtc import WidgetStream, VideoRecorder

import math
import scipy as sp
import scipy.spatial

# import seaborn as sns

import csv
import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq
import importlib
from collections import defaultdict, OrderedDict

import time

In [None]:
%matplotlib inline
# Default font size for the large figures; several later cells override it (8 pt for the small figures).
plt.rcParams.update({'font.size': 18})

In [None]:
# traj = RDMEFile('MinCell_jLM_RDME_CME_ODE_2DNA.lm',replicate=1)

In [None]:
# Directory prefix of the replicate folders; the replicate number is appended wherever a file path is built.
workDir = simulation_folder_names

In [None]:

# Upper cap applied to max_poly_size when the functions below decide which ribosome-bound species to sum.
Max_Poly_Count = 10
# Time axis for the particle-count traces: one sample per time unit from 0 to simtime inclusive.
ht = np.arange(0,simtime+1,1)
# Minute axis for the flux traces: 1 .. int(simtime/60)-1, the same minutes whose files repFlux reads.
ft = np.arange(1,int(simtime/60),1)

In [None]:
# Loading multiple trajectories can take several minutes

# One trajectory handle per replicate, in replicate order: trajs[0] comes from the
# directory numbered 1, trajs[1] from the directory numbered 2, and so on.
trajs = []

for rep in range(configs):

    # Replicate directories are numbered from 1, hence rep+1.
    filename = workDir + str(rep+1) + '/MinCell_jLM_RDME_CME_ODE_polysomes.lm'
    # NOTE(review): replicate=1 is passed for every file - presumably each .lm file holds a single replicate; confirm.
    traj = RDMEFile(filename,replicate=1)
    trajs.append(traj)
    

In [None]:

def avgHybridCnt(specID):
    """Average the count trace of one species over all replicates.

    For every replicate directory (``workDir`` + 1 .. ``configs``) the file
    ``particle_counts.csv`` is read and the row whose ``Time`` entry equals
    ``specID`` is extracted.

    Parameters
    ----------
    specID : str
        Species identifier as stored in the ``Time`` column of the CSV file.

    Returns
    -------
    numpy.ndarray
        float64 array of length ``simtime + 1`` with the replicate-averaged trace.

    Raises
    ------
    IndexError
        If ``specID`` has no row in one of the CSV files.
    """
    avgParts = np.zeros(simtime+1)

    for i in range(configs):
        PCDF = pd.read_csv(workDir + str(i+1) + '/particle_counts.csv')
        speciesRow = PCDF.loc[ PCDF.Time == specID ].values[0]
        # Drop the leading entry (assumes 'Time', the species label, is the first column).
        # astype returns a new array, so the result has to be kept; otherwise the
        # object-dtype row would turn the whole average into an object array.
        partTrace = np.delete(speciesRow,0).astype('float64')

        avgParts = avgParts + partTrace

    avgParts = avgParts/configs
    return avgParts

def repHybridCnt(specID,rep):
    """Return the count trace of one species for a single replicate.

    Parameters
    ----------
    specID : str
        Species identifier as stored in the ``Time`` column of
        ``particle_counts.csv``.
    rep : int
        Zero-based replicate index: ``rep = 0`` reads the directory numbered 1.
        (This differs from ``repFlux`` and ``calcRepGeneReads``, which take the
        1-based replicate number.)

    Returns
    -------
    numpy.ndarray
        float64 array of length ``simtime + 1``. If ``rep`` is outside
        ``range(configs)`` an all-zero array is returned and a warning is
        issued, because no replicate was read.

    Raises
    ------
    IndexError
        If ``specID`` has no row in the CSV file.
    """
    partTrace = np.zeros(simtime+1)

    if rep not in range(configs):
        # Previously this case returned zeros without any hint; keep the return
        # value for compatibility but make the problem visible.
        import warnings
        warnings.warn('repHybridCnt: replicate index ' + str(rep) + ' is outside range('
                      + str(configs) + '); returning zeros')
        return partTrace

    # Only the requested replicate's file needs to be read.
    PCDF = pd.read_csv(workDir + str(int(rep)+1) + '/particle_counts.csv')
    speciesRow = PCDF.loc[ PCDF.Time == specID ].values[0]
    # Drop the leading label entry (assumes 'Time' is the first column) and keep
    # the float64 conversion, whose result was discarded before.
    return partTrace + np.delete(speciesRow,0).astype('float64')

def avgHybridConc(specID):
    """Average the concentration trace of one species over all replicates.

    For every replicate the count row of ``specID`` and the ``CellSA`` row are
    read from ``particle_counts.csv``. The surface area is converted to the
    radius of a sphere, the sphere volume is computed, and the counts are
    divided by (6.02e23 * volume).

    Parameters
    ----------
    specID : str
        Species identifier as stored in the ``Time`` column of the CSV file.

    Returns
    -------
    numpy.ndarray
        float64 array of length ``simtime + 1`` with the replicate-averaged
        concentration (plotted elsewhere in this notebook as mM).

    Raises
    ------
    IndexError
        If ``specID`` or ``CellSA`` has no row in one of the CSV files.
    """
    avgConc = np.zeros(simtime+1)

    for i in range(configs):
        PCDF = pd.read_csv(workDir + str(i+1) + '/particle_counts.csv')

        # Drop the leading label entry of each row (assumes 'Time' is the first
        # column) and keep the float64 conversion, which was discarded before.
        partTrace = np.delete(PCDF.loc[ PCDF.Time == specID ].values[0],0).astype('float64')
        SATrace = np.delete(PCDF.loc[ PCDF.Time == 'CellSA' ].values[0],0).astype('float64')

        # Sphere radius from the surface area; the 1e-9 factor presumably
        # converts nm to m (TODO confirm the unit of CellSA).
        cellRadius = ((SATrace/4/np.pi)**(1/2))*1e-9
        # Sphere volume; the factor 1000 presumably converts m^3 to litres.
        cellVolume = ((4/3)*np.pi*(cellRadius)**3)*(1000)

        # Whole trace at once instead of an element-wise loop that reused the
        # outer loop variable.
        avgConc += (partTrace*1000.0)/((6.02e23)*cellVolume)

    avgConc = avgConc/configs
    return avgConc

def repHybridConc(specID,rep):
    """Return the concentration trace of one species for a single replicate.

    The count row of ``specID`` and the ``CellSA`` row are read from the
    replicate's ``particle_counts.csv``; the surface area is converted to the
    volume of a sphere and the counts are divided by (6.02e23 * volume).

    Parameters
    ----------
    specID : str
        Species identifier as stored in the ``Time`` column of the CSV file.
    rep : int
        Zero-based replicate index: ``rep = 0`` reads the directory numbered 1.
        (This differs from ``repFlux`` and ``calcRepGeneReads``, which take the
        1-based replicate number.)

    Returns
    -------
    numpy.ndarray
        float64 array of length ``simtime + 1`` (plotted elsewhere in this
        notebook as mM). If ``rep`` is outside ``range(configs)`` an all-zero
        array is returned and a warning is issued, because no replicate was read.

    Raises
    ------
    IndexError
        If ``specID`` or ``CellSA`` has no row in the CSV file.
    """
    concTrace = np.zeros(simtime+1)

    if rep not in range(configs):
        # Previously this case returned zeros without any hint; keep the return
        # value for compatibility but make the problem visible.
        import warnings
        warnings.warn('repHybridConc: replicate index ' + str(rep) + ' is outside range('
                      + str(configs) + '); returning zeros')
        return concTrace

    # Only the requested replicate's file needs to be read.
    PCDF = pd.read_csv(workDir + str(int(rep)+1) + '/particle_counts.csv')

    # Drop the leading label entry of each row (assumes 'Time' is the first
    # column) and keep the float64 conversion, which was discarded before.
    partTrace = np.delete(PCDF.loc[ PCDF.Time == specID ].values[0],0).astype('float64')
    SATrace = np.delete(PCDF.loc[ PCDF.Time == 'CellSA' ].values[0],0).astype('float64')

    # Sphere radius from the surface area; the 1e-9 factor presumably converts
    # nm to m (TODO confirm the unit of CellSA).
    cellRadius = ((SATrace/4/np.pi)**(1/2))*1e-9
    # Sphere volume; the factor 1000 presumably converts m^3 to litres.
    cellVolume = ((4/3)*np.pi*(cellRadius)**3)*(1000)

    return concTrace + (partTrace*1000.0)/((6.02e23)*cellVolume)

In [None]:
def pltAvgRna(RnaName,RiboName,DegName):
    """Replicate-averaged sum of three named species trajectories.

    NOTE(review): a later cell in this notebook defines ``pltAvgRna(locusTag)``
    with a different signature; once that cell has run, this three-argument
    version is shadowed and can no longer be called by name.

    Parameters: the species names of the free mRNA (``RnaName``), its
    ribosome-bound form (``RiboName``) and its degradosome-bound form
    (``DegName``), as understood by ``trajs[i].species``.

    Returns ``(t, RNAcount)``: the time axis of the last trajectory read and the
    summed counts divided by ``configs``.
    """
    
    # Accumulator sized for simtime+1 samples.
    RNAcount = np.zeros(simtime+1)
    
    for i in range(configs):
        
        t, Rcount = trajs[i].getNumberTrajectory(species = trajs[i].species(RnaName))
        
        RNAcount = RNAcount + Rcount
    
        t, RRcount = trajs[i].getNumberTrajectory(species = trajs[i].species(RiboName))
        
        RNAcount = RNAcount + RRcount
    
        t, RDEGcount = trajs[i].getNumberTrajectory(species = trajs[i].species(DegName))
    
        RNAcount = RNAcount + RDEGcount
    
    # Average over replicates.
    RNAcount = RNAcount/configs
    
    return t, RNAcount

def pltAvgSpec(specName):
    """Replicate-averaged number trajectory of a single species.

    ``specName`` is looked up in each of the first ``configs`` trajectories in
    ``trajs``; the count traces are summed and divided by ``configs``.

    Returns ``(times, average)`` where ``times`` is the time axis reported by
    the last trajectory read.
    """
    summed = np.zeros(simtime+1)

    for rep in range(configs):
        traj = trajs[rep]
        times, counts = traj.getNumberTrajectory(species = traj.species(specName))
        summed = summed + counts

    average = summed/configs

    return times, average


def pltAvgPtn(specName):
    """Replicate-averaged number trajectory of a protein species.

    Performs the same computation as ``pltAvgSpec``: the trace of ``specName``
    is read from each of the first ``configs`` trajectories in ``trajs``,
    summed, and divided by ``configs``.

    Returns ``(times, average)`` where ``times`` is the time axis reported by
    the last trajectory read.
    """
    summed = np.zeros(simtime+1)

    for rep in range(configs):
        traj = trajs[rep]
        times, counts = traj.getNumberTrajectory(species = traj.species(specName))
        summed = summed + counts

    average = summed/configs

    return times, average
    

In [None]:
def pltAvgRna(locusTag):
    """Replicate-averaged total mRNA count of one gene.

    The gene number is the part of ``locusTag`` after the first underscore with
    leading zeros removed. The following species are summed for each replicate:
    ``R_<n>`` (free mRNA), ``D_<n>`` (degradosome-bound) and the ribosome-bound
    forms, whose names depend on the protein length:

    * length <= 125: ``RB_``, ``RBS_``, ``RBM_``, ``RBE_``
    * length  > 125: ``RB_``, ``PS_``, ``PS_<n>_d``, ``PM_``, ``PM_<n>_s``,
      ``PM_<n>_d``, ``PE_``, ``PE_<n>_s``

    Returns ``(times, average)`` where ``times`` is the time axis of the last
    trajectory read and ``average`` is the summed count divided by ``configs``.
    """
    geneNum = locusTag.split('_')[1].lstrip('0')

    _, proteinSeq = getSequences(locusTag)

    # Length of the translated sequence, obtained by tallying each distinct symbol.
    ptn_len = sum(proteinSeq.count(residue) for residue in set(proteinSeq))

    max_poly_size = 1 if ptn_len <= 125 else min(Max_Poly_Count,int(ptn_len/42))

    # (prefix, suffix) pairs of the ribosome-bound species, in summation order.
    if max_poly_size == 1:
        riboForms = [('RB_', ''), ('RBS_', ''), ('RBM_', ''), ('RBE_', '')]
    else:
        riboForms = [('RB_', ''),
                     ('PS_', ''), ('PS_', '_d'),
                     ('PM_', ''), ('PM_', '_s'), ('PM_', '_d'),
                     ('PE_', ''), ('PE_', '_s')]

    speciesNames = ['R_' + geneNum, 'D_' + geneNum]
    speciesNames += [prefix + geneNum + suffix for prefix, suffix in riboForms]

    summed = np.zeros(simtime+1)

    for rep in range(configs):
        traj = trajs[rep]
        for name in speciesNames:
            times, counts = traj.getNumberTrajectory(species = traj.species(name))
            summed = summed + counts

    average = summed/configs

    return times, average


def pltAvgRibos(locusTag):
    """Replicate-averaged count of ribosomes bound to the mRNA of one gene.

    The gene number is the part of ``locusTag`` after the first underscore with
    leading zeros removed. Which species are summed depends on the protein
    length:

    * length <= 125: ``RB_``, ``RBS_``, ``RBM_``, ``RBE_`` (each counted once)
    * length  > 125: ``RB_``, ``PS_``, ``PS_<n>_d``, ``PM_<n>_s``, ``PM_<n>_d``
      and ``PE_<n>_s`` counted once; ``PM_`` and ``PE_`` counted twice
      (presumably because those particles carry two ribosomes - confirm
      against the model definition).

    Returns only the averaged count array (no time axis).
    """
    geneNum = locusTag.split('_')[1].lstrip('0')

    _, proteinSeq = getSequences(locusTag)

    # Length of the translated sequence, obtained by tallying each distinct symbol.
    ptn_len = sum(proteinSeq.count(residue) for residue in set(proteinSeq))

    if ptn_len > 125:
        max_poly_size = min(Max_Poly_Count,int(ptn_len/42))
    else:
        max_poly_size = 1

    # (prefix, suffix, number of times the trace is added), in summation order.
    if max_poly_size == 1:
        weightedForms = [('RB_', '', 1), ('RBS_', '', 1), ('RBM_', '', 1), ('RBE_', '', 1)]
    else:
        weightedForms = [('RB_', '', 1),
                         ('PS_', '', 1), ('PS_', '_d', 1),
                         ('PM_', '', 2), ('PM_', '_s', 1), ('PM_', '_d', 1),
                         ('PE_', '', 2), ('PE_', '_s', 1)]

    summed = np.zeros(simtime+1)

    for rep in range(configs):
        traj = trajs[rep]
        for prefix, suffix, weight in weightedForms:
            name = prefix + geneNum + suffix
            _, counts = traj.getNumberTrajectory(species = traj.species(name))
            for _repeat in range(weight):
                summed = summed + counts

    return summed / configs

def getRNAsequences(jcvi3AID):
    """Return the transcribed (RNA) sequence of a locus.

    The location stored in ``genomeLocDict`` for ``jcvi3AID`` is extracted from
    ``genome3A.seq`` and transcribed.

    Returns the RNA sequence, or ``0`` when the locus is unknown or the
    extraction fails (callers rely on this sentinel instead of an exception).
    """
    try:
        rnasequence = genomeLocDict[jcvi3AID].extract(genome3A.seq).transcribe()

    except Exception:
        # Best-effort lookup: keep the 0 sentinel, but no longer swallow
        # KeyboardInterrupt/SystemExit as the bare ``except`` did.
        rnasequence = 0

    return rnasequence
        
# Create list of proteins with no proteomics data
# ptnNoQuant = set()

def getSequences(jcvi3AID):
    """Return the RNA and protein sequences of a locus.

    The location stored in ``genomeLocDict`` for ``jcvi3AID`` is extracted from
    ``genome3A.seq`` and transcribed; the transcript is then translated.

    Returns ``(rnasequence, aasequence)``, or ``(0, 0)`` when the locus is
    unknown or extraction/translation fails (callers rely on this sentinel
    instead of an exception).
    """
    try:
        rnasequence = genomeLocDict[jcvi3AID].extract(genome3A.seq).transcribe()

        # Using translation table 4 from NCBI: "Mycoplasma Code"
        # https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG4
        # Translate the transcript obtained above instead of extracting and
        # transcribing the same location a second time.
        aasequence  = rnasequence.translate(table=4)

    except Exception:
        # Best-effort lookup: keep the (0, 0) sentinel, but no longer swallow
        # KeyboardInterrupt/SystemExit as the bare ``except`` did.
        aasequence  = 0
        rnasequence = 0

    return rnasequence, aasequence

In [None]:
def calcAvgGeneReads(locusTag):
    """Count the time steps at which the averaged mRNA trace of a gene increases.

    The replicate-averaged total mRNA trace is obtained from
    ``pltAvgRna(locusTag)``; every step ``i`` with ``trace[i] < trace[i+1]`` is
    counted once (regardless of the size of the increase) and the count is
    divided by ``configs``.

    NOTE(review): the trace is already a replicate average, so the additional
    division by ``configs`` looks like a heuristic - confirm it is intended.

    Returns a float.
    """
    # The sequences and locus number were fetched here before but never used;
    # pltAvgRna performs its own lookup.
    t, RNAcount = pltAvgRna(locusTag)

    RNAcount = np.asarray(RNAcount)

    # Number of adjacent pairs where the later sample is strictly larger.
    numberMade = int(np.count_nonzero(RNAcount[:-1] < RNAcount[1:]))

    numberMade = numberMade / configs

    return numberMade


def calcRepGeneReads(locusTag,rep):
    """Count the time steps at which one replicate's mRNA trace of a gene increases.

    ``rep`` is the 1-based replicate number (``trajs[rep-1]`` is used). The
    summed trace consists of ``R_<n>``, ``D_<n>`` and ribosome-bound species
    named ``RB<k>_<n>_<p>`` for polysome sizes ``k`` up to ``Max_Poly_Count``.

    NOTE(review): the length threshold (134) and the ``RB<k>_<n>_<p>`` naming
    differ from ``pltAvgRna``/``pltAvgRibos`` (125 and ``PS_``/``PM_``/``PE_``);
    verify that these species exist in the trajectories being analysed.

    Returns the number of steps ``i`` with ``trace[i] < trace[i+1]`` as an int.
    """
    geneNum = locusTag.split('_')[1].lstrip('0')

    _, proteinSeq = getSequences(locusTag)

    # Length of the translated sequence, obtained by tallying each distinct symbol.
    ptn_len = sum(proteinSeq.count(residue) for residue in set(proteinSeq))

    max_poly_size = 1 if ptn_len <= 134 else min(Max_Poly_Count,int(ptn_len/134))

    repIndex = rep-1

    def _counts(name):
        # Count trace of one named species in the selected replicate.
        return trajs[repIndex].getNumberTrajectory(species = trajs[repIndex].species(name))[1]

    RNAcount = np.zeros(simtime+1)
    RNAcount = RNAcount + _counts('R_' + geneNum)
    RNAcount = RNAcount + _counts('D_' + geneNum)

    for polySize in range(1, Max_Poly_Count+1):

        for position in range(1, polySize+1):

            # Every visited (size, position) pair contributes its trace; the
            # conditions below only decide where the scan of this size stops.
            RNAcount = RNAcount + _counts('RB' + str(polySize) + '_' + geneNum + '_' + str(position))

            if max_poly_size == 1 or polySize == 1:
                break

            if position > 1 and (position == max_poly_size or position == polySize):
                break

    # Number of adjacent pairs where the later sample is strictly larger.
    return int(np.count_nonzero(RNAcount[:-1] < RNAcount[1:]))



In [None]:
### Load all necessary files
# The reconstruction matches reactions with gene-protein-reactions (GPR) that use MMSYN1* IDs.
reconstPD = pd.read_excel("../model_data/reconstruction.xlsx", sheet_name='Reactions')

# The annotation matches MMSYN1* IDs with JCVISYN3* IDs (or "locus tags").
annotatPD = pd.read_excel("../model_data/FBA/Syn3A_annotation_compilation.xlsx",
                         sheet_name="Syn3A_annotation_compilation_condensed")

# Updated annotation table; the plotting cells read the locus tag from its 4th column
# and the functional category from its 6th column.
annotatPD_updated = pd.read_excel("../model_data/proteomics_annotations.xlsx",sheet_name="Sheet1")

# The genome data matches "locus tags" with AOE* protein IDs.
# It provides both the gene sequence, needed for transcription reactions in the ODE model,
# and the protein sequence, needed for translation reactions in the model.
# This is the NCBI GenBank-formatted file (https://www.ncbi.nlm.nih.gov/nuccore/CP014992.1).

genomeFile2 = '../model_data/syn2.gb'
# Only the first record of the file is used.
genome2 = next(SeqIO.parse(genomeFile2, "gb"))

# This is the NCBI GenBank-formatted file (https://www.ncbi.nlm.nih.gov/nuccore/CP016816.2).
genomeFile3A = '../model_data/syn3A.gb'
# Only the first record of the file is used.
genome3A = next(SeqIO.parse(genomeFile3A, "gb"))

# The proteomics matches AOE IDs with quantitative proteomics data.
proteomPD = pd.read_excel("../model_data/proteomics.xlsx", sheet_name="Proteomics", skiprows=[0] )

# NOTE(review): this parses syn3A.gb a second time; dna3A is the first record of the
# same file that genome3A was read from.
genome_syn3A = list(SeqIO.parse(genomeFile3A, "genbank"))
dna3A = genome_syn3A[0]

In [None]:
# Lookup tables built from the two GenBank records:
#   AOEtoJ2 / J2toAOE : protein_id <-> locus tag for the CDS features of genome2
#   genomeLocDict     : locus tag -> feature location for CDS (with protein_id), rRNA and tRNA of genome3A
#   genomePtnLocDict  : same, CDS features with a protein_id only
#   genomeRnaLocDict  : same, rRNA and tRNA features only
#   Locus3A           : every CDS, rRNA and tRNA locus tag of genome3A, in file order
AOEtoJ2 = dict()
J2toAOE = dict()
genomeLocDict = dict()
genomePtnLocDict = dict()
genomeRnaLocDict = dict()
Locus3A = []

for f in genome2.features:
    if f.type == "CDS":
        JCVSYN2_tag = f.qualifiers['locus_tag'][0]
        #print(JCVSYN2_tag)
        # Not all entries have an AOE protein_id
        if('protein_id' in f.qualifiers.keys()):
            AOE_locus = f.qualifiers['protein_id'][0]
            AOEtoJ2[AOE_locus] = JCVSYN2_tag
            J2toAOE[JCVSYN2_tag] = AOE_locus
#             genomeLocDict[JCVSYN2_tag] = f.location
        else:
            print("Locus ", JCVSYN2_tag, " has no AOE id!")
    # The rRNA/tRNA branches below only rebind JCVSYN2_tag; nothing is stored for genome2 RNAs.
    if f.type == "rRNA":
        JCVSYN2_tag = f.qualifiers['locus_tag'][0]
#         genomeLocDict[JCVSYN2_tag] = f.location
    if f.type == "tRNA":
        JCVSYN2_tag = f.qualifiers['locus_tag'][0]
#         genomeLocDict[JCVSYN2_tag] = f.location
        
for f in genome3A.features:
    if f.type == "CDS":
        JCVSYN3A_tag = f.qualifiers['locus_tag'][0]
        # The tag is recorded in Locus3A even when the CDS has no protein_id.
        Locus3A.append(JCVSYN3A_tag)
        #print(JCVSYN2_tag)
        # Not all entries have an AOE protein_id
        if('protein_id' in f.qualifiers.keys()):
#             AOE_locus = f.qualifiers['protein_id'][0]
#             AOEtoJ2[AOE_locus] = JCVSYN2_tag
#             J2toAOE[JCVSYN2_tag] = AOE_locus
            genomePtnLocDict[JCVSYN3A_tag] = f.location
            genomeLocDict[JCVSYN3A_tag] = f.location
        else:
            # A CDS without protein_id is reported as a pseudogene and gets no location entry.
            print("Locus ", JCVSYN3A_tag, " is pseudo.")
    if f.type == "rRNA":
        JCVSYN3A_tag = f.qualifiers['locus_tag'][0]
        Locus3A.append(JCVSYN3A_tag)
        genomeRnaLocDict[JCVSYN3A_tag] = f.location
        genomeLocDict[JCVSYN3A_tag] = f.location
    if f.type == "tRNA":
        JCVSYN3A_tag = f.qualifiers['locus_tag'][0]
        Locus3A.append(JCVSYN3A_tag)
        genomeRnaLocDict[JCVSYN3A_tag] = f.location
        genomeLocDict[JCVSYN3A_tag] = f.location

In [None]:
def repFlux(rxnName,rep):
    """Collect the per-minute flux values of one reaction for one replicate.

    ``rep`` is the replicate number exactly as it appears in the directory name
    (``workDir`` + ``rep``). For every minute ``m`` from 1 to
    ``int(simtime/60) - 1`` the header-less file ``fluxes/fluxDF_<m>min.csv`` is
    read; column 1 holds the reaction ID and column 2 the flux.

    Returns a list with one entry per matching row, in file and row order.
    """
    lastMinute = int(simtime/60)

    fluxes = []

    for minute in range(1,lastMinute):

        path = workDir + str(rep) + '/fluxes/fluxDF_' + str(minute) + 'min.csv'

        table = pd.read_csv(path,header=None)

        # Keep the flux of every row whose reaction ID matches.
        fluxes.extend(flux for rxnID, flux in zip(table[1], table[2]) if rxnID == rxnName)

    return fluxes

In [None]:
# Plot the replicate-averaged count trace of one species from particle_counts.csv.
#
# Only one species is plotted per run. Earlier revisions assigned specID several
# times in a row, so every value but the last was dead; pick one of the
# alternatives below by editing the single assignment.
#   'M_rRNA_16S_c', 'M_pi_c', 'ATP_mRNAdeg', 'Degradosome', 'M_glntrna_c',
#   'M_glutrnagln_c', 'D_60', 'CellSA'
specID = 'ATP_translat'
plt.plot(ht/60,avgHybridCnt(specID))
# print(np.average(avgHybridCnt(specID)[600:]))
plt.xlabel('Time (min)')
# plt.xlim(0,10)
plt.ylabel('Counts')
# Fixed y-range; adjust when plotting a species with larger counts.
plt.ylim(0,10000)
# plt.title(specID)

In [None]:
# Concentration trace of one species for a single replicate.
specID = 'M_atp_c'
# specID = 'ATP_trsc'
# repHybridConc/repHybridCnt take a ZERO-based replicate index (0 -> directory 1).
# The previous value 1 is out of range when configs == 1, which made the function
# return zeros and the plot a flat line.
rep = 0
plt.figure(figsize=(6,4))
plt.rcParams.update({'font.size': 18})
# plt.plot(ht/60,repHybridCnt(specID,rep))
plt.plot(ht/60,repHybridConc(specID,rep))
plt.xlabel('Time (min)')
plt.ylabel('Concentration (mM)')
# plt.title(specID)

In [None]:
# Replicate-averaged concentration trace of a single species.
specID = 'M_datp_c'
plt.figure(figsize=(6,4))
plt.rcParams.update({'font.size': 18})
# ht is in simulation time units; dividing by 60 gives the minutes shown on the axis.
plt.plot(ht/60,avgHybridConc(specID))
plt.xlabel('Time (min)')
plt.ylabel('Concentration (mM)')
plt.title(specID)

In [None]:
# Small-format figure: replicate-averaged concentrations of several nucleotides on one axis.
# specIDs are the names used in particle_counts.csv; specIDs_paper are the legend labels,
# in the same order.
specIDs = ['M_atp_c','M_adp_c','M_gtp_c','M_ctp_c','M_utp_c']
specIDs_paper = ['ATP','ADP','GTP','CTP','UTP']
# rep is only used by the commented-out single-replicate plot inside the loop.
rep=0
plt.rcParams.update({'font.size': 8})
plt.rcParams.update({"font.family": 'sans-serif'})

fig = plt.figure(figsize=(3.3,2.5))

ax = plt.gca()

for specID in specIDs:
#     plt.plot(ht/60,repHybridConc(specID,rep))
    ax.plot(ht/60,avgHybridConc(specID))
ax.set_xlabel('Time (min)')
ax.set_ylabel('Concentration (mM)')
# Fixed y-range; traces above 6.9 would be clipped.
ax.set_ylim(-0.1,6.9)
# plt.legend(specIDs,loc=2)
ax.legend(specIDs_paper,ncol=2) #, bbox_to_anchor=(1.05, 1), loc='upper left')

plt.show()
# plt.title(specID)

In [None]:
# Per-minute flux of one reaction for a single replicate.
rxnID = 'PAPA'
# repFlux takes the replicate number as used in the directory name (1-based).
rep = 1
plt.figure(figsize=(6,4))
plt.rcParams.update({'font.size': 18})
# ft holds minutes 1 .. int(simtime/60)-1, one per flux file read by repFlux; the
# lengths only match when the reaction appears exactly once in every file.
plt.plot(ft,repFlux(rxnID,rep))
plt.xlabel('Time (min)')
plt.ylabel('Flux (mM/s)')
plt.title(rxnID)

In [None]:

# Table of ribosomal-protein genes; a later cell reads its "gene" column.
riboPtnMetDF = pd.read_csv("../model_data/ribo_protein_metabolites.csv")
# riboPtnMetDF

In [None]:
# Plot the replicate-averaged total mRNA trace of every protein-coding locus on one
# figure and add up the final values.
plt.figure(figsize=(8,6))
# inPlot counts the plotted loci; total_mRNA sums the last sample of every trace.
inPlot = 0

total_mRNA = 0

for locusTag in genomePtnLocDict:
#     locusNum = locusTag.split('_')[1].lstrip('0')
#     rnaID = 'R_' + locusNum
#     riboName = 'RB1_' + locusNum + '_1'
#     degName = 'D_' + locusNum
#     t, avgCnt = pltAvgRna(rnaID,riboName,degName)
    t, avgCnt = pltAvgRna(locusTag)
    total_mRNA = total_mRNA + avgCnt[-1]
    plt.plot(t/60, avgCnt)
    inPlot = inPlot + 1

print(inPlot)
print(total_mRNA)

plt.xlabel('Time (min)')
plt.ylabel('Average mRNA Count')
plt.show()

In [None]:

# Genome-wide totals of gene-bound machinery, summed over all protein-coding loci:
#   RNAP_tot : RNAP bound to a gene        (species RP_<n>)
#   Deg_tot  : degradosome-bound mRNA      (species D_<n>)
#   Ribo_tot : ribosomes bound to mRNA     (pltAvgRibos)
inPlot = 0

total_mRNA = 0

# Sized from simtime; the previous hard-coded np.zeros(1201) allocations were
# overwritten immediately and have been dropped.
Deg_tot = np.zeros(simtime+1)
RNAP_tot = np.zeros(simtime+1)
Ribo_tot = np.zeros(simtime+1)

for locusTag in genomePtnLocDict:
    # Gene number without leading zeros, as used in the species names.
    locusNum = locusTag.split('_')[1].lstrip('0')
    rnapName = 'RP_' + locusNum
    degName = 'D_' + locusNum
    t, rnapcnt = pltAvgSpec(rnapName)
    t, degcnt = pltAvgSpec(degName)
    ribocnt = pltAvgRibos(locusTag)
    RNAP_tot = RNAP_tot + rnapcnt
    Deg_tot = Deg_tot + degcnt
    Ribo_tot = Ribo_tot + ribocnt

# print(inPlot)
# print(total_mRNA)

# print(inPlot)
# print(total_mRNA)

In [None]:
# Small-format figure of the genome-wide degradosome-bound mRNA total (Deg_tot).
# Relies on t and Deg_tot left behind by the cell that accumulates the totals.
fig = plt.figure(figsize=(2.5,6/8*2.5))

plt.rcParams.update({'font.size': 8})

ax = plt.gca()

plt.rcParams.update({'font.size': 8})

ax.plot(t/60,Deg_tot,'k',linewidth=1)

# print(np.average(Deg_tot[600:]))
# print(np.std(Deg_tot[600:]))

# ax.set_xlim(1,21)
# ax.set_ylim(7,22)

ax.set_xlabel('Time (min)')
ax.set_ylabel('Active Degradosomes')

plt.show()

In [None]:
# Small-format figure of the genome-wide gene-bound RNAP total (RNAP_tot).
fig = plt.figure(figsize=(2.5,6/8*2.5))

ax = plt.gca()
plt.rcParams.update({'font.size': 8})

# Only the time axis t is used below; the 'RNApol' counts themselves are not plotted.
t,rnapol = pltAvgSpec('RNApol')

ax.plot(t/60,RNAP_tot,'k',linewidth=1)

# Hard-coded axis window; data outside 1-21 min or 50-70 counts is not shown.
ax.set_xlim(1,21)
ax.set_ylim(50,70)
    
ax.set_xlabel('Time (min)')
ax.set_ylabel('Active RNAP')

plt.show()

In [None]:
# Small-format figure of the genome-wide mRNA-bound ribosome total (Ribo_tot).
# Relies on t left behind by an earlier cell.
fig = plt.figure(figsize=(2.5,6/8*2.5))

ax = plt.gca()
plt.rcParams.update({'font.size': 8})

# t,rnapol = pltAvgSpec('RNApol')

ax.plot(t/60,Ribo_tot,'k',linewidth=1)

# ax.set_xlim(1,21)
# ax.set_ylim(120,230)

# print(np.average(Ribo_tot[600:]))

ax.set_xlabel('Time (min)')
ax.set_ylabel('Active Ribosomes')

plt.show()

In [None]:
# Average mRNA traces of the genes listed in riboPtnMetDF (ribosomal proteins).
# metListmRNA collects the locus tags that were plotted; it keeps growing in the
# following cells until it is reset.
metListmRNA = []

# for rnaID in ModelSpecies:
#     if "RNA_MMS" in rnaID:
#         metListmRNA.append(rnaID)


plt.rcParams.update({'font.size': 18})

plt.figure(figsize=(8,6))

# time = np.arange(0,simtime+1,1)

# for mmcode in gene_list_other:
#     mRNA = "M_RNA_" + mmcode + "_c"
#     metListmRNA.append(mRNA)
#     plt.plot(time/60,AverageSpec(mRNA),'y')

for index, row in riboPtnMetDF.iterrows():
    # The "gene" entry is split at the first underscore; the part after it is reused
    # to build the JCVISYN3A locus tag.
    mmcode = row["gene"]
    locusNum = mmcode.split('_')[1]
    locusTag = 'JCVISYN3A_' + locusNum
#     rnaID = 'M_RNA_' + locusTag + '_c'
    # rnaID, riboName and degName are leftovers of the three-argument pltAvgRna call
    # and are not used below.
    rnaID = 'RNA_' + locusNum
    riboName = 'Ribo_' + locusNum
    degName = 'Deg_' + locusNum
#     time, avgTrace = pltAvgRna(rnaID,riboName,degName)
    # NOTE(review): binding the result to `time` shadows the `time` module imported at
    # the top of the notebook; later `time.<function>` calls will fail after this cell.
    time, avgTrace = pltAvgRna(locusTag)
    
    # Report genes whose final average count exceeds 2.
    if avgTrace[-1]>2:
        print(locusTag,avgTrace[-1])
    metListmRNA.append(locusTag)
     
#     avgTrace = AverageSpec(mRNA)
#     plt.plot(time/60,AverageSpec(mRNA),'crimson')
    plt.plot(time/60,avgTrace,'crimson')
    
plt.ylim(0,3)
plt.xlabel('Time (min)')
plt.ylabel('Average mRNA Count')
    
# metListPtns

In [None]:

# Average mRNA traces of every protein-coding locus whose annotation category contains
# 'Metabolism' (green).
plt.rcParams.update({'font.size': 18})

plt.figure(figsize=(8,6))

# time = np.arange(0,simtime+1,1)

# metListmRNA.append("M_RNA_JCVISYN3A_0065_c")
# plt.plot(time/60,AverageSpec("M_RNA_JCVISYN3A_0065_c"),'g')

# metListmRNA.append("M_RNA_JCVISYN3A_0621_c")
# plt.plot(time/60,AverageSpec("M_RNA_JCVISYN3A_0621_c"),'g')

for locusTag in genomePtnLocDict:

    # Round trip through an MMSYN1_ code; JCVI3AID ends up as 'JCVISYN3A_' plus the
    # part of locusTag after '3A_'.
    locus_code = locusTag.split('3A_')[1]
    mmcode = 'MMSYN1_' + locus_code
    locusNum = mmcode.split('_')[1]
    JCVI3AID = "JCVISYN3A_" + locusNum

    # Column 3 holds the locus tag, column 5 the functional category (positional lookup).
    # Raises IndexError when the locus has no row in annotatPD_updated.
    function = annotatPD_updated.loc[ annotatPD_updated.iloc[:,3] == JCVI3AID ].iloc[0, 5].strip()
    if 'Metabolism' in function:
        
#         print(JCVI3AID)

        # rnaID, riboName and degName are leftovers of the three-argument pltAvgRna
        # call and are not used below.
        rnaID = 'RNA_' + locusNum
        riboName = 'Ribo_' + locusNum
        degName = 'Deg_' + locusNum
#         time, avgTrace = pltAvgRna(rnaID,riboName,degName)
        # NOTE(review): `time` shadows the `time` module imported at the top of the notebook.
        time, avgTrace = pltAvgRna(locusTag)
        # Report genes whose final average count exceeds 2.
        if avgTrace[-1]>2:
            print(locusTag,avgTrace[-1])
        metListmRNA.append(locusTag)

        plt.plot(time/60,avgTrace,'g')

# plt.xlim(0,20)
plt.ylim(0,5)
plt.xlabel('Time (min)')
plt.ylabel('Average mRNA Count')

In [None]:

# Same 'Metabolism' plot as the preceding cell, but with the x axis hidden.
# NOTE(review): running both cells appends the same locus tags to metListmRNA twice.
plt.rcParams.update({'font.size': 18})

plt.figure(figsize=(8,6))

# time = np.arange(0,simtime+1,1)

# metListmRNA.append("M_RNA_JCVISYN3A_0065_c")
# plt.plot(time/60,AverageSpec("M_RNA_JCVISYN3A_0065_c"),'g')

# metListmRNA.append("M_RNA_JCVISYN3A_0621_c")
# plt.plot(time/60,AverageSpec("M_RNA_JCVISYN3A_0621_c"),'g')

for locusTag in genomePtnLocDict:

    # Round trip through an MMSYN1_ code; JCVI3AID ends up as 'JCVISYN3A_' plus the
    # part of locusTag after '3A_'.
    locus_code = locusTag.split('3A_')[1]
    mmcode = 'MMSYN1_' + locus_code
    locusNum = mmcode.split('_')[1]
    JCVI3AID = "JCVISYN3A_" + locusNum

    # Column 3 holds the locus tag, column 5 the functional category (positional lookup).
    # Raises IndexError when the locus has no row in annotatPD_updated.
    function = annotatPD_updated.loc[ annotatPD_updated.iloc[:,3] == JCVI3AID ].iloc[0, 5].strip()
    if 'Metabolism' in function:
        
#         print(JCVI3AID)

        # rnaID, riboName and degName are leftovers of the three-argument pltAvgRna
        # call and are not used below.
        rnaID = 'RNA_' + locusNum
        riboName = 'Ribo_' + locusNum
        degName = 'Deg_' + locusNum
#         time, avgTrace = pltAvgRna(rnaID,riboName,degName)
        # NOTE(review): `time` shadows the `time` module imported at the top of the notebook.
        time, avgTrace = pltAvgRna(locusTag)
        # Report genes whose final average count exceeds 2.
        if avgTrace[-1]>2:
            print(locusTag,avgTrace[-1])
        metListmRNA.append(locusTag)

        plt.plot(time/60,avgTrace,'g')

# plt.xlim(0,20)
plt.ylim(0,5)
ax = plt.gca()
# The whole x axis is hidden, so the xlabel set below is not displayed.
ax.axes.xaxis.set_visible(False)
plt.xlabel('Time (min)')
plt.ylabel('Average mRNA Count')
plt.show()

In [None]:
# Small-format figure: average mRNA traces of every protein-coding locus whose
# annotation category contains 'Genetic' (blue). metListmRNA is reset here.
metListmRNA = []
plt.rcParams.update({'font.size': 8})

fig = plt.figure(figsize=(3.3,2.5))

ax = plt.gca()

# time = np.arange(0,simtime+1,1)

# metListmRNA.append("M_RNA_JCVISYN3A_0065_c")
# plt.plot(time/60,AverageSpec("M_RNA_JCVISYN3A_0065_c"),'g')

# metListmRNA.append("M_RNA_JCVISYN3A_0621_c")
# plt.plot(time/60,AverageSpec("M_RNA_JCVISYN3A_0621_c"),'g')

for locusTag in genomePtnLocDict:

    # Round trip through an MMSYN1_ code; JCVI3AID ends up as 'JCVISYN3A_' plus the
    # part of locusTag after '3A_'.
    locus_code = locusTag.split('3A_')[1]
    mmcode = 'MMSYN1_' + locus_code
    locusNum = mmcode.split('_')[1]
    JCVI3AID = "JCVISYN3A_" + locusNum

    # Column 3 holds the locus tag, column 5 the functional category (positional lookup).
    # Raises IndexError when the locus has no row in annotatPD_updated.
    function = annotatPD_updated.loc[ annotatPD_updated.iloc[:,3] == JCVI3AID ].iloc[0, 5].strip()
    if 'Genetic' in function:
        
#         print(JCVI3AID)

        # rnaID, riboName and degName are leftovers of the three-argument pltAvgRna
        # call and are not used below.
        rnaID = 'RNA_' + locusNum
        riboName = 'Ribo_' + locusNum
        degName = 'Deg_' + locusNum
#         time, avgTrace = pltAvgRna(rnaID,riboName,degName)
        # NOTE(review): `time` shadows the `time` module imported at the top of the notebook.
        time, avgTrace = pltAvgRna(locusTag)
        # Report genes whose final average count exceeds 2.
        if avgTrace[-1]>2:
            print(locusTag,avgTrace[-1])
        metListmRNA.append(locusTag)

        ax.plot(time/60,avgTrace,'dodgerblue')
        
# ax.set_xlim(0,20)
ax.set_ylim(-0.1,5.1)
ax.set_xlabel('Time (min)')
ax.set_ylabel('Average mRNA Count')

plt.show()

In [None]:

# Average mRNA traces of every protein-coding locus whose annotation category contains
# 'Cellular Processes' (orange).
plt.rcParams.update({'font.size': 18})

plt.figure(figsize=(8,6))

# time = np.arange(0,simtime+1,1)

# metListmRNA.append("M_RNA_JCVISYN3A_0065_c")
# plt.plot(time/60,AverageSpec("M_RNA_JCVISYN3A_0065_c"),'g')

# metListmRNA.append("M_RNA_JCVISYN3A_0621_c")
# plt.plot(time/60,AverageSpec("M_RNA_JCVISYN3A_0621_c"),'g')

for locusTag in genomePtnLocDict:

    # Round trip through an MMSYN1_ code; JCVI3AID ends up as 'JCVISYN3A_' plus the
    # part of locusTag after '3A_'.
    locus_code = locusTag.split('3A_')[1]
    mmcode = 'MMSYN1_' + locus_code
    locusNum = mmcode.split('_')[1]
    JCVI3AID = "JCVISYN3A_" + locusNum

    # Column 3 holds the locus tag, column 5 the functional category (positional lookup).
    # Raises IndexError when the locus has no row in annotatPD_updated.
    function = annotatPD_updated.loc[ annotatPD_updated.iloc[:,3] == JCVI3AID ].iloc[0, 5].strip()
    if 'Cellular Processes' in function:
        
#         print(JCVI3AID)

        # rnaID, riboName and degName are leftovers of the three-argument pltAvgRna
        # call and are not used below.
        rnaID = 'RNA_' + locusNum
        riboName = 'Ribo_' + locusNum
        degName = 'Deg_' + locusNum
#         time, avgTrace = pltAvgRna(rnaID,riboName,degName)
        # NOTE(review): `time` shadows the `time` module imported at the top of the notebook.
        time, avgTrace = pltAvgRna(locusTag)
        # Report genes whose final average count exceeds 2.
        if avgTrace[-1]>2:
            print(locusTag,avgTrace[-1])
        metListmRNA.append(locusTag)

        plt.plot(time/60,avgTrace,'darkorange')
        
plt.ylim(0,3)
plt.xlabel('Time (min)')
plt.ylabel('Average mRNA Count')

In [None]:

# Average mRNA traces of every protein-coding locus whose annotation category contains
# 'Unclear' (gray).
plt.rcParams.update({'font.size': 18})

plt.figure(figsize=(8,6))

# time = np.arange(0,simtime+1,1)

# metListmRNA.append("M_RNA_JCVISYN3A_0065_c")
# plt.plot(time/60,AverageSpec("M_RNA_JCVISYN3A_0065_c"),'g')

# metListmRNA.append("M_RNA_JCVISYN3A_0621_c")
# plt.plot(time/60,AverageSpec("M_RNA_JCVISYN3A_0621_c"),'g')

for locusTag in genomePtnLocDict:

    # Round trip through an MMSYN1_ code; JCVI3AID ends up as 'JCVISYN3A_' plus the
    # part of locusTag after '3A_'.
    locus_code = locusTag.split('3A_')[1]
    mmcode = 'MMSYN1_' + locus_code
    locusNum = mmcode.split('_')[1]
    JCVI3AID = "JCVISYN3A_" + locusNum

    # Column 3 holds the locus tag, column 5 the functional category (positional lookup).
    # Raises IndexError when the locus has no row in annotatPD_updated.
    function = annotatPD_updated.loc[ annotatPD_updated.iloc[:,3] == JCVI3AID ].iloc[0, 5].strip()
    if 'Unclear' in function:
        
#         print(JCVI3AID)

        # rnaID, riboName and degName are leftovers of the three-argument pltAvgRna
        # call and are not used below.
        rnaID = 'RNA_' + locusNum
        riboName = 'Ribo_' + locusNum
        degName = 'Deg_' + locusNum
#         time, avgTrace = pltAvgRna(rnaID,riboName,degName)
        # NOTE(review): `time` shadows the `time` module imported at the top of the notebook.
        time, avgTrace = pltAvgRna(locusTag)
        # Report genes whose final average count exceeds 2.
        if avgTrace[-1]>2:
            print(locusTag,avgTrace[-1])
        metListmRNA.append(locusTag)

        plt.plot(time/60,avgTrace,'gray')

plt.ylim(0,3)        
plt.xlabel('Time (min)')
plt.ylabel('Average mRNA Count')

In [None]:

# For every protein that starts with at least 10 copies, record how many copies
# were made over the trace (last value minus first value) and the fold change
# (last value / first value), and print the proteins whose fold change is
# above 1.4 or below 1.05.

total_ptns = 0          # running sum of copies made across all counted proteins

final_ptns = []         # per protein: avgCnt[-1] - avgCnt[0]
final_ptns_scaled = []  # per protein: avgCnt[-1] / avgCnt[0]

overTot = 0             # number of proteins printed as outliers (either direction)

for locusTag in genomePtnLocDict:
    locusNum = locusTag.split('_')[1].lstrip('0')
    ptnID = 'P_' + locusNum
    try:
        t, avgCnt = pltAvgPtn(ptnID)
        if avgCnt[0] >= 10:
            scaledCnt = avgCnt[-1]/avgCnt[0]
            cntMade = avgCnt[-1] - avgCnt[0]
            total_ptns = total_ptns + cntMade
            final_ptns.append(cntMade)
            final_ptns_scaled.append(scaledCnt)
            # The two thresholds cannot both hold, so each protein is counted at most once.
            if scaledCnt > 1.4 or scaledCnt < 1.05:
                print(ptnID, ' : ', avgCnt[0], ' : ', scaledCnt)
                overTot = overTot + 1

    except Exception:
        # Best effort: skip proteins whose trace cannot be retrieved or processed.
        # `Exception` (rather than a bare except) lets a kernel interrupt stop the loop.
        continue

print(len(final_ptns))
print(total_ptns)
print(overTot)

In [None]:
# Histogram of the number of copies made per protein (values in final_ptns).
fig = plt.figure(figsize=(8,6))
ax = plt.gca()

ax.hist(final_ptns, bins = 30)
ax.set_xlabel('Number Generated in 20 min')
ax.set_ylabel('Number of Unique Proteins')

plt.show()

In [None]:
# Histogram of the per-protein fold change (final / initial count), with the
# mean and median marked as vertical lines.
plt.rcParams.update({'font.size': 8, "font.family": 'sans-serif'})

fig = plt.figure(figsize=(3.3,2.5))
ax = plt.gca()

# Fixed-width bins spanning the observed range.
binwidth = 0.02
scaled_bins = np.arange(min(final_ptns_scaled), max(final_ptns_scaled) + binwidth, binwidth)
ax.hist(final_ptns_scaled, bins=scaled_bins, edgecolor='k', color='lightgreen')

# One marker line per summary statistic; the legend label carries the rounded value.
for stat_label, stat_func, stat_color in (('Average: ', np.average, 'b'),
                                          ('Median: ', np.median, 'r')):
    stat_value = stat_func(final_ptns_scaled)
    ax.axvline(x=stat_value, c=stat_color, lw=2, label=stat_label+str(np.round(stat_value,2)))

ax.set_xlabel('Scaled Protein Counts After 20 min')
ax.set_ylabel('Number of Unique Proteins')
ax.set_xlim(0.95,1.8)
ax.set_ylim(0,90)
ax.legend()

plt.show()

In [None]:
# Mean fold change (final / initial count) over the proteins collected in final_ptns_scaled.
np.average(final_ptns_scaled)

In [None]:
# For every protein that starts with at least 10 copies, collect
#   * avg_gene_reads: the value returned by calcAvgGeneReads for its gene, and
#   * avg_RNA_reads:  copies made divided by (gene reads + first value of the mRNA trace),
# and print the genes that fall outside the thresholds used below.

total_ptns = 0

avg_gene_reads = []
avg_RNA_reads = []

overTot = 0

for locusTag in genomePtnLocDict:
    locusNum = locusTag.split('_')[1].lstrip('0')
    ptnID = 'P_' + locusNum
    try:
        t, avgCnt = pltAvgPtn(ptnID)
        if avgCnt[0] >= 10:
            scaledCnt = avgCnt[-1]/avgCnt[0]
            cntMade = avgCnt[-1] - avgCnt[0]
            total_ptns = total_ptns + cntMade

            gene_reads = calcAvgGeneReads(locusTag)
            time, avgRna = pltAvgRna(locusTag)
            total_RNA = gene_reads + avgRna[0]
            avg_gene_reads.append(gene_reads)

            # Computed once and reused for the list entry, the threshold test and the printout.
            ptns_per_mRNA = cntMade/total_RNA
            avg_RNA_reads.append(ptns_per_mRNA)

            # Fold change above 1.4, or exactly unchanged.
            if scaledCnt > 1.4 or scaledCnt == 1.0:
                print(ptnID, ' : ', avgCnt[0], ' : ', scaledCnt)
                overTot = overTot + 1

            if gene_reads > 10 or gene_reads < 0.5:
                print(locusTag, ' gene reads : ', gene_reads)

            if (ptns_per_mRNA > 6) or (ptns_per_mRNA < 3):
                print(locusTag, ' mRNA reads : ', ptns_per_mRNA)

    except Exception:
        # Best effort: skip genes whose traces cannot be retrieved or processed.
        # `Exception` (rather than a bare except) lets a kernel interrupt stop the loop.
        continue

In [None]:
# Smallest per-gene value collected in avg_gene_reads.
print(min(avg_gene_reads))

In [None]:
# Histogram of the per-gene values in avg_gene_reads, using unit-width bins.
binwidth = 1
gene_read_bins = np.arange(min(avg_gene_reads), max(avg_gene_reads) + binwidth, binwidth)

fig = plt.figure(figsize=(2.5,6/8*2.5))
ax = plt.gca()

ax.hist(avg_gene_reads, bins=gene_read_bins, edgecolor='k', color='darkorange')

ax.set_xlabel('Transcription Events/Protein-Coding Gene')
ax.set_ylabel('Genes')
ax.set_xlim(-0.5,20)

plt.show()

In [None]:
# Histogram of the per-gene values in avg_RNA_reads (copies made per mRNA),
# using unit-width bins.
fig = plt.figure(figsize=(2.5,6/8*2.5))

ax = plt.gca()

binwidth=1
ax.hist(avg_RNA_reads, bins=np.arange(min(avg_RNA_reads), max(avg_RNA_reads) + binwidth, binwidth),
        edgecolor='k', color='r')# bins = 20)
ax.set_xlabel('Proteins Translated per mRNA')
# Label typo fixed ('Uniquie' -> 'Unique').
ax.set_ylabel('Unique mRNA')
# plt.xlim(0,8.5)

plt.show()

In [None]:
# Smallest per-gene value collected in avg_RNA_reads.
print(min(avg_RNA_reads))

In [None]:
def getRNAsequences(jcvi3AID):
    """Return the transcribed (RNA) sequence for a locus tag, or 0 if unavailable.

    The locus is looked up in ``genomeLocDict``, its sequence is extracted from
    ``genome3A.seq`` and the result is transcribed.

    Parameters
    ----------
    jcvi3AID : key into ``genomeLocDict`` (a locus tag such as 'JCVISYN3A_0779').

    Returns
    -------
    The transcribed sequence, or the integer ``0`` as a sentinel when the lookup,
    extraction or transcription fails. Callers test ``rnasequence != 0``.
    """
    try:
        return genomeLocDict[jcvi3AID].extract(genome3A.seq).transcribe()
    except Exception:
        # Best effort: any failure means "no sequence available". `Exception`
        # (rather than a bare except) still lets a kernel interrupt through.
        return 0

In [None]:
# Define how to calculate transcription rate constants as in equation 3 for transcription reactions.
# Uses mature transcript length and proteomics for promoter strength.

def TranscriptRate(rnaMetID, ptnMetID, rnasequence, jcvi2ID):
    """Return the transcription rate constant for one transcript.

    Parameters
    ----------
    rnaMetID : not used by the calculation; kept for interface compatibility.
    ptnMetID, jcvi2ID : passed to ``getPtnCount`` to obtain the protein count
        that scales the polymerase kcat.
    rnasequence : RNA sequence; must be indexable, support ``count`` and have
        at least two bases.

    Returns
    -------
    ``kcat_mod / (rnaPolKd**2/(C1*C2) + sum_b N_b*rnaPolKd/[NTP_b] + n_tot - 1)``
    where C1 and C2 are the ``baseMap`` values of the first two bases.

    Raises
    ------
    Exception
        If the sequence contains a base that is not a key of ``baseMap``.

    Notes
    -----
    ``baseMap`` values are used as concentrations here, so it must map each
    base to its NTP concentration.
    """
    # Refuse sequences with bases that have no entry in baseMap.
    unknown_bases = set(rnasequence) - set(baseMap.keys())
    if unknown_bases:
        raise Exception("Unknown base(s) in RNA sequence {}".format(unknown_bases))

    # Occurrences of each base present in the sequence, and the total length they add up to.
    occurrences = {base: rnasequence.count(base) for base in set(rnasequence)}
    n_tot = sum(occurrences.values())

    ptnCount, ptnName = getPtnCount(ptnMetID, jcvi2ID)

    # Scale kcat by the protein count relative to 180, capped at 85 and floored at 10.
    kcat_mod = max(10, min(rnaPolKcat*(ptnCount/(180)), 85))

    # baseMap values of the first two monomers.
    CMono1 = baseMap[ rnasequence[0] ]
    CMono2 = baseMap[ rnasequence[1] ]

    NMono_A = occurrences.get("A", 0)
    NMono_U = occurrences.get("U", 0)
    NMono_C = occurrences.get("C", 0)
    NMono_G = occurrences.get("G", 0)

    NMonoSum = NMono_A*rnaPolKd/ATPconc + NMono_C*rnaPolKd/CTPconc + NMono_U*rnaPolKd/UTPconc + NMono_G*rnaPolKd/GTPconc

    return kcat_mod / ((rnaPolKd**2)/(CMono1*CMono2) + NMonoSum + n_tot - 1)



In [None]:
# IDs for which getPtnCount fell back to the default count.
ptnNoQuant = set()

def getPtnCount(newMetID, jcvi2ID):
    """Return ``(ptnCount, ptnName)`` for a protein, using proteomics data if available.

    Parameters
    ----------
    newMetID : ID used as the fallback name and recorded in ``ptnNoQuant`` when no
        quantification is found.
    jcvi2ID : ID translated to an AOE ID, via ``manGPRPD`` when it starts with
        "JCVIman_" and via ``J2toAOE`` otherwise.

    Returns
    -------
    ptnCount : rounded value from column 21 of the matching ``proteomPD`` row,
        but never less than ``defaultPtnCount``; ``defaultPtnCount`` on failure.
    ptnName : column 1 of the matching row with the
        " [synthetic bacterium JCVI-Syn3.0]" suffix removed; ``newMetID`` on failure.

    Side effects
    ------------
    On failure a warning is printed and ``newMetID`` is added to ``ptnNoQuant``.
    """
    # Check if protein quantification is available.
    try:
        if jcvi2ID.startswith("JCVIman_"):
            aoeID = manGPRPD.loc[ manGPRPD.MM == jcvi2ID.replace("JCVIman_",""), "AOE" ].values[0]
        else:
            aoeID = J2toAOE[ jcvi2ID ]

        # Look the proteomics row up once and read both fields from it.
        ptnRow = proteomPD.loc[ proteomPD.Protein == aoeID ]

        ptnCount = max(defaultPtnCount,round(ptnRow.iloc[0,21]))
        ptnName  = ptnRow.iloc[0,1].replace(
            " [synthetic bacterium JCVI-Syn3.0]","")

    except Exception:
        # Best effort: any lookup failure falls back to the default count.
        # `Exception` (rather than a bare except) still lets a kernel interrupt through.
        print("WARNING: No protein count for", newMetID)
        print("Using default protein concentration.")

        ptnName = newMetID
        ptnCount = defaultPtnCount

        ptnNoQuant.add(newMetID)

    return ptnCount, ptnName

In [None]:
# Global parameters for transcription
rnaPolKcat = 20 # nt/s
rnaPolK0 = 1e-4 #mM
rnaPolKd = 0.1 #mM

rrnaPolKcat = 85 # nt/s

krnadeg = 0.00578/2 # 1/s
# rna_deg_rate = sim.rateConst('RNAdeg', krnadeg, 2)

ATPconc = 4 #mM
UTPconc = 1 #mM
CTPconc = 1 #mM
GTPconc = 2 #mM

# Cell radius (meters):
# r_cell = 2.5*(10**-7)
r_cell = 2.0*(10**-7) # m

# Sphere volume in m^3, times 1000 to convert to litres.
CytoVolume = (4*np.pi/3)*1000*r_cell**3 # L
cellVolume = CytoVolume

# Volume of a cube with an 8e-9 m edge, in litres.
subvolume_vol = 1000*(8e-9)**3 # L

# print(cellVolume)

# Avogadro:
avgdr   = 6.022e23 # molec/mol
Avognum = avgdr

NaV = Avognum * subvolume_vol

# Conversion factor from a particle count in the cell volume to mM.
countToMiliMol = 1000/(avgdr*cellVolume)

defaultPtnCount = 10

RnaPconc = 187*countToMiliMol # mM

# Global parameter for degradation of mRNAs (same value as krnadeg).
rnaDegRate = krnadeg # 1/s

degrad_bind_rate = 11/60/countToMiliMol*1000 #1/M/s
# degrad_bind_rate = 11/60*NaV #1/M/s
# deg_bind_rate = sim.rateConst('RNAdeg', degrad_bind_rate, 2)

# Create a map for rna sequence to NTP concentration.
# (An earlier assignment in this cell mapped bases to metabolite IDs; it was
# always overwritten by this one before being read, so it has been dropped.)
baseMap = OrderedDict({ "A":ATPconc, "U":UTPconc, "G":GTPconc, "C":CTPconc })

# Create Dictionaries to map tRNAs to associated aa abbreviations in protein sequences.
aaMap = OrderedDict({"A":"M_ala__L_c", "R":"M_arg__L_c", 
    "N":"M_asn__L_c", "D":"M_asp__L_c", "C":"M_cys__L_c", "E":"M_glu__L_c", "Q":"M_gln__L_c", "G":"M_gly_c", 
    "H":"M_his__L_c", "I":"M_ile__L_c", "L":"M_leu__L_c", "K":"M_lys__L_c", "M":"M_met__L_c", "F":"M_phe__L_c", 
    "P":"M_pro__L_c", "S":"M_ser__L_c", "T":"M_thr__L_c", "W":"M_trp__L_c", "Y":"M_tyr__L_c", "V":"M_val__L_c",
    "*":"Stop_Codon"})

aaTRNAMap = OrderedDict({"A":"M_alatrna_c", "R":"M_argtrna_c", 
    "N":"M_asntrna_c", "D":"M_asptrna_c", "C":"M_cystrna_c", "E":"M_glutrna_c", "Q":"M_glntrna_c", "G":"M_glytrna_c", 
    "H":"M_histrna_c", "I":"M_iletrna_c", "L":"M_leutrna_c", "K":"M_lystrna_c", "M":"M_mettrna_c", "F":"M_phetrna_c", 
    "P":"M_protrna_c", "S":"M_sertrna_c", "T":"M_thrtrna_c", "W":"M_trptrna_c", "Y":"M_tyrtrna_c", "V":"M_valtrna_c"})

aaTRNAFreeMap = OrderedDict({"A":"M_trnaala_c", "R":"M_trnaarg_c", 
    "N":"M_trnaasn_c", "D":"M_trnaasp_c", "C":"M_trnacys_c", "E":"M_trnaglu_c", "Q":"M_trnagln_c", "G":"M_trnagly_c", 
    "H":"M_trnahis_c", "I":"M_trnaile_c", "L":"M_trnaleu_c", "K":"M_trnalys_c", "M":"M_trnamet_c", "F":"M_trnaphe_c", 
    "P":"M_trnapro_c", "S":"M_trnaser_c", "T":"M_trnathr_c", "W":"M_trnatrp_c", "Y":"M_trnatyr_c", "V":"M_trnaval_c"})


# Global parameters for translation
riboKcat = 12 # 1/s
riboK0 = 4*25e-6 # mM
riboKd = 0.0001 # mM

ribosomeConc = 500*countToMiliMol # mM

# Concentration of charged tRNA
ctRNAconc = 150*countToMiliMol # mM

# Global parameter for degradation of proteins
# Derived from eLife's model, using average protein half life of 25 hours. 
ptnDegRate = 7.70e-06 # 1/s



In [None]:
# Estimate an mRNA half-life for one gene from the simulated traces.

def calc_mRNA_thalf(jcvi3AID):
    """Estimate the mRNA half-life for a gene from the averaged traces.

    The degradation rate is computed as
    ``kDeg = k_transcription * <RP trace> / <mRNA trace>``, where both means are
    taken over the second half of the traces, and the half-life is
    ``ln(2) / kDeg / 60`` (presumably a steady-state balance between transcription
    and degradation; confirm against the model description).

    Parameters
    ----------
    jcvi3AID : locus tag of the form 'JCVISYN3A_XXXX'.

    Returns
    -------
    The half-life (``ln(2)/kDeg`` divided by 60), or ``None`` when
    ``getRNAsequences`` reports no sequence for the locus.
    """
    mmcode = 'MMSYN1_' + jcvi3AID.split('_')[1]

    # Checks if a translation to JCVISYN2* code is available
    try:
        jcvi2ID = annotatPD.loc[ annotatPD.iloc[:,5] == mmcode ].iloc[0, 13].strip()
    except Exception:
        # No usable annotation row: fall back to a placeholder ID. `Exception`
        # (rather than a bare except) still lets a kernel interrupt through.
        jcvi2ID = "JCVIunk_" + mmcode

    # Proteins are named 'PTN_' + locus tag.
    ptnMetID = 'PTN_' + jcvi3AID

    # The return values are not needed here. The call is kept because getPtnCount
    # prints a warning and records the ID in ptnNoQuant when no quantification
    # exists, and that happens even for genes without a sequence.
    getPtnCount(ptnMetID, jcvi2ID)

    # Get the RNA sequence, if available (0 is the "not available" sentinel).
    rnasequence = getRNAsequences(jcvi3AID)
    if rnasequence == 0:
        return None

    # Species names use the locus number without leading zeros.
    locusNum = jcvi3AID.split('_')[1].lstrip('0')
    rnaMetID = "RNA_" + locusNum
    RnapName = 'RP_' + locusNum

    trsc_rate = TranscriptRate(rnaMetID, ptnMetID, rnasequence, jcvi2ID)

    _, avgTrace = pltAvgRna(jcvi3AID)
    _, avgRnap = pltAvgSpec(RnapName)

    # Average both traces over the second half of the simulation.
    nPoints = len(avgTrace)
    firstIdx = nPoints // 2
    # Number of samples actually summed. The previous divisor, len - len/2, was
    # off by one half for odd lengths; the ratio below is unaffected either way.
    nAveraged = nPoints - firstIdx

    total_et = 0
    total_messenger = 0
    for i in range(firstIdx, nPoints):
        total_et = total_et + avgRnap[i]
        total_messenger = total_messenger + avgTrace[i]

    avgET = total_et/nAveraged
    avgMess = total_messenger/nAveraged

    kDeg = trsc_rate * avgET / avgMess

    tHalf = np.log(2)/kDeg/60

    return tHalf
        

In [None]:
# Spot-check the half-life estimate for a single gene.
calc_mRNA_thalf('JCVISYN3A_0779')

In [None]:
# Estimate a half-life for every gene and print the ones above 4 or below 0.1.
half_lives = []

for locusTag in genomePtnLocDict:
    thalf = calc_mRNA_thalf(locusTag)
    half_lives.append(thalf)
    # calc_mRNA_thalf returns None when no RNA sequence is available; comparing
    # None with a number raises TypeError, so skip the range check for those genes.
    if thalf is not None and (thalf>4 or thalf<0.1):
        print(locusTag,thalf)


In [None]:
# Keep only usable half-lives: drop missing or zero entries and anything that
# is not below 20. Values under 0.16 are printed as they are kept.
half_lives_2 = []

for half_life in half_lives:
    # None and 0 are both falsy, so one truthiness test covers the two.
    if not half_life:
        continue
    # Written as "not <" so that NaN is dropped as well.
    if not (half_life<20):
        continue
    half_lives_2.append(half_life)
    if half_life < 0.16:
        print(half_life)

In [None]:
# Histogram of the filtered mRNA half-lives, with the mean and median marked
# as vertical lines.
plt.rcParams.update({'font.size': 8})

# fig = plt.figure(figsize=(2.5,6/8*2.5))
fig = plt.figure(figsize=(3.3,2.5))
ax = plt.gca()

# Fixed-width bins spanning the observed range.
binwidth=0.5
half_life_bins = np.arange(min(half_lives_2), max(half_lives_2) + binwidth, binwidth)
ax.hist(half_lives_2, bins=half_life_bins, edgecolor='k', color='y')

# One marker line per summary statistic; the legend label carries the rounded value.
for stat_label, stat_func, stat_color in (('Average: ', np.average, 'b'),
                                          ('Median: ', np.median, 'r')):
    stat_value = stat_func(half_lives_2)
    ax.axvline(x=stat_value, c=stat_color, lw=2,
               label=stat_label+str(np.round(stat_value,2))+' min')

ax.set_xlabel('mRNA Half Life (min)')
ax.set_ylabel('Number of Unique mRNA')
ax.set_xlim(-0.25,17)
ax.set_ylim(0,100)
ax.legend()

plt.show()

# plt.xscale('log')

In [None]:
# Mean of the filtered half-lives in half_lives_2.
np.average(half_lives_2)