In [3]:
from datetime import datetime
import numpy as np
import os
import importlib
import sys
from Bio import SeqIO


import WCM_analysis
importlib.reload(WCM_analysis)

import WCM_gene as gene
import WCM_ptn as ptn

#### Read in the pkl file

In [4]:
# Location and label of the merged ensemble pkl file
in_dir = '../healthy_10replicates/'
pkl_dir = in_dir
pkl_label = 'WCMensemble_healthy_10replicates'

# Instantiate the WCM_ensemble container from WCM_analysis and
# read the merged pkl file into it
w = WCM_analysis.WCM_ensemble()
w.read_merged_ensemble(pkl_dir,pkl_label)

# Output directory for the figures. exist_ok=True makes the cell safe to
# re-run and avoids the check-then-create race of os.path.exists + os.makedirs.
fig_dir = './plots_WCM/'
os.makedirs(fig_dir, exist_ok=True)

# Replicates to analyse, numbered min_rep..max_rep inclusive
min_rep = 1
max_rep = 10

healthy_list = np.arange(min_rep,max_rep+1,dtype=np.int32)
# Same replicates shifted to zero-based positions; used below to select
# columns of the traces returned by w
healthy_indices = [rep -1 for rep in healthy_list]

#### Cell Growth

In [5]:
fig_label = 'growth'

# Both growth plots take the same argument list: volume first, then surface area
for plot_growth in (w.plot_volume, w.plot_surfacearea):
        plot_growth(fig_dir, fig_label, healthy_list, '.png', True)

#### Replication Initiation

In [6]:
# Trace of the 'RepInitCheck' species, restricted to the selected replicate columns
count_initcheck = w.get_specie_trace('RepInitCheck')[:,healthy_indices]

fig_label = 'Initiation_' 

# Last row of the trace: one value per selected column
final_initcheck = count_initcheck[-1,:]

# Distribution of rounds of initiation
xlabel = 'Rounds of Replication Initiation'
ylabel = 'Frequency'
title = 'Distribution_of_Initiation_Rounds_After_105_Mins'

w.plot_hist(fig_dir, fig_label, '.png', final_initcheck, xlabel, ylabel, title, bins=10)

In [7]:
# Filament-length data, unpacked into the mother / d1 / d2 variables
filas_mother, filas_d1, filas_d2 = gene.get_filamentlength_new(w)
filas = [filas_mother, filas_d1, filas_d2]
ylabel = r'Filament Length [\#]'
titles = ['Filalength_mother_all' ]

# zip stops at the shorter sequence, so only as many entries of filas
# as there are titles get plotted (currently just the first one)
for title, fila in zip(titles, filas):
        w.plot_in_replicates_single(fig_dir, fig_label, '.png', fila, healthy_list, ylabel, title, False, True)
        

In [8]:
ini_rounds, ini_times, ini_mother_times, ini_daughter1_times, ini_daughter2_times = gene.analyze_initiation(w,healthy_list)

# Divide by 60 (presumably seconds -> minutes, matching the [Min] axis label below)
ini_mother_times = [t_init/60 for t_init in ini_mother_times]

# Distribution of time to finish the first initiation
title = 'Distribution_of_Time_to_Finish_First_Initiation'
xlabel = 'Time to Finish First Round Replication Initiation [Min]'
ylabel = 'Frequency'

w.plot_hist(fig_dir, fig_label, '.png', ini_mother_times, xlabel, ylabel, title, bins=20)

Following is the initiation analysis of 10 replicates 0, 1, 2, 3, 4, 5, 6, 7, 8, 9.
Replicate 0 finished 3 rounds of replication initiation at 3.2666666666666666,68.6,78.98333333333333, minutes
Replicate 1 finished 7 rounds of replication initiation at 6.816666666666666,56.63333333333333,57.13333333333333,101.61666666666666,103.25,103.75,104.1, minutes
Replicate 2 finished 5 rounds of replication initiation at 0.9,47.583333333333336,63.35,93.58333333333333,97.3, minutes
Replicate 3 finished 3 rounds of replication initiation at 16.85,63.416666666666664,67.73333333333333, minutes
Replicate 4 finished 3 rounds of replication initiation at 2.566666666666667,73.43333333333334,85.58333333333333, minutes
Replicate 5 finished 5 rounds of replication initiation at 2.5166666666666666,52.733333333333334,61.983333333333334,96.25,96.95, minutes
Replicate 6 finished 4 rounds of replication initiation at 5.833333333333333,54.516666666666666,56.2,104.28333333333333, minutes
Replicate 7 finished 3 rou

#### Gene Copy Number

In [9]:
# Parse the first record of the GenBank file and categorize its genes
genomeFile3A =  '../input_data/syn3A.gb'
genome3A = next(SeqIO.parse(genomeFile3A, "gb"))
genomeDict = gene.mapDNA(genome3A)
LocusNumstoType, geneTypes = gene.categorizeGenes(genomeDict)

# Species names 'G_<locusNum>' for every gene type in LocusNumstoType
ptncoding_genes = list(map('G_{0}'.format, LocusNumstoType['protein']))
tRNA_genes = list(map('G_{0}'.format, LocusNumstoType['tRNA']))
rRNA_genes = list(map('G_{0}'.format, LocusNumstoType['rRNA']))
tmRNA_genes = list(map('G_{0}'.format, LocusNumstoType['tmRNA']))
ncRNA_genes = list(map('G_{0}'.format, LocusNumstoType['ncRNA']))
pesudo_genes = list(map('G_{0}'.format, LocusNumstoType['gene']))

# Note: the 'gene'-type entries (pesudo_genes) are not part of entire_genes
entire_genes = [*ptncoding_genes, *tRNA_genes, *rRNA_genes, *tmRNA_genes, *ncRNA_genes]

# Names of the 'Produced_' and 'RNAP_' species for the same genes
Produced_total_genes = list(map('Produced_{0}'.format, entire_genes))
RNAP_total_genes = list(map('RNAP_{0}'.format, entire_genes))

# Traces of each group of species
free_genes_counts = w.get_species_traces(entire_genes)
Produced_genes_counts = w.get_species_traces(Produced_total_genes)
Bound_genes_counts = w.get_species_traces(RNAP_total_genes)

# Total = traces of the 'G_' species plus traces of the 'RNAP_G_' species
total_genes_counts = free_genes_counts + Bound_genes_counts 

27169
Six types of genes in Syn3A are protein, ncRNA, gene, rRNA, tRNA, tmRNA with respective numbers 455, 2, 3, 6, 29, 1.


In [10]:
fig_label = 'replication_'

# Mean over axis 2 of the selected columns; computed once and shared by both plots
mean_genes_counts = np.mean(total_genes_counts[:,:,healthy_indices], axis = 2)

title = 'Gene_Copy_Numbers'
w.plot_ensemble_averaged_multiples(fig_dir, fig_label, '.png', mean_genes_counts, entire_genes, 'count', title, True )

# Histogram of the last entry along axis 1 of the averaged counts
title = 'Distribution_of_Gene_Copy_Numbers_After_105_min'
xlabel = 'Gene Copy Numbers'
ylabel = 'Frequency'
w.plot_hist(fig_dir, fig_label, '.png', mean_genes_counts[:,-1], xlabel, ylabel, title, bins=50  )

In [11]:

# Trace of 'Produced_G_0420' for the selected replicate columns
Produced_G_420 = w.get_specie_trace('Produced_G_0420')[:,healthy_indices]

# Value recorded for replicates whose trace never equals 1: 6300/60 = 105
# (presumably the 6300 s simulation length expressed in minutes -- TODO confirm)
t_end_default = 6300/60

t_end_list = []

# Column indices of the replicates that fell back to t_end_default
no_rep = []

for i_rep in range(Produced_G_420.shape[1]):
        trace = Produced_G_420[:,i_rep]
        # Row indices at which the trace equals exactly 1
        hit_rows = np.where(trace == 1)[0]
        if hit_rows.size > 0:
                # First such row divided by 60 (assumes one row per second -- TODO confirm)
                tend_replication = hit_rows[0]/60
                t_end_list.append(tend_replication)
        else:
                # Explicit "not found" branch instead of a bare except, so that
                # unrelated errors (KeyboardInterrupt, typos, shape problems)
                # are no longer silently swallowed
                no_rep.append(i_rep)
                t_end_list.append(t_end_default)


xlabel = 'Time to Finish First Round Replication [Min]'
ylabel = 'Frequency'
title = 'Distribution_of_Time_to_Finish_First_Replication'

w.plot_hist(fig_dir, fig_label, '.png', t_end_list, xlabel, ylabel, title, bins=20)

#### Transcriptomics/mRNAs

In [12]:
# Each category is paired by position with the species-name prefix used for it
Categories = ['free', 'produced','ribosome', 'degradosome', 'degradated']
Prefixes = ['R_', 'Produced_R_', 'Ribosome_mRNA_', 'Degradosome_mRNA_', 'Degradated_mRNA_']

# category -> list of species names, and category -> traces of those species
mRNA_LocusTag_Dict = {}
mRNA_Counts_Dict = {}

for Category, prefix in zip(Categories, Prefixes):
        # Species names are the prefix followed by each 'protein' locus number
        locus_tags = [prefix+locusNum for locusNum in LocusNumstoType['protein']]
        mRNA_LocusTag_Dict[Category] = locus_tags
        mRNA_Counts_Dict[Category] = w.get_species_traces(locus_tags)

In [13]:
fig_label = 'mRNA_'
# Sum of the 'free', 'ribosome' and 'degradosome' mRNA traces
Counts1 = mRNA_Counts_Dict['free'] + mRNA_Counts_Dict['ribosome'] + mRNA_Counts_Dict['degradosome']
# Raw string: '\#' is not a valid escape sequence in a normal string literal and
# triggers a DeprecationWarning/SyntaxWarning; the resulting text is unchanged
ylabel = r'Count [\#]'
title =  'Counts_All_mRNAs'

# Sum over axis 0 collapses the per-species traces into a single total per replicate
w.plot_in_replicates_single(fig_dir, fig_label, '.png', np.sum(Counts1, axis=0), healthy_list, ylabel, title, True, True)

#### Proteomics

In [14]:
normal_ptns, ribosomal_ptns, ten_ptns = ptn.get_categories_ptn()

fig_label = 'ptn_'

# Only the first category returned above is analysed in this cell
ptns_list =  normal_ptns

# Species names 'Produced_P_<locusNum>' and 'P_<locusNum>' for the chosen proteins
Produced_ptns = ['Produced_P_{0}'.format(locusNum) for locusNum in ptns_list ]
ptns = ['P_{0}'.format(locusNum) for locusNum in ptns_list ]

Produced_ptns_counts = w.get_species_traces(Produced_ptns)
ptns_counts = w.get_species_traces(ptns)

# Values at the first index of axis 1 (the time axis), kept 3-D so that they
# broadcast against every time point
initial_ptns = ptns_counts[:,:1,:]

# Total = produced so far + initial value
total_ptns = Produced_ptns_counts + initial_ptns

# Ratio of total to initial value. Computed directly as a float array:
# filling np.zeros_like(ptns_counts) would truncate the ratios whenever the
# traces have an integer dtype.
# NOTE(review): entries whose initial value is 0 give inf/nan here, as before.
ratio = Produced_ptns_counts/initial_ptns + 1

# Ensemble averaged counts of proteins
# title is set explicitly; it used to be inherited from whichever cell ran last
title = 'Protein_Counts'
w.plot_ensemble_averaged_multiples(fig_dir, fig_label, '.png',np.mean(total_ptns[:,:,healthy_indices], axis=2),ptns, 'count',title, True )

# Ensemble averaged ratio of proteins
title = 'Scaled_Protein_Abundance'
w.plot_ensemble_averaged_multiples(fig_dir, fig_label, '.png', np.mean(ratio[:,:,healthy_indices], axis =2), ptns, 'ratio', title, True)

# Mean over the selected replicate columns of the ratio at the last time index
scaledPtn = np.mean(ratio[:,-1,healthy_indices], axis = 1)

xlabel = 'Scaled Protein Abundance After 105 min'
ylabel = 'Frequency'
title = 'Distribution_of_Scaled_Protein_Abundance_After_105_min'

# histogram of scaled ptn abundance after 105 min
w.plot_hist(fig_dir, fig_label, '.png', scaledPtn, xlabel, ylabel, title, bins=50)

In [16]:
# Separate output directory for the metabolite figures.
# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of os.path.exists + os.makedirs.
fig_dir = './plots_WCM_Metabolites/'
os.makedirs(fig_dir, exist_ok=True)


#### Time Traces of All Metabolites

In [17]:
fig_label = 'metabolites_'

# Every species in w.species_map whose name starts with 'M_'
intra_metabolites = [name for name, _ in w.species_map.items() if name.startswith('M_')]

# Additional species appended by name: the four pts*/crr species and their '_P' forms
intra_metabolites += ['ptsi','ptsh','crr','ptsg','ptsi_P','ptsh_P','crr_P','ptsg_P']


w.plot_individual_concentrations(fig_dir, fig_label, '.png', intra_metabolites, healthy_list, True )

  1.2*np.amax(y))
  1.2*np.amax(y))
  1.2*np.amax(y))
  1.2*np.amax(y))
  1.2*np.amax(y))
  1.2*np.amax(y))
  1.2*np.amax(y))
  1.2*np.amax(y))
  1.2*np.amax(y))
  1.2*np.amax(y))
  1.2*np.amax(y))
