# Load translation voxel simulation data

In [1]:
%%time
%load_ext autoreload

from analysis_utils import *
from joblib import Parallel, delayed
import multiprocessing  
import numpy as np
import pandas as pd

#Growth rate = 3.0
#Computes the translation latencies from our model with simulations containing
#1 to 14 cognate ternary complexes in the voxel (14 to 42 not included because
#the probability of voxels with that number of cognates is ~0)
path = "/Users/Akshay/Documents/tRNAShuffle/data/"
ensmbl_latency_dict = dict()
rxndiff=dict()
scalingFactor = 200
NR_scaledrates = {'k1r':718*200/scalingFactor,'k2f':1475*200/scalingFactor,'k2r_nr':1120*200/scalingFactor,'k3_nr':6*200/scalingFactor,'k4':209*200/scalingFactor}


data = "191003_2210/"
for i in range(1,7):
    sim_i = computeTransportRxnTimes(path+data,100*(i-1),100*(i-1)+100,cogtRNANum=i, ribosomeNum = 7,scaling=scalingFactor,NR_scaling = NR_scaledrates)
    ensmbl_latency_dict[i] = (CellLatencies(sim_i))

data = "191105_0908/"
for i in range(7,14):
    sim_i = computeTransportRxnTimes(path+data, 100*(i-1-6),100*(i-1-6)+100,cogtRNANum=i, ribosomeNum = 7,scaling=scalingFactor,NR_scaling = NR_scaledrates)
    ensmbl_latency_dict[i] = (CellLatencies(sim_i))
    
#Experimentally obtained codon usage frequencies and tRNA abundances at growth rate = 2.5 dbl/hr
gr25_Codon = np.array([2.36, 1.26, 45.55, 34.17, 16.97, 57.86, 19.27, 33.74, 14.98, 22.31, 43.18, 7.67, 24.11, 24.87, 39.49, 11.81, 0.03, 0.63, 2.19, 9.31, 17.22, 55.01, 5.61, 29.21, 21.67, 0.52, 15.79, 43.86, 4.17, 2.61, 20.64, 26.7, 7.03, 0.19, 2.76, 3.81, 6.72, 16.52, 4.27, 2.73, 7.92, 23.25, 2.51, 1.98, 16.33, 11.68, 0.62, 0.67, 43.82, 20.59, 27.28, 7.01, 6.78, 14.21, 60.75, 0.82, 3.86, 4.09, 28.82, 5.18, 4.38, 1.09])
gr25_pCodon = gr25_Codon/np.sum(gr25_Codon)
gr25_ptRNA = [0.0602409638554217, 0.010542168674698798, 0.07379518072289158, 0.00602409638554217, 0.010542168674698798, 0.00602409638554217, 0.021084337349397596, 0.043674698795180725, 0.021084337349397596, 0.01204819277108434, 0.01807228915662651, 0.08433734939759038, 0.03162650602409639, 0.07228915662650603, 0.01204819277108434, 0.07078313253012049, 0.06325301204819278, 0.01656626506024097, 0.009036144578313254, 0.027108433734939763, 0.010542168674698798, 0.03012048192771085, 0.013554216867469882, 0.015060240963855425, 0.0075301204819277125, 0.010542168674698798, 0.0075301204819277125, 0.003012048192771085, 0.021084337349397596, 0.004518072289156627, 0.01656626506024097, 0.01204819277108434, 0.0015060240963855425, 0.009036144578313254, 0.01656626506024097, 0.019578313253012052, 0.015060240963855425, 0.027108434, 0.05873493975903616, 0.019578313]
gr25_ptRNA = gr25_ptRNA/np.sum(gr25_ptRNA)

n_cores = 16

##Compute the codon distribution and elongation latency of each gene weighted by transcript number
codon_elongt = compute_codon_elongt(gr25_ptRNA, len(gr25_pCodon), ensmbl_latency_dict)
gene_map, gene_latency = compute_gene_elongt(codon_elongt)
pCodon_transcriptome, transcriptome_elongt = compute_transcript_distributions(gene_map,gene_latency)
pCodon_transcriptome = np.array(pCodon_transcriptome)

Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...


TypeError: compute_gene_elongt() missing 1 required positional argument: 'codon_elongt'

In [None]:
min(gr25_ptRNA)

## Uniform Random tRNA, WT Codon usage, 100 samples

In [2]:
%%time
elongt_list = list()
ptRNA_list = list()
inputs = list()

for i in range(100):
    ptRNA_uniform = np.random.uniform(min(gr25_ptRNA),max(gr25_ptRNA),len(gr25_ptRNA))
    ptRNA_list.append(ptRNA_uniform)
    inputs.append([ptRNA_uniform,pCodon_transcriptome,ensmbl_latency_dict])
elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]
np.save('./data/UniformRandomtRNA_WTCodonUsage_100sample_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_WTCodonUsage_100sample_ptRNA',ptRNA_list)


CPU times: user 13.3 s, sys: 421 ms, total: 13.7 s
Wall time: 24.1 s


## Uniform Random tRNA, WT Codon usage, 10k samples

In [3]:
%%time
elongt_list = list()
ptRNA_list = list()
inputs = list()

for i in range(10000):
    ptRNA_uniform = np.random.uniform(min(gr25_ptRNA),max(gr25_ptRNA),len(gr25_ptRNA))
    ptRNA_list.append(ptRNA_uniform)
    inputs.append([ptRNA_uniform,pCodon_transcriptome,ensmbl_latency_dict])
elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]
np.save('./data/UniformRandomtRNA_WTCodonUsage_10ksample_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_WTCodonUsage_10ksample_ptRNA',ptRNA_list)


CPU times: user 27min 39s, sys: 11.3 s, total: 27min 51s
Wall time: 44min 6s


## WT tRNA, Uniform Random Codon usage, 100 samples

In [None]:
%%time
elongt_list = list()
pCodon_list = list()
inputs = list()

syn_codon_list = [[0,1,2,3],[4,5],[6,7],[8,9,10,11],[12,13,14,15],[16,17,46,47,48,49],[18,19,42,43,44,45],
              [20,21],[22,23],[24],[25,26,27],[28,29,30,31],[32],[33],[34,35],[36,37],[38,39,54,55,56,57],[40,41],
              [50,51],[52,53],[58,59,60,61]]
        
for i in range(100):
    pCodon_uniform = np.zeros(len(gr25_pCodon))
    for syn_codons in syn_codon_list:
        pCodon_uniform[syn_codons] = np.random.uniform(min(gr25_Codon),max(gr25_Codon),len(syn_codons))
        pCodon_uniform[syn_codons] = sum(pCodon_transcriptome[syn_codons])/sum(pCodon_uniform[syn_codons])*pCodon_uniform[syn_codons]
    pCodon_list.append(pCodon_uniform)
    inputs.append([gr25_ptRNA, pCodon_uniform,ensmbl_latency_dict])

elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]

np.save('./data/WTtRNA_UniformRandomCodonUsage_100sample_elongt',elongt_list)
np.save('./data/WTtRNA_UniformRandomCodonUsage_100sample_pCodon',pCodon_list)


## WT tRNA, Uniform Random Codon usage, 10k samples

In [None]:
%%time
elongt_list = list()
pCodon_list = list()
inputs = list()

syn_codon_list = [[0,1,2,3],[4,5],[6,7],[8,9,10,11],[12,13,14,15],[16,17,46,47,48,49],[18,19,42,43,44,45],
              [20,21],[22,23],[24],[25,26,27],[28,29,30,31],[32],[33],[34,35],[36,37],[38,39,54,55,56,57],[40,41],
              [50,51],[52,53],[58,59,60,61]]
        
for i in range(10000):
    pCodon_uniform = np.zeros(len(pCodon_transcriptome))
    for syn_codons in syn_codon_list:
        pCodon_uniform[syn_codons] = np.random.uniform(min(gr25_Codon),max(gr25_Codon),len(syn_codons))
        pCodon_uniform[syn_codons] = sum(pCodon_transcriptome[syn_codons])/sum(pCodon_uniform[syn_codons])*pCodon_uniform[syn_codons]
    pCodon_list.append(pCodon_uniform)
    inputs.append([gr25_ptRNA, pCodon_uniform,ensmbl_latency_dict])

elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]

np.save('./data/WTtRNA_UniformRandomCodonUsage_10ksample_elongt',elongt_list)
np.save('./data/WTtRNA_UniformRandomCodonUsage_10ksample_pCodon',pCodon_list)


## Genetic algorithm: faster tRNA distributions (Codon usage fixed, WT)

In [4]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/UniformRandomtRNA_WTCodonUsage_100sample_elongt.npy')
ptRNA_list = np.load('./data/UniformRandomtRNA_WTCodonUsage_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_transcriptome,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='fast')
    fullelongt_list.append(min(elongt_list))
    if i%100 == 0:
        print('Generation: ',i, ' elongt: ', min(elongt_list))
        
np.save('./data/UniformRandomtRNA_WTCodonUsage_2ksample_fasttRNA_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_WTCodonUsage_2ksample_fasttRNA_ptRNA',ptRNA_list)
np.save('./data/UniformRandomtRNA_WTCodonUsage_2ksample_fasttRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0  elongt:  204.7468593260982
Generation:  100  elongt:  185.59303909290207
Generation:  200  elongt:  183.6698904143997
Generation:  300  elongt:  181.6704947318644
Generation:  400  elongt:  180.376199821021
Generation:  500  elongt:  178.59407803249746
Generation:  600  elongt:  178.59407803249746
Generation:  700  elongt:  178.12911201678904
Generation:  800  elongt:  178.12911201678904
Generation:  900  elongt:  178.04931287245662
Generation:  1000  elongt:  177.2590105137187
Generation:  1100  elongt:  177.2590105137187
Generation:  1200  elongt:  176.55324018947073
Generation:  1300  elongt:  176.55324018947073
Generation:  1400  elongt:  176.14176666410413
Generation:  1500  elongt:  176.14176666410413
Generation:  1600  elongt:  176.14176666410413
Generation:  1700  elongt:  175.50266749159155
Generation:  1800  elongt:  175.50266749159155
Generation:  1900  elongt:  175.502667

## Genetic algorithm: slower tRNA distributions (Codon usage fixed, WT)

In [5]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/UniformRandomtRNA_WTCodonUsage_100sample_elongt.npy')
ptRNA_list = np.load('./data/UniformRandomtRNA_WTCodonUsage_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_transcriptome,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='slow')
    fullelongt_list.append(max(elongt_list))
    if i%100 == 0:
        print('Generation: ',i)
np.save('./data/UniformRandomtRNA_WTCodonUsage_2ksample_slowtRNA_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_WTCodonUsage_2ksample_slowtRNA_ptRNA',ptRNA_list)
np.save('./data/UniformRandomtRNA_WTCodonUsage_2ksample_slowtRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0
Generation:  100
Generation:  200
Generation:  300
Generation:  400
Generation:  500
Generation:  600
Generation:  700
Generation:  800
Generation:  900
Generation:  1000
Generation:  1100
Generation:  1200
Generation:  1300
Generation:  1400
Generation:  1500
Generation:  1600
Generation:  1700
Generation:  1800
Generation:  1900
CPU times: user 23min 36s, sys: 3.5 s, total: 23min 40s
Wall time: 1h 18min


## Genetic algorithm: faster codon usage distributions (tRNA abundances fixed, WT)

In [9]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/WTtRNA_UniformRandomCodonUsage_100sample_elongt.npy')
pCodon_list = np.load('./data/WTtRNA_UniformRandomCodonUsage_100sample_pCodon.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, pCodon_list, elongt_list = run_ga_CodonSweep(gr25_ptRNA,pCodon_list,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='fast')
    fullelongt_list.append(min(elongt_list))
    if i%100 == 0:
        print('Generation: ',i, ' elongt: ', min(elongt_list))
        
np.save('./data/WTtRNA_UniformRandomCodonUsage_2ksample_fastCodonUsage_elongt',elongt_list)
np.save('./data/WTtRNA_UniformRandomCodonUsage_2ksample_fastCodonUsage_pCodon',pCodon_list)
np.save('./data/WTtRNA_UniformRandomCodonUsage_2ksample_fastCodonUsage_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0  elongt:  191.20403884990284
Generation:  100  elongt:  183.8317274791367
Generation:  200  elongt:  183.8317274791367
Generation:  300  elongt:  183.79480130218053
Generation:  400  elongt:  183.00058094871963
Generation:  500  elongt:  183.00058094871963
Generation:  600  elongt:  182.9682133905058
Generation:  700  elongt:  181.89405407246474
Generation:  800  elongt:  181.89405407246474
Generation:  900  elongt:  181.89405407246474
Generation:  1000  elongt:  181.89405407246474
Generation:  1100  elongt:  181.89405407246474
Generation:  1200  elongt:  181.89405407246474
Generation:  1300  elongt:  181.88442465158676
Generation:  1400  elongt:  181.59291600231413
Generation:  1500  elongt:  181.59291600231413
Generation:  1600  elongt:  181.46748086212898
Generation:  1700  elongt:  181.38451951397764
Generation:  1800  elongt:  181.38451951397764
Generation:  1900  elongt:  181.38

## Genetic algorithm: slower codon usage distributions (tRNA abundances fixed, WT)

In [10]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/WTtRNA_UniformRandomCodonUsage_100sample_elongt.npy')
pCodon_list = np.load('./data/WTtRNA_UniformRandomCodonUsage_100sample_pCodon.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, pCodon_list, elongt_list = run_ga_CodonSweep(gr25_ptRNA,pCodon_list,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='slow')
    fullelongt_list.append(max(elongt_list))
    if i%100 == 0:
        print('Generation: ',i, ' elongt: ', min(elongt_list))
        
np.save('./data/WTtRNA_UniformRandomCodonUsage_2ksample_slowCodonUsage_elongt',elongt_list)
np.save('./data/WTtRNA_UniformRandomCodonUsage_2ksample_slowCodonUsage_pCodon',pCodon_list)
np.save('./data/WTtRNA_UniformRandomCodonUsage_2ksample_slowCodonUsage_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0  elongt:  197.15079674970096
Generation:  100  elongt:  204.67877264653683
Generation:  200  elongt:  207.69370503330927
Generation:  300  elongt:  208.5881553822576
Generation:  400  elongt:  209.1548761281126
Generation:  500  elongt:  209.9507496043587
Generation:  600  elongt:  210.76652294650046
Generation:  700  elongt:  207.24318243566364
Generation:  800  elongt:  208.94475841032855
Generation:  900  elongt:  211.12963582348067
Generation:  1000  elongt:  210.52570746321175
Generation:  1100  elongt:  211.24092953215046
Generation:  1200  elongt:  210.48032627973268
Generation:  1300  elongt:  210.47866205263128
Generation:  1400  elongt:  209.784447529888
Generation:  1500  elongt:  208.99184589196045
Generation:  1600  elongt:  210.6692604960466
Generation:  1700  elongt:  212.53593727342954
Generation:  1800  elongt:  209.31250488275492
Generation:  1900  elongt:  210.61894

# Minimal code

In [6]:
##Compute the codon distribution and elongation latency of each gene weighted by transcript number
codon_elongt = compute_codon_elongt(gr25_ptRNA, len(gr25_pCodon), ensmbl_latency_dict)
gene_map, gene_latency = compute_gene_elongt(codon_elongt, red20=True)
pCodon_transcriptome, transcriptome_elongt = compute_transcript_distributions(gene_map,gene_latency)
pCodon_transcriptome = np.array(pCodon_transcriptome)

Number of polypeptides reported:  4434
Returned gene map of length  15934
Removed  24  non-divisible by three genes
['insAB1', 'insCD2', 'dgd', 'insCD4', 'insCD6', 'alaB', 'prfB', 'supK', 'b2891', 'ECK2886', 'insCD3', 'insEF4', 'insAB2', 'insEF5', 'insCD1', 'insAB5', 'insEF1', 'insCD5', 'insEF3', 'insAB3', 'insJK', 'insAB4', 'insEF2', 'insAB6']
Unique transcripts without a Gene identifier:  272
Sequences not available for # genes in Ecocyc DB:  153
Inconsistency in codons:  0
3767
Missing genes in transcriptome_dict in compute_transcript_distributions:  133


## Uniform Random tRNA, minimal code, 100 samples


In [7]:
%%time
elongt_list = list()
ptRNA_list = list()
inputs = list()

for i in range(100):
    ptRNA_uniform = np.random.uniform(min(gr25_ptRNA),max(gr25_ptRNA),len(gr25_ptRNA))
    ptRNA_list.append(ptRNA_uniform)
    inputs.append([ptRNA_uniform,pCodon_transcriptome,ensmbl_latency_dict])
elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]
np.save('./data/UniformRandomtRNA_minimalcode_100sample_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_minimalcode_100sample_ptRNA',ptRNA_list)

CPU times: user 12 s, sys: 52.5 ms, total: 12.1 s
Wall time: 18 s


## Genetic algorithm: faster tRNA distribution with full tRNA, minimal code

In [8]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/UniformRandomtRNA_minimalcode_100sample_elongt.npy')
ptRNA_list = np.load('./data/UniformRandomtRNA_minimalcode_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_transcriptome,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='fast')
    fullelongt_list.append(min(elongt_list))
    if i%100 == 0:
        print('Generation: ',i, ' elongt: ', min(elongt_list))
        
np.save('./data/UniformRandomtRNA_minimalcode_2ksample_fasttRNA_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_minimalcode_2ksample_fasttRNA_ptRNA',ptRNA_list)
np.save('./data/UniformRandomtRNA_minimalcode_2ksample_fasttRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0  elongt:  206.14060944009728
Generation:  100  elongt:  184.47065249040617
Generation:  200  elongt:  172.76141694247272
Generation:  300  elongt:  171.97699467004063
Generation:  400  elongt:  170.77511429725126
Generation:  500  elongt:  167.5584628728589
Generation:  600  elongt:  166.85355551029303
Generation:  700  elongt:  166.85355551029303
Generation:  800  elongt:  166.60856577493433
Generation:  900  elongt:  166.60856577493433
Generation:  1000  elongt:  166.60856577493433
Generation:  1100  elongt:  166.60856577493433
Generation:  1200  elongt:  164.92418059150185
Generation:  1300  elongt:  164.54101061324639
Generation:  1400  elongt:  164.49475712293437
Generation:  1500  elongt:  164.49475712293437
Generation:  1600  elongt:  164.49475712293437
Generation:  1700  elongt:  164.49475712293437
Generation:  1800  elongt:  164.49475712293437
Generation:  1900  elongt:  164.

## Genetic algorithm: slower tRNA distribution with full tRNA, minimal code

In [9]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/UniformRandomtRNA_minimalcode_100sample_elongt.npy')
ptRNA_list = np.load('./data/UniformRandomtRNA_minimalcode_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_transcriptome,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='slow')
    fullelongt_list.append(max(elongt_list))
    if i%100 == 0:
        print('Generation: ',i,'elongt: ', max(elongt_list))
np.save('./data/UniformRandomtRNA_minimalcode_2ksample_slowtRNA_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_minimalcode_2ksample_slowtRNA_ptRNA',ptRNA_list)
np.save('./data/UniformRandomtRNA_minimalcode_2ksample_slowtRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0 elongt:  229.79502262616816
Generation:  100 elongt:  243.30179107950988
Generation:  200 elongt:  248.55241118882523
Generation:  300 elongt:  249.73461591316013
Generation:  400 elongt:  250.8934690614592
Generation:  500 elongt:  251.18101657269682
Generation:  600 elongt:  251.7319741362925
Generation:  700 elongt:  251.83228975842928
Generation:  800 elongt:  251.83228975842928
Generation:  900 elongt:  251.87044065674507
Generation:  1000 elongt:  251.9466119058292
Generation:  1100 elongt:  252.14476281671844
Generation:  1200 elongt:  252.14476281671844
Generation:  1300 elongt:  252.14476281671844
Generation:  1400 elongt:  252.14476281671844
Generation:  1500 elongt:  252.23480612657724
Generation:  1600 elongt:  252.24424399888665
Generation:  1700 elongt:  252.25445399941663
Generation:  1800 elongt:  252.25445399941663
Generation:  1900 elongt:  252.25445399941663
CPU tim

## minimal uniform random tRNA, minimal code, 100 samples

In [10]:
%%time

##Compute the codon distribution and elongation latency of each gene weighted by transcript number
codon_elongt = compute_codon_elongt(gr25_ptRNA, len(gr25_pCodon), ensmbl_latency_dict)
gene_map, gene_latency = compute_gene_elongt(codon_elongt, red20=True)
pCodon_transcriptome, transcriptome_elongt = compute_transcript_distributions(gene_map,gene_latency)
pCodon_transcriptome = np.array(pCodon_transcriptome)

ptRNA_red20 = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20 = ptRNA_red20[6]/sum(ptRNA_red20[6])
tRNA_indices = np.argwhere(ptRNA_red20 == 0)
tRNA_indices = [indices[0] for indices in tRNA_indices]
elongt_list = list()
ptRNA_list = list()
inputs = list()

for i in range(100):
    ptRNA_uniform = np.random.uniform(min(ptRNA_red20),max(ptRNA_red20),len(ptRNA_red20))
    ptRNA_uniform[tRNA_indices] = 0
    ptRNA_list.append(ptRNA_uniform)
    inputs.append([ptRNA_uniform,pCodon_transcriptome,ensmbl_latency_dict])
elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]
np.save('./data/minimaltRNA_minimalcode_100sample_elongt',elongt_list)
np.save('./data/minimaltRNA_minimalcode_100sample_ptRNA',ptRNA_list)

Number of polypeptides reported:  4434
Returned gene map of length  15934
Removed  24  non-divisible by three genes
['insAB1', 'insCD2', 'dgd', 'insCD4', 'insCD6', 'alaB', 'prfB', 'supK', 'b2891', 'ECK2886', 'insCD3', 'insEF4', 'insAB2', 'insEF5', 'insCD1', 'insAB5', 'insEF1', 'insCD5', 'insEF3', 'insAB3', 'insJK', 'insAB4', 'insEF2', 'insAB6']
Unique transcripts without a Gene identifier:  272
Sequences not available for # genes in Ecocyc DB:  153
Inconsistency in codons:  0
3767
Missing genes in transcriptome_dict in compute_transcript_distributions:  133


  return bound(*args, **kwds)


CPU times: user 2min 26s, sys: 604 ms, total: 2min 26s
Wall time: 2min 32s


## Genetic algorithm: faster tRNA distribution with minimal tRNA, minimal code

In [11]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/minimaltRNA_minimalcode_100sample_elongt.npy')
ptRNA_list = np.load('./data/minimaltRNA_minimalcode_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_transcriptome,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='fast',ptRNA_red20=ptRNA_red20)
    fullelongt_list.append(min(elongt_list))
    if i%100 == 0:
        print('Generation: ',i, ' elongt: ', min(elongt_list))
        print(ptRNA_list[0])
        
np.save('./data/minimaltRNA_minimalcode_2ksample_fasttRNA_elongt',elongt_list)
np.save('./data/minimaltRNA_minimalcode_2ksample_fasttRNA_ptRNA',ptRNA_list)
np.save('./data/minimaltRNA_minimalcode_2ksample_fasttRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0  elongt:  168.09758757389093
[0.094584   0.         0.07732972 0.         0.         0.
 0.09288514 0.00731383 0.05229929 0.         0.01686421 0.12535952
 0.01398833 0.         0.05215043 0.10667536 0.         0.
 0.05866316 0.         0.         0.11496429 0.05205781 0.00121284
 0.06831448 0.         0.         0.         0.08328057 0.
 0.         0.         0.09280249 0.         0.         0.
 0.11712083 0.12272277 0.         0.09683255]
Generation:  100  elongt:  160.82498902171335
[0.10796268 0.         0.00693931 0.         0.         0.
 0.04325375 0.06313258 0.00354878 0.         0.01149503 0.11188015
 0.05979841 0.         0.01027266 0.05210967 0.         0.
 0.07443973 0.         0.         0.03855808 0.00604482 0.03897659
 0.01473463 0.         0.         0.         0.06169955 0.
 0.         0.         0.09640443 0.         0.         0.
 0.03460651 0.10150281 0.         0.

Generation:  1800  elongt:  153.5230994793707
[0.10498044 0.         0.05496889 0.         0.         0.
 0.05587383 0.07986485 0.00395272 0.         0.07114567 0.08154833
 0.07416841 0.         0.00205166 0.0588068  0.         0.
 0.09151393 0.         0.         0.05873882 0.00188754 0.00193548
 0.01393903 0.         0.         0.         0.05123332 0.
 0.         0.         0.08169776 0.         0.         0.
 0.00490251 0.01682306 0.         0.08996695]
Generation:  1900  elongt:  153.5230994793707
[0.10498044 0.         0.05496889 0.         0.         0.
 0.05587383 0.07986485 0.00395272 0.         0.07114567 0.08154833
 0.07416841 0.         0.00205166 0.0588068  0.         0.
 0.09151393 0.         0.         0.05873882 0.00188754 0.00193548
 0.01393903 0.         0.         0.         0.05123332 0.
 0.         0.         0.08169776 0.         0.         0.
 0.00490251 0.01682306 0.         0.08996695]
CPU times: user 23min 20s, sys: 4.05 s, total: 23min 24s
Wall time: 1h 17min

## Genetic algorithm: slower tRNA distribution with minimal tRNA, minimal code

In [12]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

##Compute the codon distribution and elongation latency of each gene weighted by transcript number
codon_elongt = compute_codon_elongt(gr25_ptRNA, len(gr25_pCodon), ensmbl_latency_dict)
gene_map, gene_latency = compute_gene_elongt(codon_elongt, red20=True)
pCodon_transcriptome, transcriptome_elongt = compute_transcript_distributions(gene_map,gene_latency)
pCodon_transcriptome = np.array(pCodon_transcriptome)

ptRNA_red20 = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20 = ptRNA_red20[6]/sum(ptRNA_red20[6])
tRNA_indices = np.argwhere(ptRNA_red20 == 0)
tRNA_indices = [indices[0] for indices in tRNA_indices]
elongt_list = list()
ptRNA_list = list()
inputs = list()

### Codon array fixed
elongt_list = np.load('./data/minimaltRNA_minimalcode_100sample_elongt.npy')
ptRNA_list = np.load('./data/minimaltRNA_minimalcode_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_transcriptome,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='slow',ptRNA_red20=ptRNA_red20)
    fullelongt_list.append(max(elongt_list))
    if i%100 == 0:
        print('Generation: ',i,'elongt: ', max(elongt_list))
        print(ptRNA_list[0])

np.save('./data/minimaltRNA_minimalcode_2ksample_slowtRNA_elongt',elongt_list)
np.save('./data/minimaltRNA_minimalcode_2ksample_slowtRNA_ptRNA',ptRNA_list)
np.save('./data/minimaltRNA_minimalcode_2ksample_slowtRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Number of polypeptides reported:  4434
Returned gene map of length  15934
Removed  24  non-divisible by three genes
['insAB1', 'insCD2', 'dgd', 'insCD4', 'insCD6', 'alaB', 'prfB', 'supK', 'b2891', 'ECK2886', 'insCD3', 'insEF4', 'insAB2', 'insEF5', 'insCD1', 'insAB5', 'insEF1', 'insCD5', 'insEF3', 'insAB3', 'insJK', 'insAB4', 'insEF2', 'insAB6']
Unique transcripts without a Gene identifier:  272
Sequences not available for # genes in Ecocyc DB:  153
Inconsistency in codons:  0
3767
Missing genes in transcriptome_dict in compute_transcript_distributions:  133
Generation:  0 elongt:  200.61332867794016
[0.094584   0.         0.07732972 0.         0.         0.
 0.09288514 0.00731383 0.05229929 0.         0.01686421 0.12535952
 0.01398833 0.         0.05215043 0.10667536 0.         0.
 0.05866316 0.         0.         0.11496429 0.05205781 0.00121284
 0.06831448 0.         0.         0.         0.083280

Generation:  1700 elongt:  248.31151776400637
[0.00367172 0.         0.00457251 0.         0.         0.
 0.00613112 0.07271548 0.02517086 0.         0.00552931 0.01808634
 0.00324466 0.         0.00433166 0.00464067 0.         0.
 0.00393794 0.         0.         0.00218645 0.00962714 0.00402439
 0.32596821 0.         0.         0.         0.01036445 0.
 0.         0.         0.00769806 0.         0.         0.
 0.48282955 0.00135353 0.         0.00391595]
Generation:  1800 elongt:  248.31151776400637
[0.0038398  0.         0.00869002 0.         0.         0.
 0.00667999 0.00490433 0.04543514 0.         0.00419257 0.00282413
 0.00334625 0.         0.00301447 0.00844897 0.         0.
 0.0047562  0.         0.         0.00202172 0.01008231 0.00438467
 0.3250015  0.         0.         0.         0.00310664 0.
 0.         0.         0.00805045 0.         0.         0.
 0.5469691  0.0014747  0.         0.00277705]
Generation:  1900 elongt:  248.32361153769364
[0.0038398  0.         0.00869

## minimal uniform random tRNA, minimal code GFP, 100 samples

In [7]:
%%time

gene_map, pCodon_GFP = singlegene_to_genemap('/Users/akshay/Documents/tRNAShuffle/data/tables/sfGFP_RED20.csv')

ptRNA_red20 = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20 = ptRNA_red20[6]/sum(ptRNA_red20[6])
tRNA_indices = np.argwhere(ptRNA_red20 == 0)
tRNA_indices = [indices[0] for indices in tRNA_indices]
elongt_list = list()
ptRNA_list = list()
inputs = list()

for i in range(100):
    ptRNA_uniform = np.random.uniform(min(ptRNA_red20),max(ptRNA_red20),len(ptRNA_red20))
    ptRNA_uniform[tRNA_indices] = 0
    ptRNA_list.append(ptRNA_uniform)
    inputs.append([ptRNA_uniform,pCodon_GFP,ensmbl_latency_dict])
elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]
np.save('./data/minimaltRNA_minimalcode-sfGFP_100sample_elongt',elongt_list)
np.save('./data/minimaltRNA_minimalcode-sfGFP_100sample_ptRNA',ptRNA_list)
print(np.average(elongt_list))

183.09023091756296
CPU times: user 15.4 s, sys: 71.5 ms, total: 15.5 s
Wall time: 23.4 s


## Genetic algorithm: faster tRNA distribution with minimal tRNA, GFP

In [None]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/minimaltRNA_minimalcode-sfGFP_100sample_elongt.npy')
ptRNA_list = np.load('./data/minimaltRNA_minimalcode-sfGFP_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_GFP,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='fast',ptRNA_red20=ptRNA_red20)
    fullelongt_list.append(min(elongt_list))
    if i%100 == 0:
        print('Generation: ',i, ' elongt: ', min(elongt_list))
        
np.save('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_fasttRNA_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_fasttRNA_ptRNA',ptRNA_list)
np.save('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_fasttRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0  elongt:  168.89434001563058
Generation:  100  elongt:  155.55834683196917
Generation:  200  elongt:  153.80336817660336
Generation:  300  elongt:  153.80336817660336
Generation:  400  elongt:  153.80336817660336
Generation:  500  elongt:  153.80336817660336
Generation:  600  elongt:  153.80336817660336
Generation:  700  elongt:  153.80336817660336
Generation:  800  elongt:  153.80336817660336


## Genetic algorithm: slower tRNA distribution with minimal tRNA, GFP

In [None]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/minimaltRNA_minimalcode-sfGFP_100sample_elongt.npy')
ptRNA_list = np.load('./data/minimaltRNA_minimalcode-sfGFP_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_GFP,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='slow',ptRNA_red20=ptRNA_red20)
    fullelongt_list.append(min(elongt_list))
    if i%100 == 0:
        print('Generation: ',i, ' elongt: ', min(elongt_list))
        
np.save('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_slowtRNA_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_slowtRNA_ptRNA',ptRNA_list)
np.save('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_slowtRNA_fullelongt',fullelongt_list)

{'Ala1B': 0.006472521287859333,
 'Ala2': 0.0,
 'Arg2': 0.0037291502135492842,
 'Arg3': 0.0,
 'Arg4': 0.0,
 'Arg5': 0.005624615455773357,
 'Asn': 0.0853044814112671,
 'Asp1': 0.09120325714899878,
 'Cys': 0.0018702566666913703,
 'Gln1': 0.0,
 'Gln2': 0.004066273847753027,
 'Glu2': 0.06286686794810334,
 'Gly2': 0.06924314733200086,
 'Gly3': 0.007412299016853992,
 'His': 0.07808569242973361,
 'Ile1': 0.053825484869078184,
 'Leu1': 0.0,
 'Leu2': 0.0,
 'Leu3': 0.08769918827297907,
 'Leu4': 0.0,
 'Leu5': 0.0,
 'Lys': 0.06337225769201801,
 'Met m': 0.005351244819313088,
 'Phe': 0.07114945695652974,
 'Pro1': 0.026632428020890244,
 'Pro2': 0.0,
 'Pro3': 0.019129080209759307,
 'Sec': 0.0,
 'Ser1': 0.06089590866570733,
 'Ser2': 0.0,
 'Ser3': 0.0,
 'Ser5': 0.003299176174976649,
 'Thr1': 0.08381153824308947,
 'Thr2': 0.0,
 'Thr3': 0.0,
 'Thr4': 0.0,
 'Trp': 0.013025787161088345,
 'Tyr1+Tyr2': 0.017496095674801312,
 'Val1': 0.0,
 'Val2A+2B': 0.07843379048118512}