# Load translation voxel simulation data

In [1]:
%%time
%load_ext autoreload

from analysis_utils import *
from joblib import Parallel, delayed
import multiprocessing  
import numpy as np
import pandas as pd

#Growth rate = 3.0
#Computes the translation latencies from our model with simulations containing
#1 to 14 cognate ternary complexes in the voxel (14 to 42 not included because
#the probability of voxels with that number of cognates is ~0)
path = "/Users/Akshay/Documents/tRNAShuffle/data/"
ensmbl_latency_dict = dict()
rxndiff=dict()
scalingFactor = 200
NR_scaledrates = {'k1r':718*200/scalingFactor,'k2f':1475*200/scalingFactor,'k2r_nr':1120*200/scalingFactor,'k3_nr':6*200/scalingFactor,'k4':209*200/scalingFactor}


data = "191003_2210/"
for i in range(1,7):
    sim_i = computeTransportRxnTimes(path+data,100*(i-1),100*(i-1)+100,cogtRNANum=i, ribosomeNum = 7,scaling=scalingFactor,NR_scaling = NR_scaledrates)
    ensmbl_latency_dict[i] = (CellLatencies(sim_i))

data = "191105_0908/"
for i in range(7,14):
    sim_i = computeTransportRxnTimes(path+data, 100*(i-1-6),100*(i-1-6)+100,cogtRNANum=i, ribosomeNum = 7,scaling=scalingFactor,NR_scaling = NR_scaledrates)
    ensmbl_latency_dict[i] = (CellLatencies(sim_i))
    
#Experimentally obtained codon usage frequencies and tRNA abundances at growth rate = 2.5 dbl/hr
gr25_Codon = np.array([2.36, 1.26, 45.55, 34.17, 16.97, 57.86, 19.27, 33.74, 14.98, 22.31, 43.18, 7.67, 24.11, 24.87, 39.49, 11.81, 0.03, 0.63, 2.19, 9.31, 17.22, 55.01, 5.61, 29.21, 21.67, 0.52, 15.79, 43.86, 4.17, 2.61, 20.64, 26.7, 7.03, 0.19, 2.76, 3.81, 6.72, 16.52, 4.27, 2.73, 7.92, 23.25, 2.51, 1.98, 16.33, 11.68, 0.62, 0.67, 43.82, 20.59, 27.28, 7.01, 6.78, 14.21, 60.75, 0.82, 3.86, 4.09, 28.82, 5.18, 4.38, 1.09])
gr25_pCodon = gr25_Codon/np.sum(gr25_Codon)
gr25_ptRNA = [0.0602409638554217, 0.010542168674698798, 0.07379518072289158, 0.00602409638554217, 0.010542168674698798, 0.00602409638554217, 0.021084337349397596, 0.043674698795180725, 0.021084337349397596, 0.01204819277108434, 0.01807228915662651, 0.08433734939759038, 0.03162650602409639, 0.07228915662650603, 0.01204819277108434, 0.07078313253012049, 0.06325301204819278, 0.01656626506024097, 0.009036144578313254, 0.027108433734939763, 0.010542168674698798, 0.03012048192771085, 0.013554216867469882, 0.015060240963855425, 0.0075301204819277125, 0.010542168674698798, 0.0075301204819277125, 0.003012048192771085, 0.021084337349397596, 0.004518072289156627, 0.01656626506024097, 0.01204819277108434, 0.0015060240963855425, 0.009036144578313254, 0.01656626506024097, 0.019578313253012052, 0.015060240963855425, 0.027108434, 0.05873493975903616, 0.019578313]
gr25_ptRNA = gr25_ptRNA/np.sum(gr25_ptRNA)

n_cores = 16

##Compute the codon distribution and elongation latency of each gene weighted by transcript number
codon_elongt = compute_codon_elongt(gr25_ptRNA, len(gr25_pCodon), ensmbl_latency_dict)
gene_map, gene_latency = compute_gene_elongt(codon_elongt)
pCodon_transcriptome, transcriptome_elongt = compute_transcript_distributions(gene_map,gene_latency)
pCodon_transcriptome = np.array(pCodon_transcriptome)

Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...


KeyboardInterrupt: 

## Uniform Random tRNA, WT Codon usage, 100 samples

In [3]:
%%time
elongt_list = list()
ptRNA_list = list()
inputs = list()

for i in range(100):
    ptRNA_uniform = np.random.uniform(min(gr25_ptRNA),max(gr25_ptRNA),len(gr25_ptRNA))
    ptRNA_list.append(ptRNA_uniform)
    inputs.append([ptRNA_uniform,pCodon_transcriptome,ensmbl_latency_dict])
elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]
np.save('./data/UniformRandomtRNA_WTCodonUsage_100sample_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_WTCodonUsage_100sample_ptRNA',ptRNA_list)


CPU times: user 12.6 s, sys: 307 ms, total: 12.9 s
Wall time: 22.3 s


## Uniform Random tRNA, WT Codon usage, 10k samples

In [4]:
%%time
elongt_list = list()
ptRNA_list = list()
inputs = list()

for i in range(10000):
    ptRNA_uniform = np.random.uniform(min(gr25_ptRNA),max(gr25_ptRNA),len(gr25_ptRNA))
    ptRNA_list.append(ptRNA_uniform)
    inputs.append([ptRNA_uniform,pCodon_transcriptome,ensmbl_latency_dict])
elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]
np.save('./data/UniformRandomtRNA_WTCodonUsage_10ksample_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_WTCodonUsage_10ksample_ptRNA',ptRNA_list)


CPU times: user 31min 15s, sys: 10 s, total: 31min 25s
Wall time: 45min 13s


## WT tRNA, Uniform Random Codon usage, 100 samples

In [5]:
%%time
elongt_list = list()
pCodon_list = list()
inputs = list()

syn_codon_list = [[0,1,2,3],[4,5],[6,7],[8,9,10,11],[12,13,14,15],[16,17,46,47,48,49],[18,19,42,43,44,45],
              [20,21],[22,23],[24],[25,26,27],[28,29,30,31],[32],[33],[34,35],[36,37],[38,39,54,55,56,57],[40,41],
              [50,51],[52,53],[58,59,60,61]]
        
for i in range(100):
    pCodon_uniform = np.zeros(len(gr25_pCodon))
    for syn_codons in syn_codon_list:
        pCodon_uniform[syn_codons] = np.random.uniform(min(gr25_Codon),max(gr25_Codon),len(syn_codons))
        pCodon_uniform[syn_codons] = sum(pCodon_transcriptome[syn_codons])/sum(pCodon_uniform[syn_codons])*pCodon_uniform[syn_codons]
    pCodon_list.append(pCodon_uniform)
    inputs.append([gr25_ptRNA, pCodon_uniform,ensmbl_latency_dict])

elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]

np.save('./data/WTtRNA_UniformRandomCodonUsage_100sample_elongt',elongt_list)
np.save('./data/WTtRNA_UniformRandomCodonUsage_100sample_pCodon',pCodon_list)


CPU times: user 17.6 s, sys: 106 ms, total: 17.7 s
Wall time: 27.5 s


## WT tRNA, Uniform Random Codon usage, 10k samples

In [6]:
%%time
elongt_list = list()
pCodon_list = list()
inputs = list()

syn_codon_list = [[0,1,2,3],[4,5],[6,7],[8,9,10,11],[12,13,14,15],[16,17,46,47,48,49],[18,19,42,43,44,45],
              [20,21],[22,23],[24],[25,26,27],[28,29,30,31],[32],[33],[34,35],[36,37],[38,39,54,55,56,57],[40,41],
              [50,51],[52,53],[58,59,60,61]]
        
for i in range(10000):
    pCodon_uniform = np.zeros(len(pCodon_transcriptome))
    for syn_codons in syn_codon_list:
        pCodon_uniform[syn_codons] = np.random.uniform(min(gr25_Codon),max(gr25_Codon),len(syn_codons))
        pCodon_uniform[syn_codons] = sum(pCodon_transcriptome[syn_codons])/sum(pCodon_uniform[syn_codons])*pCodon_uniform[syn_codons]
    pCodon_list.append(pCodon_uniform)
    inputs.append([gr25_ptRNA, pCodon_uniform,ensmbl_latency_dict])

elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]

np.save('./data/WTtRNA_UniformRandomCodonUsage_10ksample_elongt',elongt_list)
np.save('./data/WTtRNA_UniformRandomCodonUsage_10ksample_pCodon',pCodon_list)


CPU times: user 30min 22s, sys: 9.05 s, total: 30min 31s
Wall time: 43min 41s


## Genetic algorithm: faster tRNA distributions (Codon usage fixed, WT)

In [7]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/UniformRandomtRNA_WTCodonUsage_100sample_elongt.npy')
ptRNA_list = np.load('./data/UniformRandomtRNA_WTCodonUsage_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_transcriptome,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='fast')
    fullelongt_list.append(min(elongt_list))
    if i%100 == 0:
        print('Generation: ',i, ' elongt: ', min(elongt_list))
        
np.save('./data/UniformRandomtRNA_WTCodonUsage_2ksample_fasttRNA_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_WTCodonUsage_2ksample_fasttRNA_ptRNA',ptRNA_list)
np.save('./data/UniformRandomtRNA_WTCodonUsage_2ksample_fasttRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0  elongt:  205.1959620516039
Generation:  100  elongt:  188.65778009419725
Generation:  200  elongt:  183.1594175889988
Generation:  300  elongt:  180.61297703966687
Generation:  400  elongt:  179.4509158978308
Generation:  500  elongt:  179.14011847735455
Generation:  600  elongt:  177.29935253457626
Generation:  700  elongt:  177.08762769139537
Generation:  800  elongt:  177.08762769139537
Generation:  900  elongt:  176.84620471968387
Generation:  1000  elongt:  176.115415187664
Generation:  1100  elongt:  176.115415187664
Generation:  1200  elongt:  175.49822104624192
Generation:  1300  elongt:  175.296467635904
Generation:  1400  elongt:  175.296467635904
Generation:  1500  elongt:  175.296467635904
Generation:  1600  elongt:  175.296467635904
Generation:  1700  elongt:  175.06477871869257
Generation:  1800  elongt:  174.67737941842122
Generation:  1900  elongt:  174.5442590501617


## Genetic algorithm: slower tRNA distributions (Codon usage fixed, WT)

In [8]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/UniformRandomtRNA_WTCodonUsage_100sample_elongt.npy')
ptRNA_list = np.load('./data/UniformRandomtRNA_WTCodonUsage_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_transcriptome,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='slow')
    fullelongt_list.append(max(elongt_list))
    if i%100 == 0:
        print('Generation: ',i)
np.save('./data/UniformRandomtRNA_WTCodonUsage_2ksample_slowtRNA_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_WTCodonUsage_2ksample_slowtRNA_ptRNA',ptRNA_list)
np.save('./data/UniformRandomtRNA_WTCodonUsage_2ksample_slowtRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0
Generation:  100
Generation:  200
Generation:  300
Generation:  400
Generation:  500
Generation:  600
Generation:  700
Generation:  800
Generation:  900
Generation:  1000
Generation:  1100
Generation:  1200
Generation:  1300
Generation:  1400
Generation:  1500
Generation:  1600
Generation:  1700
Generation:  1800
Generation:  1900
CPU times: user 29min 50s, sys: 5.22 s, total: 29min 55s
Wall time: 1h 42min 9s


## Genetic algorithm: faster codon usage distributions (tRNA abundances fixed, WT)

In [9]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/WTtRNA_UniformRandomCodonUsage_100sample_elongt.npy')
pCodon_list = np.load('./data/WTtRNA_UniformRandomCodonUsage_100sample_pCodon.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, pCodon_list, elongt_list = run_ga_CodonSweep(gr25_ptRNA,pCodon_list,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='fast')
    fullelongt_list.append(min(elongt_list))
    if i%100 == 0:
        print('Generation: ',i, ' elongt: ', min(elongt_list))
        
np.save('./data/WTtRNA_UniformRandomCodonUsage_2ksample_fastCodonUsage_elongt',elongt_list)
np.save('./data/WTtRNA_UniformRandomCodonUsage_2ksample_fastCodonUsage_pCodon',pCodon_list)
np.save('./data/WTtRNA_UniformRandomCodonUsage_2ksample_fastCodonUsage_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0  elongt:  191.20403884990284
Generation:  100  elongt:  183.8317274791367
Generation:  200  elongt:  183.8317274791367
Generation:  300  elongt:  183.79480130218053
Generation:  400  elongt:  183.00058094871963
Generation:  500  elongt:  183.00058094871963
Generation:  600  elongt:  182.9682133905058
Generation:  700  elongt:  181.89405407246474
Generation:  800  elongt:  181.89405407246474
Generation:  900  elongt:  181.89405407246474
Generation:  1000  elongt:  181.89405407246474
Generation:  1100  elongt:  181.89405407246474
Generation:  1200  elongt:  181.89405407246474
Generation:  1300  elongt:  181.88442465158676
Generation:  1400  elongt:  181.59291600231413
Generation:  1500  elongt:  181.59291600231413
Generation:  1600  elongt:  181.46748086212898
Generation:  1700  elongt:  181.38451951397764
Generation:  1800  elongt:  181.38451951397764
Generation:  1900  elongt:  181.38

## Genetic algorithm: slower codon usage distributions (tRNA abundances fixed, WT)

In [10]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/WTtRNA_UniformRandomCodonUsage_100sample_elongt.npy')
pCodon_list = np.load('./data/WTtRNA_UniformRandomCodonUsage_100sample_pCodon.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, pCodon_list, elongt_list = run_ga_CodonSweep(gr25_ptRNA,pCodon_list,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='slow')
    fullelongt_list.append(max(elongt_list))
    if i%100 == 0:
        print('Generation: ',i, ' elongt: ', min(elongt_list))
        
np.save('./data/WTtRNA_UniformRandomCodonUsage_2ksample_slowCodonUsage_elongt',elongt_list)
np.save('./data/WTtRNA_UniformRandomCodonUsage_2ksample_slowCodonUsage_pCodon',pCodon_list)
np.save('./data/WTtRNA_UniformRandomCodonUsage_2ksample_slowCodonUsage_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0  elongt:  197.15079674970096
Generation:  100  elongt:  204.67877264653683
Generation:  200  elongt:  207.69370503330927
Generation:  300  elongt:  208.5881553822576
Generation:  400  elongt:  209.1548761281126
Generation:  500  elongt:  209.9507496043587
Generation:  600  elongt:  210.76652294650046
Generation:  700  elongt:  207.24318243566364
Generation:  800  elongt:  208.94475841032855
Generation:  900  elongt:  211.12963582348067
Generation:  1000  elongt:  210.52570746321175
Generation:  1100  elongt:  211.24092953215046
Generation:  1200  elongt:  210.48032627973268
Generation:  1300  elongt:  210.47866205263128
Generation:  1400  elongt:  209.784447529888
Generation:  1500  elongt:  208.99184589196045
Generation:  1600  elongt:  210.6692604960466
Generation:  1700  elongt:  212.53593727342954
Generation:  1800  elongt:  209.31250488275492
Generation:  1900  elongt:  210.61894

# Minimal code

In [9]:
##Compute the codon distribution and elongation latency of each gene weighted by transcript number
codon_elongt = compute_codon_elongt(gr25_ptRNA, len(gr25_pCodon), ensmbl_latency_dict)
gene_map, gene_latency = compute_gene_elongt(codon_elongt, red20=True)
pCodon_transcriptome, transcriptome_elongt = compute_transcript_distributions(gene_map,gene_latency)
pCodon_transcriptome = np.array(pCodon_transcriptome)

Removed  349  RNA species
4358
Removed  81  non-divisible by three genes
Inconsistency: reduceGeneMap_fullProteinsOnly didnt find a gene,  1257
4277
Missing genes in transcriptome_dict in compute_transcript_distributions:  1064


## Uniform Random tRNA, minimal code, 100 samples


In [10]:
%%time
elongt_list = list()
ptRNA_list = list()
inputs = list()

for i in range(100):
    ptRNA_uniform = np.random.uniform(min(gr25_ptRNA),max(gr25_ptRNA),len(gr25_ptRNA))
    ptRNA_list.append(ptRNA_uniform)
    inputs.append([ptRNA_uniform,pCodon_transcriptome,ensmbl_latency_dict])
elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]
np.save('./data/UniformRandomtRNA_minimalcode_100sample_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_minimalcode_100sample_ptRNA',ptRNA_list)

CPU times: user 12.7 s, sys: 43.5 ms, total: 12.7 s
Wall time: 19.3 s


## Genetic algorithm: faster tRNA distribution with full tRNA, minimal code

In [11]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/UniformRandomtRNA_minimalcode_100sample_elongt.npy')
ptRNA_list = np.load('./data/UniformRandomtRNA_minimalcode_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_transcriptome,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='fast')
    fullelongt_list.append(min(elongt_list))
    if i%100 == 0:
        print('Generation: ',i, ' elongt: ', min(elongt_list))
        
np.save('./data/UniformRandomtRNA_minimalcode_2ksample_fasttRNA_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_minimalcode_2ksample_fasttRNA_ptRNA',ptRNA_list)
np.save('./data/UniformRandomtRNA_minimalcode_2ksample_fasttRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0  elongt:  198.60491334362854
Generation:  100  elongt:  181.73761587914353
Generation:  200  elongt:  174.87020202658613
Generation:  300  elongt:  174.86359689371983
Generation:  400  elongt:  173.88416662750083
Generation:  500  elongt:  173.01797927720386
Generation:  600  elongt:  172.15415640410401
Generation:  700  elongt:  168.9502169532995
Generation:  800  elongt:  168.9502169532995
Generation:  900  elongt:  168.9502169532995
Generation:  1000  elongt:  168.14891864061445
Generation:  1100  elongt:  168.14891864061445
Generation:  1200  elongt:  168.14891864061445
Generation:  1300  elongt:  166.2087252762881
Generation:  1400  elongt:  165.7788962749751
Generation:  1500  elongt:  165.7788962749751
Generation:  1600  elongt:  165.7788962749751
Generation:  1700  elongt:  165.7788962749751
Generation:  1800  elongt:  165.35083251049207
Generation:  1900  elongt:  165.3508325

## Genetic algorithm: slower tRNA distribution with full tRNA, minimal code

In [12]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/UniformRandomtRNA_minimalcode_100sample_elongt.npy')
ptRNA_list = np.load('./data/UniformRandomtRNA_minimalcode_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_transcriptome,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='slow')
    fullelongt_list.append(max(elongt_list))
    if i%100 == 0:
        print('Generation: ',i,'elongt: ', max(elongt_list))
np.save('./data/UniformRandomtRNA_minimalcode_2ksample_slowtRNA_elongt',elongt_list)
np.save('./data/UniformRandomtRNA_minimalcode_2ksample_slowtRNA_ptRNA',ptRNA_list)
np.save('./data/UniformRandomtRNA_minimalcode_2ksample_slowtRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0 elongt:  227.82867876066786
Generation:  100 elongt:  245.55721300193855
Generation:  200 elongt:  249.15370412396504
Generation:  300 elongt:  250.21448498771207
Generation:  400 elongt:  251.0195624233942
Generation:  500 elongt:  251.0195624233942
Generation:  600 elongt:  251.50373528724865
Generation:  700 elongt:  251.67445555306364
Generation:  800 elongt:  251.77727018646277
Generation:  900 elongt:  251.89758821909686
Generation:  1000 elongt:  251.89758821909686
Generation:  1100 elongt:  251.89758821909686
Generation:  1200 elongt:  251.89758821909686
Generation:  1300 elongt:  251.89758821909686
Generation:  1400 elongt:  252.12466120346508
Generation:  1500 elongt:  252.12466120346508
Generation:  1600 elongt:  252.27660312446278
Generation:  1700 elongt:  252.27660312446278
Generation:  1800 elongt:  252.29804206326122
Generation:  1900 elongt:  252.3330122570248
CPU tim

## minimal uniform random tRNA, minimal code, 100 samples

In [6]:
%%time

##Compute the codon distribution and elongation latency of each gene weighted by transcript number
codon_elongt = compute_codon_elongt(gr25_ptRNA, len(gr25_pCodon), ensmbl_latency_dict)
gene_map, gene_latency = compute_gene_elongt(codon_elongt, red20=True)
pCodon_transcriptome, transcriptome_elongt = compute_transcript_distributions(gene_map,gene_latency)
pCodon_transcriptome = np.array(pCodon_transcriptome)

ptRNA_red20 = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20 = ptRNA_red20[6]/sum(ptRNA_red20[6])
tRNA_indices = np.argwhere(ptRNA_red20 == 0)
tRNA_indices = [indices[0] for indices in tRNA_indices]
elongt_list = list()
ptRNA_list = list()
inputs = list()

for i in range(100):
    ptRNA_uniform = np.random.uniform(min(ptRNA_red20),max(ptRNA_red20),len(ptRNA_red20))
    ptRNA_uniform[tRNA_indices] = 0
    ptRNA_list.append(ptRNA_uniform)
    inputs.append([ptRNA_uniform,pCodon_transcriptome,ensmbl_latency_dict])
elongt_list = Parallel(n_jobs=n_cores,backend='loky')(delayed(computeElongationLatency_multithread)(i) for i in inputs)
elongt_list = [elongt[0][0] for elongt in elongt_list]
np.save('./data/minimaltRNA_minimalcode_100sample_elongt',elongt_list)
np.save('./data/minimaltRNA_minimalcode_100sample_ptRNA',ptRNA_list)

Removed  349  RNA species
4358
Removed  81  non-divisible by three genes
Inconsistency: reduceGeneMap_fullProteinsOnly didnt find a gene,  1257
4277
Missing genes in transcriptome_dict in compute_transcript_distributions:  1064
CPU times: user 2min 30s, sys: 965 ms, total: 2min 31s
Wall time: 2min 46s


## Genetic algorithm: faster tRNA distribution with minimal tRNA, minimal code

In [7]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

### Codon array fixed
elongt_list = np.load('./data/minimaltRNA_minimalcode_100sample_elongt.npy')
ptRNA_list = np.load('./data/minimaltRNA_minimalcode_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_transcriptome,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='fast',ptRNA_red20=ptRNA_red20)
    fullelongt_list.append(min(elongt_list))
    if i%100 == 0:
        print('Generation: ',i, ' elongt: ', min(elongt_list))
        print(ptRNA_list[0])
        
np.save('./data/minimaltRNA_minimalcode_2ksample_fasttRNA_elongt',elongt_list)
np.save('./data/minimaltRNA_minimalcode_2ksample_fasttRNA_ptRNA',ptRNA_list)
np.save('./data/minimaltRNA_minimalcode_2ksample_fasttRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generation:  0  elongt:  168.14160024066234
[0.09914532 0.         0.06802323 0.         0.         0.
 0.02213267 0.08575104 0.04725787 0.         0.05637249 0.08116213
 0.07669085 0.         0.06521133 0.09861972 0.         0.
 0.01955819 0.         0.         0.03283825 0.02492907 0.0223025
 0.00247281 0.         0.         0.         0.1051619  0.
 0.         0.         0.01570526 0.         0.         0.
 0.01406488 0.06219248 0.         0.000408  ]
Generation:  100  elongt:  157.78472484389374
[0.09841162 0.         0.0822379  0.         0.         0.
 0.04476057 0.073834   0.00821897 0.         0.07312617 0.06120977
 0.08718799 0.         0.00072701 0.03100832 0.         0.
 0.0784168  0.         0.         0.01689105 0.06481828 0.05645332
 0.00930612 0.         0.         0.         0.06860238 0.
 0.         0.         0.04994885 0.         0.         0.
 0.00908081 0.01703274 0.         0.0

Generation:  1800  elongt:  153.51378130567971
[0.09992645 0.         0.06202881 0.         0.         0.
 0.03077383 0.08646015 0.00765669 0.         0.02034573 0.07211751
 0.10136568 0.         0.00099659 0.07442965 0.         0.
 0.10706795 0.         0.         0.07687485 0.00564513 0.05704079
 0.00925979 0.         0.         0.         0.04982338 0.
 0.         0.         0.05449977 0.         0.         0.
 0.00749196 0.01120805 0.         0.06498726]
Generation:  1900  elongt:  153.51378130567971
[0.09992645 0.         0.06202881 0.         0.         0.
 0.03077383 0.08646015 0.00765669 0.         0.02034573 0.07211751
 0.10136568 0.         0.00099659 0.07442965 0.         0.
 0.10706795 0.         0.         0.07687485 0.00564513 0.05704079
 0.00925979 0.         0.         0.         0.04982338 0.
 0.         0.         0.05449977 0.         0.         0.
 0.00749196 0.01120805 0.         0.06498726]
CPU times: user 28min 11s, sys: 5.09 s, total: 28min 16s
Wall time: 1h 35m

## Genetic algorithm: slower tRNA distribution with minimal tRNA, minimal code

In [15]:
%%time
%load_ext autoreload
%autoreload 2
from analysis_utils import *

##Compute the codon distribution and elongation latency of each gene weighted by transcript number
codon_elongt = compute_codon_elongt(gr25_ptRNA, len(gr25_pCodon), ensmbl_latency_dict)
gene_map, gene_latency = compute_gene_elongt(codon_elongt, red20=True)
pCodon_transcriptome, transcriptome_elongt = compute_transcript_distributions(gene_map,gene_latency)
pCodon_transcriptome = np.array(pCodon_transcriptome)

ptRNA_red20 = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20 = ptRNA_red20[6]/sum(ptRNA_red20[6])
tRNA_indices = np.argwhere(ptRNA_red20 == 0)
tRNA_indices = [indices[0] for indices in tRNA_indices]
elongt_list = list()
ptRNA_list = list()
inputs = list()

### Codon array fixed
elongt_list = np.load('./data/minimaltRNA_minimalcode_100sample_elongt.npy')
ptRNA_list = np.load('./data/minimaltRNA_minimalcode_100sample_ptRNA.npy')
fullelongt_list = list()

for i in range(2000):
    fitness, ptRNA_list, elongt_list = run_ga_tRNA(ptRNA_list,pCodon_transcriptome,elongt_list,ensmbl_latency_dict,min(gr25_ptRNA),max(gr25_ptRNA),objective='slow',ptRNA_red20=ptRNA_red20)
    fullelongt_list.append(max(elongt_list))
    if i%100 == 0:
        print('Generation: ',i,'elongt: ', max(elongt_list))
        print(ptRNA_list[0])

np.save('./data/minimaltRNA_minimalcode_2ksample_slowtRNA_elongt',elongt_list)
np.save('./data/minimaltRNA_minimalcode_2ksample_slowtRNA_ptRNA',ptRNA_list)
np.save('./data/minimaltRNA_minimalcode_2ksample_slowtRNA_fullelongt',fullelongt_list)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Removed  349  RNA species
4358
Removed  81  non-divisible by three genes
Inconsistency: reduceGeneMap_fullProteinsOnly didnt find a gene,  1257
4277
Missing genes in transcriptome_dict in compute_transcript_distributions:  1064
Generation:  0 elongt:  206.9071502850114
[0.01608285 0.         0.12536134 0.         0.         0.
 0.08132055 0.12612634 0.0930813  0.         0.02490149 0.01985029
 0.01355763 0.         0.00464132 0.06228925 0.         0.
 0.00323951 0.         0.         0.09208818 0.0886565  0.05680324
 0.10653141 0.         0.         0.         0.09695683 0.
 0.         0.         0.00398555 0.         0.         0.
 0.11960412 0.12398968 0.         0.04951663]
Generation:  100 elongt:  228.39365506817404
[0.00835815 0.         0.01323072 0.         0.         0.
 0.07008058 0.00996779 0.21654365 0.         0.00432683 0.03231602
 0.03229349 0.         0.009947   0.01535434 0.        

Generation:  1800 elongt:  249.28273433617954
[0.00756554 0.         0.00164367 0.         0.         0.
 0.00783748 0.0030131  0.41823207 0.         0.00573404 0.00452327
 0.00278422 0.         0.00193869 0.03550851 0.         0.
 0.00478462 0.         0.         0.04252236 0.006778   0.00737255
 0.41191008 0.         0.         0.         0.00690812 0.
 0.         0.         0.00544089 0.         0.         0.
 0.01263644 0.00755376 0.         0.00531258]
Generation:  1900 elongt:  249.28273433617954
[0.00473598 0.         0.00182263 0.         0.         0.
 0.00849298 0.00335519 0.45135685 0.         0.00181051 0.00491298
 0.00300473 0.         0.00185252 0.00857088 0.         0.
 0.00522775 0.         0.         0.00235471 0.00657783 0.00800774
 0.44453414 0.         0.         0.         0.00735282 0.
 0.         0.         0.01236231 0.         0.         0.
 0.0093866  0.00852395 0.         0.0057569 ]
CPU times: user 32min 57s, sys: 13.9 s, total: 33min 11s
Wall time: 1h 49min

In [8]:
ptRNA_red20 = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20 = ptRNA_red20[6]/sum(ptRNA_red20[6])
tRNA_indices = np.argwhere(ptRNA_red20 == 0)

[1, 3, 4, 5, 9, 13, 16, 17, 19, 20, 25, 26, 27, 29, 30, 31, 33, 34, 35, 38]


In [13]:
elongt_list = np.load('./data/UniformRandomtRNA_minimalcode_100sample_elongt.npy')
elongt_list

array([227.82867876, 220.49867173, 225.22127618, 219.33597043,
       216.53793099, 213.50814525, 216.43765119, 215.39500003,
       217.30159397, 216.43433774, 224.80244017, 223.37886192,
       221.43334577, 217.3145207 , 215.62300387, 217.35460056,
       214.28603358, 207.23218558, 218.60327648, 216.33534745,
       225.21111856, 207.35530133, 211.80614719, 214.41355737,
       213.37879721, 217.11638837, 206.67012536, 226.32781276,
       219.32548762, 213.61684655, 214.14888916, 219.29841786,
       214.07183499, 213.80682526, 219.73764384, 221.36863078,
       215.22769438, 222.65773344, 220.47351254, 214.69010084,
       198.60491334, 211.1982192 , 215.08958281, 220.56160904,
       214.74410262, 224.75226341, 223.18711035, 220.5232296 ,
       225.4795083 , 214.32128004, 216.56714637, 209.48958663,
       215.04908037, 213.44123307, 214.63500148, 218.31925558,
       209.87940431, 217.37424716, 222.31730348, 219.05546106,
       221.7805431 , 214.03764226, 211.38203252, 205.25

In [14]:
ptRNA_list[0]

array([0.06586788, 0.        , 0.02047568, 0.        , 0.        ,
       0.        , 0.05669084, 0.06377628, 0.0212702 , 0.        ,
       0.04173338, 0.06243044, 0.06777071, 0.        , 0.06776294,
       0.04417216, 0.        , 0.        , 0.07383399, 0.        ,
       0.        , 0.05070421, 0.00497097, 0.02958561, 0.05496023,
       0.        , 0.0441642 , 0.        , 0.0068525 , 0.        ,
       0.        , 0.00989889, 0.07040875, 0.        , 0.        ,
       0.        , 0.04073738, 0.06696222, 0.        , 0.03497054])

In [16]:
ptRNA_list = np.load('./data/minimaltRNA_minimalcode_100sample_ptRNA.npy')
