In [4]:
%%time
%load_ext autoreload

from analysis_utils import *
from joblib import Parallel, delayed
import multiprocessing  
import numpy as np
import pandas as pd

# Growth rate = 3.0
# Build the translation-latency ensemble from our model, using simulations that
# contain 1 to 13 cognate ternary complexes in the voxel (range(1, 7) from the
# first folder, range(7, 14) from the second). Larger counts, up to 42, are left
# out because the probability of voxels with that many cognates is ~0.
path = "/Users/Akshay/Documents/tRNAShuffle/data/"
ensmbl_latency_dict = {}
rxndiff = {}
scalingFactor = 200
# Every rate is written as base*200/scalingFactor, so with scalingFactor = 200
# the base values are passed through unchanged (as floats).
NR_scaledrates = {
    rate_name: base_rate*200/scalingFactor
    for rate_name, base_rate in (
        ('k1r', 718),
        ('k2f', 1475),
        ('k2r_nr', 1120),
        ('k3_nr', 6),
        ('k4', 209),
    )
}

# Each entry: (simulation folder, cognate counts stored in that folder,
# number of cognate counts held by the preceding folders). Within a folder the
# k-th cognate count uses the 100-wide window [100*k, 100*k + 100)
# (presumably simulation run indices -- confirm against computeTransportRxnTimes).
for data, cognate_counts, counts_before in (
    ("191003_2210/", range(1, 7), 0),
    ("191105_0908/", range(7, 14), 6),
):
    for i in cognate_counts:
        window_start = 100*(i-1-counts_before)
        sim_i = computeTransportRxnTimes(
            path+data,
            window_start,
            window_start+100,
            cogtRNANum=i,
            ribosomeNum=7,
            scaling=scalingFactor,
            NR_scaling=NR_scaledrates,
        )
        ensmbl_latency_dict[i] = CellLatencies(sim_i)
    
# Experimentally obtained codon usage frequencies and tRNA abundances at growth rate = 2.5 dbl/hr.
# NOTE(review): the simulation-loading comment earlier in this cell says growth
# rate = 3.0 -- confirm which growth rate these tables should correspond to.
# Raw codon usage values; normalised to probabilities on the next line.
gr25_Codon = np.array([2.36, 1.26, 45.55, 34.17, 16.97, 57.86, 19.27, 33.74, 14.98, 22.31, 43.18, 7.67, 24.11, 24.87, 39.49, 11.81, 0.03, 0.63, 2.19, 9.31, 17.22, 55.01, 5.61, 29.21, 21.67, 0.52, 15.79, 43.86, 4.17, 2.61, 20.64, 26.7, 7.03, 0.19, 2.76, 3.81, 6.72, 16.52, 4.27, 2.73, 7.92, 23.25, 2.51, 1.98, 16.33, 11.68, 0.62, 0.67, 43.82, 20.59, 27.28, 7.01, 6.78, 14.21, 60.75, 0.82, 3.86, 4.09, 28.82, 5.18, 4.38, 1.09])
gr25_pCodon = gr25_Codon/np.sum(gr25_Codon)
# tRNA abundances as a plain list; the division below relies on np.sum returning
# a NumPy scalar, which turns the list into an array that sums to 1.
gr25_ptRNA = [0.0602409638554217, 0.010542168674698798, 0.07379518072289158, 0.00602409638554217, 0.010542168674698798, 0.00602409638554217, 0.021084337349397596, 0.043674698795180725, 0.021084337349397596, 0.01204819277108434, 0.01807228915662651, 0.08433734939759038, 0.03162650602409639, 0.07228915662650603, 0.01204819277108434, 0.07078313253012049, 0.06325301204819278, 0.01656626506024097, 0.009036144578313254, 0.027108433734939763, 0.010542168674698798, 0.03012048192771085, 0.013554216867469882, 0.015060240963855425, 0.0075301204819277125, 0.010542168674698798, 0.0075301204819277125, 0.003012048192771085, 0.021084337349397596, 0.004518072289156627, 0.01656626506024097, 0.01204819277108434, 0.0015060240963855425, 0.009036144578313254, 0.01656626506024097, 0.019578313253012052, 0.015060240963855425, 0.027108434, 0.05873493975903616, 0.019578313]
gr25_ptRNA = gr25_ptRNA/np.sum(gr25_ptRNA)

# Not used anywhere in this cell; presumably consumed by parallel code in other cells -- confirm.
n_cores = 16

# Per-codon elongation latencies for the tRNA abundances above, using the number
# of codon entries and the latency ensemble built from the simulations.
# (compute_codon_elongt presumably comes from analysis_utils -- confirm.)
codon_elongt = compute_codon_elongt(gr25_ptRNA, len(gr25_pCodon), ensmbl_latency_dict)

Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
Computing...
CPU times: user 1min 20s, sys: 745 ms, total: 1min 21s
Wall time: 1min 23s


In [6]:
#GFP gene optimization

# Collected results, one entry per tRNA distribution:
#   key -> [average elongation latency, {tRNA tag: abundance}, {codon tag: codon frequency}]
tRNA_distributions = {}
tRNA_tags = ['Ala1B', 'Ala2', 'Arg2', 'Arg3', 'Arg4', 'Arg5', 'Asn', 'Asp1', 'Cys', 'Gln1', 'Gln2', 'Glu2', 'Gly2', 'Gly3', 'His', 'Ile1', 'Leu1', 'Leu2', 'Leu3', 'Leu4', 'Leu5', 'Lys', 'Met m', 'Phe', 'Pro1', 'Pro2', 'Pro3', 'Sec', 'Ser1', 'Ser2', 'Ser3', 'Ser5', 'Thr1', 'Thr2', 'Thr3', 'Thr4', 'Trp', 'Tyr1+Tyr2', 'Val1', 'Val2A+2B']
codon_tags = ['GGG', 'GGA', 'GGU', 'GGC', 'GAG', 'GAA', 'GAU', 'GAC', 'GUG', 'GUA', 'GUU', 'GUC', 'GCG', 'GCA', 'GCU', 'GCC', 'AGG', 'AGA', 'AGU', 'AGC', 'AAG', 'AAA', 'AAU', 'AAC', 'AUG', 'AUA', 'AUU', 'AUC', 'ACG', 'ACA', 'ACU', 'ACC', 'UGG', 'UGA', 'UGU', 'UGC', 'UAU', 'UAC', 'UUG', 'UUA', 'UUU', 'UUC', 'UCG', 'UCA', 'UCU', 'UCC', 'CGG', 'CGA', 'CGU', 'CGC', 'CAG', 'CAA', 'CAU', 'CAC', 'CUG', 'CUA', 'CUU', 'CUC', 'CCG', 'CCA', 'CCU', 'CCC']


from analysis_utils import *
# The gene table is read relative to the working directory, like the Excel and
# .npy inputs of this cell, instead of through a user-specific absolute path.
gene_map, pCodon_GFP = singlegene_to_genemap('./data/tables/sfGFP_RED20.csv')
#wildtype
# Column 6 of the header-less sheet holds the tRNA values; normalise them to sum to 1.
ptRNA_red20 = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20 = ptRNA_red20[6]/sum(ptRNA_red20[6])

codon_elongt = compute_codon_elongt(ptRNA_red20, len(pCodon_GFP), ensmbl_latency_dict)
avg_elongt_wt = compute_gene_elongt(gene_map['sfGFP_RED20'],codon_elongt)
print('elongt wildtype', avg_elongt_wt)
tRNA_distributions['GFP_wt'] = [avg_elongt_wt,dict(zip(tRNA_tags,ptRNA_red20)),dict(zip(codon_tags,pCodon_GFP))]

#Uniform
# Start from the same normalised column-6 abundances as the wildtype case, then
# give every tRNA that is present (value > 0) the same probability 1/20.
# The constant assumes exactly 20 tRNAs are present in this table.
abundance_column = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx', header=None)[6]
ptRNA_red20_uniform = abundance_column/sum(abundance_column)
ptRNA_red20_uniform = ptRNA_red20_uniform.mask(ptRNA_red20_uniform > 0, 1/20)

codon_elongt = compute_codon_elongt(ptRNA_red20_uniform, len(pCodon_GFP), ensmbl_latency_dict)
avg_elongt_uniform = compute_gene_elongt(gene_map['sfGFP_RED20'], codon_elongt)
print('elongt uniform', avg_elongt_uniform)
tRNA_distributions['GFP_uniform'] = [
    avg_elongt_uniform,
    dict(zip(tRNA_tags, ptRNA_red20_uniform)),
    dict(zip(codon_tags, pCodon_GFP)),
]


#GFP Codon-weighted correlated
ptRNA_red20 = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20 = ptRNA_red20[6]/sum(ptRNA_red20[6])
ptRNA_dict, pcodon_dict, codon_dict, codonLabels, pCodon, tRNA_dict = cognateDistrib(ptRNA_red20,pCodon_GFP,extra2=True)

# Weight of each tRNA = summed frequency of the codons listed for it in tRNA_dict.
tRNA_sum_dict = tRNA_dict.copy()
for keys in tRNA_dict.keys():
    tRNA_sum_dict[keys] = (sum([pcodon_dict[tRNA_key] for tRNA_key in tRNA_dict[keys]]))

# Convert to an array before normalising: dividing a plain list by the total
# only works when the total happens to be a NumPy scalar.
tRNA_weightedbycodon = np.asarray(list(tRNA_sum_dict.values()))/sum(tRNA_sum_dict.values())
# The positional pairing with tRNA_tags below needs one weight per tag
# (assumes tRNA_dict is ordered like tRNA_tags -- confirm against cognateDistrib).
assert len(tRNA_weightedbycodon) == len(tRNA_tags), "expected one codon weight per tRNA tag"
tRNA_sorted_indices = np.flip(tRNA_weightedbycodon.argsort())
tRNA_sorted = np.array(tRNA_tags)[tRNA_sorted_indices]

##Give each tRNA an abundance equal to its codon weight (tRNA rank follows codon-usage rank)
ptRNA_sorted = tRNA_weightedbycodon[tRNA_sorted_indices]
sortedtRNAdict = dict(zip(tRNA_sorted, ptRNA_sorted))
# Back to tRNA_tags order, which is the order compute_codon_elongt is given.
resortedtRNA = [sortedtRNAdict[tRNA] for tRNA in tRNA_tags]

codon_elongt = compute_codon_elongt(resortedtRNA, len(pCodon_GFP), ensmbl_latency_dict)
avg_elongt_codonweightedcorrelated = compute_gene_elongt(gene_map['sfGFP_RED20'],codon_elongt)
print('elongt codon-weighted correlated', avg_elongt_codonweightedcorrelated)

tRNA_distributions['GFP_codonweightedcorrelated'] = [avg_elongt_codonweightedcorrelated,dict(zip(tRNA_tags,resortedtRNA)),dict(zip(codon_tags,pCodon_GFP))]


#GFP Codon-weighted anticorrelated
# tRNA_tags and codon_tags are reused from the top of this cell; the verbatim
# copies that used to be redefined here were removed.

ptRNA_red20 = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20 = ptRNA_red20[6]/sum(ptRNA_red20[6])
ptRNA_dict, pcodon_dict, codon_dict, codonLabels, pCodon, tRNA_dict = cognateDistrib(ptRNA_red20,pCodon_GFP,extra2=True)

# Weight of each tRNA = summed frequency of the codons listed for it in tRNA_dict.
tRNA_sum_dict = tRNA_dict.copy()
for keys in tRNA_dict.keys():
    tRNA_sum_dict[keys] = (sum([pcodon_dict[tRNA_key] for tRNA_key in tRNA_dict[keys]]))

# Convert to an array before normalising: dividing a plain list by the total
# only works when the total happens to be a NumPy scalar.
tRNA_weightedbycodon = np.asarray(list(tRNA_sum_dict.values()))/sum(tRNA_sum_dict.values())
# The positional pairing with tRNA_tags below needs one weight per tag
# (assumes tRNA_dict is ordered like tRNA_tags -- confirm against cognateDistrib).
assert len(tRNA_weightedbycodon) == len(tRNA_tags), "expected one codon weight per tRNA tag"
tRNA_sorted_indices = np.flip(tRNA_weightedbycodon.argsort())
tRNA_sorted = np.array(tRNA_tags)[tRNA_sorted_indices]

##Reverse the non-zero weights so the most-used tRNA gets the smallest non-zero
##abundance; zero-weight tRNAs stay at zero at the end of the ranking.
ptRNA_sorted = tRNA_weightedbycodon[tRNA_sorted_indices]
ptRNA_sorted_flipped = np.concatenate([
    ptRNA_sorted[ptRNA_sorted != 0][::-1],
    ptRNA_sorted[ptRNA_sorted == 0],
])
sortedtRNAdict = dict(zip(tRNA_sorted, ptRNA_sorted_flipped))
# Back to tRNA_tags order, which is the order compute_codon_elongt is given.
resortedtRNA = [sortedtRNAdict[tRNA] for tRNA in tRNA_tags]

codon_elongt = compute_codon_elongt(resortedtRNA, len(pCodon_GFP), ensmbl_latency_dict)
avg_elongt_codonweightedanticorrelated = compute_gene_elongt(gene_map['sfGFP_RED20'],codon_elongt)
print('elongt codon-weighted anticorrelated', avg_elongt_codonweightedanticorrelated)

tRNA_distributions['GFP_codonweightedanticorrelated'] = [avg_elongt_codonweightedanticorrelated,dict(zip(tRNA_tags,resortedtRNA)),dict(zip(codon_tags,pCodon_GFP))]


########### Genetic algorithm fast and slow #############

# Fast search: keep the sampled tRNA distribution with the smallest elongation latency.
elongt_list_fast = np.load('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_fasttRNA_elongt.npy')
ptRNA_list_fast = np.load('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_fasttRNA_ptRNA.npy')
elongt_fastest = elongt_list_fast.min()
ptRNA_min = ptRNA_list_fast[elongt_list_fast.argmin()]
tRNA_distributions['GFP_gafast'] = [
    elongt_fastest,
    dict(zip(tRNA_tags, ptRNA_min)),
    dict(zip(codon_tags, pCodon_GFP)),
]

# Slow search: keep the sampled tRNA distribution with the largest elongation latency.
elongt_list_slow = np.load('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_slowtRNA_elongt.npy')
ptRNA_list_slow = np.load('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_slowtRNA_ptRNA.npy')
elongt_slowest = elongt_list_slow.max()
ptRNA_max = ptRNA_list_slow[elongt_list_slow.argmax()]
tRNA_distributions['GFP_gaslow'] = [
    elongt_slowest,
    dict(zip(tRNA_tags, ptRNA_max)),
    dict(zip(codon_tags, pCodon_GFP)),
]


######### Export all to pickle ##############

import pickle

# Write every GFP entry collected in tRNA_distributions to a single pickle
# file in the working directory.
with open('tRNA_distributions-GFP.pickle', 'wb') as pickle_out:
    pickle.dump(
        tRNA_distributions,
        pickle_out,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

  warn(msg)


elongt wildtype 172.03439339681063


  warn(msg)


elongt uniform 177.98557489079514


  warn(msg)


elongt codon-weighted correlated 162.55149505038742


  warn(msg)


elongt codon-weighted anticorrelated 201.5280117630773


In [13]:
# Inspect the slow genetic-algorithm entry:
# [elongation latency, {tRNA tag: abundance}, {codon tag: codon frequency}]
tRNA_distributions['GFP_gaslow']

[250.83730445466912,
 {'Ala1B': 0.00661085002001485,
  'Ala2': 0.0,
  'Arg2': 0.0019971128079857344,
  'Arg3': 0.0,
  'Arg4': 0.0,
  'Arg5': 0.0,
  'Asn': 0.0014292634719609955,
  'Asp1': 0.003766698270230139,
  'Cys': 0.0047040969672237235,
  'Gln1': 0.0,
  'Gln2': 0.002308752464957013,
  'Glu2': 0.007053452230498484,
  'Gly2': 0.002656256890993093,
  'Gly3': 0.0,
  'His': 0.005606656731590659,
  'Ile1': 0.0031969596152049843,
  'Leu1': 0.0,
  'Leu2': 0.0,
  'Leu3': 0.0027200038313679614,
  'Leu4': 0.0,
  'Leu5': 0.0,
  'Lys': 0.0022994414824551866,
  'Met m': 0.006007177101277787,
  'Phe': 0.006364825205384883,
  'Pro1': 0.6772020061619743,
  'Pro2': 0.0,
  'Pro3': 0.0,
  'Sec': 0.0,
  'Ser1': 0.001964793853604591,
  'Ser2': 0.0,
  'Ser3': 0.0,
  'Ser5': 0.0,
  'Thr1': 0.0027545724067898647,
  'Thr2': 0.0,
  'Thr3': 0.0,
  'Thr4': 0.0,
  'Trp': 0.2549425809848878,
  'Tyr1+Tyr2': 0.003329881490910349,
  'Val1': 0.0,
  'Val2A+2B': 0.003084618010687356},
 {'GGG': 0.0,
  'GGA': 0.0920502

In [18]:
#mRFP gene optimization

# Collected results, one entry per tRNA distribution:
#   key -> [average elongation latency, {tRNA tag: abundance}, {codon tag: codon frequency}]
tRNA_distributions = {}
tRNA_tags = ['Ala1B', 'Ala2', 'Arg2', 'Arg3', 'Arg4', 'Arg5', 'Asn', 'Asp1', 'Cys', 'Gln1', 'Gln2', 'Glu2', 'Gly2', 'Gly3', 'His', 'Ile1', 'Leu1', 'Leu2', 'Leu3', 'Leu4', 'Leu5', 'Lys', 'Met m', 'Phe', 'Pro1', 'Pro2', 'Pro3', 'Sec', 'Ser1', 'Ser2', 'Ser3', 'Ser5', 'Thr1', 'Thr2', 'Thr3', 'Thr4', 'Trp', 'Tyr1+Tyr2', 'Val1', 'Val2A+2B']
codon_tags = ['GGG', 'GGA', 'GGU', 'GGC', 'GAG', 'GAA', 'GAU', 'GAC', 'GUG', 'GUA', 'GUU', 'GUC', 'GCG', 'GCA', 'GCU', 'GCC', 'AGG', 'AGA', 'AGU', 'AGC', 'AAG', 'AAA', 'AAU', 'AAC', 'AUG', 'AUA', 'AUU', 'AUC', 'ACG', 'ACA', 'ACU', 'ACC', 'UGG', 'UGA', 'UGU', 'UGC', 'UAU', 'UAC', 'UUG', 'UUA', 'UUU', 'UUC', 'UCG', 'UCA', 'UCU', 'UCC', 'CGG', 'CGA', 'CGU', 'CGC', 'CAG', 'CAA', 'CAU', 'CAC', 'CUG', 'CUA', 'CUU', 'CUC', 'CCG', 'CCA', 'CCU', 'CCC']


from analysis_utils import *
# The gene table is read relative to the working directory, like the Excel and
# .npy inputs of this cell, instead of through a user-specific absolute path.
gene_map, pCodon_mRFP = singlegene_to_genemap('./data/tables/mRFP_RED20.csv')
#wildtype
# Column 6 of the header-less sheet holds the tRNA values; normalise them to sum to 1.
ptRNA_red20 = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20 = ptRNA_red20[6]/sum(ptRNA_red20[6])

# Use this cell's own codon vector for the codon count; the previous
# len(pCodon_GFP) silently depended on the GFP cell having been run first.
codon_elongt = compute_codon_elongt(ptRNA_red20, len(pCodon_mRFP), ensmbl_latency_dict)
avg_elongt_wt = compute_gene_elongt(gene_map['mRFP_RED20'],codon_elongt)
print('elongt wildtype', avg_elongt_wt)
# NOTE(review): the GFP cell stores its wildtype entry under 'GFP_wt'; this key
# is kept as 'mRFP_RED20' because it is written to the exported pickle -- confirm
# with the consumers of that file before renaming it to 'mRFP_wt'.
tRNA_distributions['mRFP_RED20'] = [avg_elongt_wt,dict(zip(tRNA_tags,ptRNA_red20)),dict(zip(codon_tags,pCodon_mRFP))]

#Uniform
# Start from the normalised column-6 abundances, then give every tRNA that is
# present (value > 0) the same probability 1/20. The constant assumes exactly
# 20 tRNAs are present in this table.
ptRNA_red20_uniform = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20_uniform = ptRNA_red20_uniform[6]/sum(ptRNA_red20_uniform[6])
ptRNA_red20_uniform[ptRNA_red20_uniform>0] = 1/20

# Use the mRFP codon vector for the codon count; the previous len(pCodon_GFP)
# silently depended on the GFP cell having been run first.
codon_elongt = compute_codon_elongt(ptRNA_red20_uniform, len(pCodon_mRFP), ensmbl_latency_dict)
avg_elongt_uniform = compute_gene_elongt(gene_map['mRFP_RED20'],codon_elongt)
print('elongt uniform', avg_elongt_uniform)
tRNA_distributions['mRFP_uniform'] = [avg_elongt_uniform,dict(zip(tRNA_tags,ptRNA_red20_uniform)),dict(zip(codon_tags,pCodon_mRFP))]


#mRFP Codon-weighted correlated

ptRNA_red20 = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20 = ptRNA_red20[6]/sum(ptRNA_red20[6])
ptRNA_dict, pcodon_dict, codon_dict, codonLabels, pCodon, tRNA_dict = cognateDistrib(ptRNA_red20,pCodon_mRFP,extra2=True)

# Weight of each tRNA = summed frequency of the codons listed for it in tRNA_dict.
tRNA_sum_dict = tRNA_dict.copy()
for keys in tRNA_dict.keys():
    tRNA_sum_dict[keys] = (sum([pcodon_dict[tRNA_key] for tRNA_key in tRNA_dict[keys]]))

# Convert to an array before normalising: dividing a plain list by the total
# only works when the total happens to be a NumPy scalar.
tRNA_weightedbycodon = np.asarray(list(tRNA_sum_dict.values()))/sum(tRNA_sum_dict.values())
# The positional pairing with tRNA_tags below needs one weight per tag
# (assumes tRNA_dict is ordered like tRNA_tags -- confirm against cognateDistrib).
assert len(tRNA_weightedbycodon) == len(tRNA_tags), "expected one codon weight per tRNA tag"
tRNA_sorted_indices = np.flip(tRNA_weightedbycodon.argsort())
tRNA_sorted = np.array(tRNA_tags)[tRNA_sorted_indices]

##Give each tRNA an abundance equal to its mRFP codon weight (tRNA rank follows codon-usage rank)
ptRNA_sorted = tRNA_weightedbycodon[tRNA_sorted_indices]
sortedtRNAdict = dict(zip(tRNA_sorted, ptRNA_sorted))
# Back to tRNA_tags order, which is the order compute_codon_elongt is given.
resortedtRNA = [sortedtRNAdict[tRNA] for tRNA in tRNA_tags]

# Use the mRFP codon vector for the codon count; the previous len(pCodon_GFP)
# silently depended on the GFP cell having been run first.
codon_elongt = compute_codon_elongt(resortedtRNA, len(pCodon_mRFP), ensmbl_latency_dict)
avg_elongt_codonweightedcorrelated = compute_gene_elongt(gene_map['mRFP_RED20'],codon_elongt)
print('elongt codon-weighted correlated', avg_elongt_codonweightedcorrelated)

tRNA_distributions['mRFP_codonweightedcorrelated'] = [avg_elongt_codonweightedcorrelated,dict(zip(tRNA_tags,resortedtRNA)),dict(zip(codon_tags,pCodon_mRFP))]


#mRFP Codon-weighted anticorrelated
# tRNA_tags and codon_tags are reused from the top of this cell; the verbatim
# copies that used to be redefined here were removed.

ptRNA_red20 = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20 = ptRNA_red20[6]/sum(ptRNA_red20[6])
ptRNA_dict, pcodon_dict, codon_dict, codonLabels, pCodon, tRNA_dict = cognateDistrib(ptRNA_red20,pCodon_mRFP,extra2=True)

# Weight of each tRNA = summed frequency of the codons listed for it in tRNA_dict.
tRNA_sum_dict = tRNA_dict.copy()
for keys in tRNA_dict.keys():
    tRNA_sum_dict[keys] = (sum([pcodon_dict[tRNA_key] for tRNA_key in tRNA_dict[keys]]))

# Convert to an array before normalising: dividing a plain list by the total
# only works when the total happens to be a NumPy scalar.
tRNA_weightedbycodon = np.asarray(list(tRNA_sum_dict.values()))/sum(tRNA_sum_dict.values())
# The positional pairing with tRNA_tags below needs one weight per tag
# (assumes tRNA_dict is ordered like tRNA_tags -- confirm against cognateDistrib).
assert len(tRNA_weightedbycodon) == len(tRNA_tags), "expected one codon weight per tRNA tag"
tRNA_sorted_indices = np.flip(tRNA_weightedbycodon.argsort())
tRNA_sorted = np.array(tRNA_tags)[tRNA_sorted_indices]

##Reverse the non-zero weights so the most-used tRNA gets the smallest non-zero
##abundance; zero-weight tRNAs stay at zero at the end of the ranking.
ptRNA_sorted = tRNA_weightedbycodon[tRNA_sorted_indices]
ptRNA_sorted_flipped = np.concatenate([
    ptRNA_sorted[ptRNA_sorted != 0][::-1],
    ptRNA_sorted[ptRNA_sorted == 0],
])
sortedtRNAdict = dict(zip(tRNA_sorted, ptRNA_sorted_flipped))
# Back to tRNA_tags order, which is the order compute_codon_elongt is given.
resortedtRNA = [sortedtRNAdict[tRNA] for tRNA in tRNA_tags]

# Use the mRFP codon vector for the codon count; the previous len(pCodon_GFP)
# silently depended on the GFP cell having been run first.
codon_elongt = compute_codon_elongt(resortedtRNA, len(pCodon_mRFP), ensmbl_latency_dict)
avg_elongt_codonweightedanticorrelated = compute_gene_elongt(gene_map['mRFP_RED20'],codon_elongt)
print('elongt codon-weighted anticorrelated', avg_elongt_codonweightedanticorrelated)

tRNA_distributions['mRFP_codonweightedanticorrelated'] = [avg_elongt_codonweightedanticorrelated,dict(zip(tRNA_tags,resortedtRNA)),dict(zip(codon_tags,pCodon_mRFP))]


########### Genetic algorithm fast and slow #############

# Fast search: keep the sampled tRNA distribution with the smallest elongation latency.
elongt_list_fast = np.load('./data/UniformRandomtRNA_minimalcode-mRFP_2ksample_fasttRNA_elongt.npy')
ptRNA_list_fast = np.load('./data/UniformRandomtRNA_minimalcode-mRFP_2ksample_fasttRNA_ptRNA.npy')
elongt_fastest = elongt_list_fast.min()
ptRNA_min = ptRNA_list_fast[elongt_list_fast.argmin()]
tRNA_distributions['mRFP_gafast'] = [
    elongt_fastest,
    dict(zip(tRNA_tags, ptRNA_min)),
    dict(zip(codon_tags, pCodon_mRFP)),
]

# Slow search: keep the sampled tRNA distribution with the largest elongation latency.
elongt_list_slow = np.load('./data/UniformRandomtRNA_minimalcode-mRFP_2ksample_slowtRNA_elongt.npy')
ptRNA_list_slow = np.load('./data/UniformRandomtRNA_minimalcode-mRFP_2ksample_slowtRNA_ptRNA.npy')
elongt_slowest = elongt_list_slow.max()
ptRNA_max = ptRNA_list_slow[elongt_list_slow.argmax()]
tRNA_distributions['mRFP_gaslow'] = [
    elongt_slowest,
    dict(zip(tRNA_tags, ptRNA_max)),
    dict(zip(codon_tags, pCodon_mRFP)),
]


######### Export all to pickle ##############

import pickle

# Write every mRFP entry collected in tRNA_distributions to a single pickle
# file in the working directory.
with open('tRNA_distributions-mRFP.pickle', 'wb') as pickle_out:
    pickle.dump(
        tRNA_distributions,
        pickle_out,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

elongt wildtype 173.44537806058804
elongt uniform 179.08324128594163
elongt codon-weighted correlated 162.42524525183458
elongt codon-weighted anticorrelated 196.00742981509876


In [None]:
#Max elongt check w/ all tRNA zero but one tRNA = 1 probability, as control
# Load the sfGFP gene map here rather than reusing `gene_map` from an earlier
# cell: the mRFP cell rebinds `gene_map`, so when the notebook runs top to
# bottom 'sfGFP_RED20' is no longer guaranteed to be one of its keys. A
# cell-local name is used so the shared `gene_map` is left untouched.
gene_map_GFP, pCodon_GFP = singlegene_to_genemap('./data/tables/sfGFP_RED20.csv')

ptRNA_red20_zeros = pd.read_excel('./data/tables/tRNAValues_RED20.xlsx',header=None)
ptRNA_red20_zeros = ptRNA_red20_zeros[6]/sum(ptRNA_red20_zeros[6])
# Zero every present tRNA, then put all probability mass on the entry labelled 1
# (presumably the second tRNA in tRNA_tags order -- confirm the row order of the sheet).
ptRNA_red20_zeros[ptRNA_red20_zeros>0] = 0
ptRNA_red20_zeros[1] = 1

codon_elongt = compute_codon_elongt(ptRNA_red20_zeros, len(pCodon_GFP), ensmbl_latency_dict)
avg_elongt_max = compute_gene_elongt(gene_map_GFP['sfGFP_RED20'],codon_elongt)
print('elongt max', avg_elongt_max)

In [2]:
import pickle

# Read back the GFP tRNA-distribution dictionary written by the export step.
# pickle.load can execute arbitrary code, so only open files produced by this notebook.
with open(r"tRNA_distributions-GFP.pickle", "rb") as pickle_in:
    e = pickle.load(pickle_in)

In [12]:
# Show the 'GFP_gaslow' entry of the dictionary loaded from the pickle file.
e['GFP_gaslow']

[250.83730445466912,
 {'Ala1B': 0.00661085002001485,
  'Ala2': 0.0,
  'Arg2': 0.0019971128079857344,
  'Arg3': 0.0,
  'Arg4': 0.0,
  'Arg5': 0.0,
  'Asn': 0.0014292634719609955,
  'Asp1': 0.003766698270230139,
  'Cys': 0.0047040969672237235,
  'Gln1': 0.0,
  'Gln2': 0.002308752464957013,
  'Glu2': 0.007053452230498484,
  'Gly2': 0.002656256890993093,
  'Gly3': 0.0,
  'His': 0.005606656731590659,
  'Ile1': 0.0031969596152049843,
  'Leu1': 0.0,
  'Leu2': 0.0,
  'Leu3': 0.0027200038313679614,
  'Leu4': 0.0,
  'Leu5': 0.0,
  'Lys': 0.0022994414824551866,
  'Met m': 0.006007177101277787,
  'Phe': 0.006364825205384883,
  'Pro1': 0.6772020061619743,
  'Pro2': 0.0,
  'Pro3': 0.0,
  'Sec': 0.0,
  'Ser1': 0.001964793853604591,
  'Ser2': 0.0,
  'Ser3': 0.0,
  'Ser5': 0.0,
  'Thr1': 0.0027545724067898647,
  'Thr2': 0.0,
  'Thr3': 0.0,
  'Thr4': 0.0,
  'Trp': 0.2549425809848878,
  'Tyr1+Tyr2': 0.003329881490910349,
  'Val1': 0.0,
  'Val2A+2B': 0.003084618010687356},
 {'GGG': 0.0,
  'GGA': 0.0920502

In [10]:
#GFP GA slow fast optimization

# Rebuilds only the genetic-algorithm entries for sfGFP:
#   key -> [elongation latency, {tRNA tag: abundance}, {codon tag: codon frequency}]
tRNA_distributions = {}
from analysis_utils import *
# The gene table is read relative to the working directory, like the .npy
# inputs of this cell, instead of through a user-specific absolute path.
gene_map, pCodon_GFP = singlegene_to_genemap('./data/tables/sfGFP_RED20.csv')

tRNA_tags = ['Ala1B', 'Ala2', 'Arg2', 'Arg3', 'Arg4', 'Arg5', 'Asn', 'Asp1', 'Cys', 'Gln1', 'Gln2', 'Glu2', 'Gly2', 'Gly3', 'His', 'Ile1', 'Leu1', 'Leu2', 'Leu3', 'Leu4', 'Leu5', 'Lys', 'Met m', 'Phe', 'Pro1', 'Pro2', 'Pro3', 'Sec', 'Ser1', 'Ser2', 'Ser3', 'Ser5', 'Thr1', 'Thr2', 'Thr3', 'Thr4', 'Trp', 'Tyr1+Tyr2', 'Val1', 'Val2A+2B']
codon_tags = ['GGG', 'GGA', 'GGU', 'GGC', 'GAG', 'GAA', 'GAU', 'GAC', 'GUG', 'GUA', 'GUU', 'GUC', 'GCG', 'GCA', 'GCU', 'GCC', 'AGG', 'AGA', 'AGU', 'AGC', 'AAG', 'AAA', 'AAU', 'AAC', 'AUG', 'AUA', 'AUU', 'AUC', 'ACG', 'ACA', 'ACU', 'ACC', 'UGG', 'UGA', 'UGU', 'UGC', 'UAU', 'UAC', 'UUG', 'UUA', 'UUU', 'UUC', 'UCG', 'UCA', 'UCU', 'UCC', 'CGG', 'CGA', 'CGU', 'CGC', 'CAG', 'CAA', 'CAU', 'CAC', 'CUG', 'CUA', 'CUU', 'CUC', 'CCG', 'CCA', 'CCU', 'CCC']

import numpy as np
# Fast search: keep the sampled tRNA distribution with the smallest elongation latency.
elongt_list_fast =np.load('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_fasttRNA_elongt.npy')
ptRNA_list_fast=np.load('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_fasttRNA_ptRNA.npy')
elongt_fastest = np.min(elongt_list_fast)
ptRNA_min = ptRNA_list_fast[np.argmin(elongt_list_fast)]
tRNA_distributions['GFP_gafast'] = [elongt_fastest,dict(zip(tRNA_tags,ptRNA_min)),dict(zip(codon_tags,pCodon_GFP))]

# Slow search: keep the sampled tRNA distribution with the largest elongation latency.
elongt_list_slow =np.load('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_slowtRNA_elongt.npy')
ptRNA_list_slow =np.load('./data/UniformRandomtRNA_minimalcode-sfGFP_2ksample_slowtRNA_ptRNA.npy')
elongt_slowest = np.max(elongt_list_slow)
ptRNA_max = ptRNA_list_slow[np.argmax(elongt_list_slow)]
tRNA_distributions['GFP_gaslow'] = [elongt_slowest,dict(zip(tRNA_tags,ptRNA_max)),dict(zip(codon_tags,pCodon_GFP))]




In [13]:
# Inspect one sampled tRNA distribution (index 2) from the slow genetic-algorithm samples.
ptRNA_list_slow[2]

array([0.00606658, 0.        , 0.00228968, 0.        , 0.        ,
       0.        , 0.00326699, 0.0043185 , 0.01338964, 0.        ,
       0.00289458, 0.00679118, 0.00462071, 0.        , 0.00381738,
       0.00304735, 0.        , 0.        , 0.00259271, 0.        ,
       0.        , 0.00221045, 0.01085204, 0.01972292, 0.62466653,
       0.        , 0.        , 0.        , 0.0024463 , 0.        ,
       0.        , 0.        , 0.00356713, 0.        , 0.        ,
       0.        , 0.27650342, 0.00404843, 0.        , 0.00288748])