Skip to content

Commit

Permalink
Changed tRNA_prop to num_tRNA so that the minimum number of tRNAs is always t…
Browse files Browse the repository at this point in the history
…here. Also changed the observables generator so that only 20 tRNA species are required.
  • Loading branch information
Ashwin-Srinivasan committed Aug 6, 2018
1 parent e43bfc3 commit 42051ef
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 68 deletions.
10 changes: 7 additions & 3 deletions wc_kb_gen/random/complex.py
Expand Up @@ -22,7 +22,7 @@ def clean_and_validate_options(self):
""" Apply default options and validate options """
options = self.options
assigned_complexes = options.get(
'assigned_complexes', ['subunit_30S', 'subunit_50S','complex_70S_IA', 'complex_70S_A'])
'assigned_complexes', ['subunit_30S', 'subunit_50S','complex_70S'])

options['assigned_complexes'] = assigned_complexes

Expand All @@ -34,10 +34,14 @@ def gen_components(self):
for comp in assigned_complexes:
comp_species = cell.species_types.get_or_create(
id=comp, __type=wc_kb.ComplexSpeciesType)

comp_species.concentration = 1e-2
species = comp_species.species.get_or_create(compartment=cytosol)
if comp.startswith('subunit'):
comp_species.formation_process = 7 # process_RibosomeAssembly
elif '70S' in comp:
comp_species.formation_process = 9 # process_translation
comp_species.subunits.append(wc_kb.SpeciesCoefficient(species = cell.species_types.get_one(id = 'subunit_30S').species.get_one(compartment = cytosol), coefficient = -1))
comp_species.subunits.append(wc_kb.SpeciesCoefficient(species = cell.species_types.get_one(id = 'subunit_50S').species.get_one(compartment = cytosol), coefficient = -1))
'''species_30S = cell.species_types.get_one(id = 'subunit_30S').species.get_one(compartment = cytosol)
species_50S = cell.species_types.get_one(id = 'subunit_50S').species.get_one(compartment = cytosol)
comp_species.subunits.append(species_30S.species_coefficients.get_or_create(coefficient = 1))
comp_species.subunits.append(species_50S.species_coefficients.get_or_create(coefficient = 1))'''
66 changes: 38 additions & 28 deletions wc_kb_gen/random/genome.py
Expand Up @@ -76,11 +76,11 @@ def clean_and_validate_options(self):
assert(rRNA_prop >= 0 and rRNA_prop <= 1)
options['rRNA_prop'] = rRNA_prop

tRNA_prop = options.get('tRNA_prop', 0.5)
assert(tRNA_prop >= 0 and tRNA_prop <= 1)
options['tRNA_prop'] = tRNA_prop
num_tRNA = options.get('num_tRNA', 20)
assert(num_tRNA >= 20)
options['num_tRNA'] = num_tRNA

assert((ncRNA_prop + rRNA_prop + tRNA_prop) <= 1)
assert((ncRNA_prop + rRNA_prop + num_tRNA/mean_num_genes) <= 1)

# DOI: 10.1093/molbev/msk019
mean_gene_len = options.get(
Expand Down Expand Up @@ -142,7 +142,8 @@ def gen_genome(self):
chromosome_topology = options.get('chromosome_topology')
ncRNA_prop = options.get('ncRNA_prop')
rRNA_prop = options.get('rRNA_prop')
tRNA_prop = options.get('tRNA_prop')
num_tRNA = options.get('num_tRNA')


# print(tRNA_prop)

Expand All @@ -164,11 +165,13 @@ def gen_genome(self):
# The probability of each base being selected randomly
PROB_BASES = [(1 - mean_gc_frac) / 2, mean_gc_frac /
2, mean_gc_frac/2, (1-mean_gc_frac)/2]


num_genes_all = self.rand(mean_num_genes, min = num_tRNA)[0]

# Create a chromosome n times
for i_chr in range(num_chromosomes):
# number of genes in the chromosome
num_genes = self.rand(mean_num_genes / num_chromosomes)[0]
num_genes = math.ceil(num_genes_all / num_chromosomes)
# list of gene lengths (generated randomly) on chromosome
gene_lens = 3 * self.rand(mean_gene_len, count=num_genes, min=2)

Expand Down Expand Up @@ -201,6 +204,7 @@ def gen_genome(self):
gene_starts = numpy.int64(numpy.cumsum(numpy.concatenate(([0], gene_lens[0:-1])) +
numpy.concatenate((numpy.round(intergene_lens[0:1] / 2), intergene_lens[1:]))))

count = 0 #temporary way of incorporating num_tRNA
# creates GeneLocus objects for the genes and labels their GeneType (which type of RNA they transcribe)
for i_gene, gene_start in enumerate(gene_starts):
gene = self.knowledge_base.cell.loci.get_or_create(
Expand All @@ -210,27 +214,33 @@ def gen_genome(self):
gene.end = gene.start + gene_lens[i_gene] - 1 # 1-indexed
# print(gene_lens[i_gene] % 3 == 0)
gene.name = 'gene {} {}'.format(i_chr+1, i_gene+1)
typeList = [wc_kb.GeneType.mRna, wc_kb.GeneType.rRna,
wc_kb.GeneType.sRna, wc_kb.GeneType.tRna]
prob_rna = [1 - ncRNA_prop - tRNA_prop -
rRNA_prop, rRNA_prop, ncRNA_prop, tRNA_prop]
gene.type = random.choice(typeList, p=prob_rna)
if gene.type == wc_kb.GeneType.mRna: # if mRNA, then set up start/stop codons in the gene
start_codon = random.choice(START_CODONS)
stop_codon = random.choice(STOP_CODONS)
seq_str = str(chro.seq)
seq_str = seq_str[:gene.start-1] + start_codon + \
seq_str[gene.start+2: gene.end-3] + \
stop_codon + seq_str[gene.end:]
for i in range(gene.start+2, gene.end-3, 3):
# print(seq_str[i:i+3])
while seq_str[i:i+3] in START_CODONS or seq_str[i:i+3] in STOP_CODONS:
# print('here')
codon_i = "".join(random.choice(
BASES, p=PROB_BASES, size=(3,)))
seq_str = seq_str[:i]+codon_i+seq_str[i+3:]

chro.seq = Seq(seq_str, Alphabet.DNAAlphabet())

if count < num_tRNA:
gene.type = wc_kb.GeneType.tRna
count += 1
else:
tRNA_prop = num_tRNA / num_genes_all
typeList = [wc_kb.GeneType.mRna, wc_kb.GeneType.rRna,
wc_kb.GeneType.sRna]
mRNA_prop = 1 - rRNA_prop/(1 - tRNA_prop) - ncRNA_prop/(1 - tRNA_prop)
prob_rna = [mRNA_prop, rRNA_prop/(1 - tRNA_prop), ncRNA_prop/(1 - tRNA_prop)]
gene.type = random.choice(typeList, p=prob_rna)
if gene.type == wc_kb.GeneType.mRna: # if mRNA, then set up start/stop codons in the gene
start_codon = random.choice(START_CODONS)
stop_codon = random.choice(STOP_CODONS)
seq_str = str(chro.seq)
seq_str = seq_str[:gene.start-1] + start_codon + \
seq_str[gene.start+2: gene.end-3] + \
stop_codon + seq_str[gene.end:]
for i in range(gene.start+2, gene.end-3, 3):
# print(seq_str[i:i+3])
while seq_str[i:i+3] in START_CODONS or seq_str[i:i+3] in STOP_CODONS:
# print('here')
codon_i = "".join(random.choice(
BASES, p=PROB_BASES, size=(3,)))
seq_str = seq_str[:i]+codon_i+seq_str[i+3:]

chro.seq = Seq(seq_str, Alphabet.DNAAlphabet())

def gen_rnas_proteins(self):
""" Creates RNA and protein objects corresponding to genes on chromosome
Expand Down
106 changes: 69 additions & 37 deletions wc_kb_gen/random/observables.py
Expand Up @@ -28,84 +28,116 @@ def clean_and_validate_options(self):
codons = [a + b + c for a in bases for b in bases for c in bases]
default_trnas = []
for codon in codons:
if codon != 'TAA' and codon != 'TAG' and codon != 'TGA':
default_trnas.append('tRNA_'+codon)

assigned_trnas = options.get('assigned_trnas', default_trnas)

rnas = self.knowledge_base.cell.species_types.get(
__type=wc_kb.RnaSpeciesType)

count = 0
for rna in rnas:
if rna.type == wc_kb.RnaType.tRna:
count += 1

assert (len(assigned_trnas) <= count)
options['assigned_trnas'] = assigned_trnas

assigned_proteins = options.get('assigned_proteins', ['IF', 'EF', 'RF',
'deg_ATPase', 'deg_protease', 'deg_rnase',
'rna_poly'])
'rna_poly', 'aminoacyl_synthetase'])

prots = self.knowledge_base.cell.species_types.get(
__type=wc_kb.ProteinSpeciesType)

assert(len(assigned_proteins) <= len(prots))
options['assigned_proteins'] = assigned_proteins

assigned_complexes = options.get('assigned_complexes', ['complex_70S_IA', 'complex_70S_A'])
assigned_complexes = options.get('assigned_complexes', ['complex_70S'])

options['assigned_complexes'] = assigned_complexes


def gen_components(self):
""" Takes random samples of the generated rnas and proteins and assigns them functions based on the included list of proteins and rnas"""

cell = self.knowledge_base.cell
cytosol = cell.compartments.get_one(id='c')

assigned_trnas = self.options['assigned_trnas']
assigned_proteins = self.options['assigned_proteins']
assigned_complexes = self.options['assigned_complexes']

prots = self.knowledge_base.cell.species_types.get(
__type=wc_kb.ProteinSpeciesType)
rnas = self.knowledge_base.cell.species_types.get(
__type=wc_kb.RnaSpeciesType)

trnas = []
for rna in rnas:
if rna.type == wc_kb.RnaType.tRna:
trnas.append(rna)

sampled_trnas = numpy.random.choice(
trnas, len(assigned_trnas), replace=False)

assigned_trnas = iter(assigned_trnas)

for rna in sampled_trnas:
rna_name = next(assigned_trnas)
observable = cell.observables.get_or_create(id=rna_name+'_obs')
observable.name = rna_name
observable.species.append(
wc_kb.SpeciesCoefficient(species=wc_kb.Species(species_type=rna, compartment=cytosol), coefficient=1))

codons = {
'I': ['ATT', 'ATC', 'ATA'],
'L': ['CTT', 'CTC', 'CTA', 'CTG', 'TTA', 'TTG'],
'V': ['GTT', 'GTC', 'GTA', 'GTG'],
'F': ['TTT', 'TTC'],
'M': ['ATG'],
'C': ['TGT', 'TGC'],
'A': ['GCT', 'GCC', 'GCA', 'GCG'],
'G': ['GGT', 'GGC', 'GGA', 'GGG'],
'P': ['CCT', 'CCC', 'CCA', 'CCG'],
'T': ['ACT', 'ACC', 'ACA', 'ACG'],
'S': ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'],
'Y': ['TAT', 'TAC'],
'W': ['TGG'],
'Q': ['CAA', 'CAG'],
'N': ['AAT', 'AAC'],
'H': ['CAT', 'CAC'],
'E': ['GAA', 'GAG'],
'D': ['GAT', 'GAC'],
'K': ['AAA', 'AAG'],
'R': ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],}

for aa in codons:
rna = numpy.random.choice(trnas)
trnas.remove(rna)
species_coefficient = wc_kb.SpeciesCoefficient(species=wc_kb.Species(species_type=rna, compartment=cytosol), coefficient=1)
for i in range(len(codons[aa])):
codon = codons[aa][i]
rna_name = 'tRNA_'+codon
observable = cell.observables.get_or_create(id=rna_name+'_obs')
observable.name = rna_name
observable.species.append(species_coefficient)

sampled_proteins = numpy.random.choice(
prots, len(assigned_proteins), replace=False)

assigned_proteins = iter(assigned_proteins)
for protein in sampled_proteins:
protein_name = next(assigned_proteins)
observable = cell.observables.get_or_create(id=protein_name+'_obs')
observable.name = protein_name
observable.species.append(
wc_kb.SpeciesCoefficient(species=wc_kb.Species(species_type=protein, compartment=cytosol), coefficient=1))

if protein_name.startswith('IF'):
observable = cell.observables.get_or_create(id='IF_obs')
observable.name = 'IF'
observable.species.append(
wc_kb.SpeciesCoefficient(species=wc_kb.Species(species_type=protein, compartment=cytosol), coefficient=1))
if protein_name.startswith('EF'):
observable = cell.observables.get_or_create(id='EF_obs')
observable.name = 'EF'
observable.species.append(
wc_kb.SpeciesCoefficient(species=wc_kb.Species(species_type=protein, compartment=cytosol), coefficient=1))
if protein_name.startswith('RF'):
observable = cell.observables.get_or_create(id='RF_obs')
observable.name = 'RF'
observable.species.append(
wc_kb.SpeciesCoefficient(species=wc_kb.Species(species_type=protein, compartment=cytosol), coefficient=1))
if protein_name.startswith('rna_poly'):
observable = cell.observables.get_or_create(id='rna_poly_obs')
observable.name = 'rna_poly'
observable.species.append(
wc_kb.SpeciesCoefficient(species=wc_kb.Species(species_type=protein, compartment=cytosol), coefficient=1))
if protein_name.startswith('deg_protease'):
observable = cell.observables.get_or_create(id='deg_protease_obs')
observable.name = 'deg_protease'
observable.species.append(
wc_kb.SpeciesCoefficient(species=wc_kb.Species(species_type=protein, compartment=cytosol), coefficient=1))
if protein_name.startswith('deg_rnase'):
observable = cell.observables.get_or_create(id='deg_rnase_obs')
observable.name = 'deg_rnase'
observable.species.append(
wc_kb.SpeciesCoefficient(species=wc_kb.Species(species_type=protein, compartment=cytosol), coefficient=1))

for comp in assigned_complexes:
comp_species = cell.species_types.get_or_create(id = comp, __type=wc_kb.ComplexSpeciesType)
observable = cell.observables.get_or_create(id=comp+'_obs')
observable.name = comp
observable.species.append(
wc_kb.SpeciesCoefficient(species=wc_kb.Species(species_type=comp_species, compartment=cytosol), coefficient=1))
observable.species.append(comp_species.species.get_one(compartment=cytosol).species_coefficients.get_or_create(coefficient=1))



0 comments on commit 42051ef

Please sign in to comment.