Skip to content

Commit

Permalink
V1.0: all classes / methods added and tested, in sync with mycoplasma…
Browse files Browse the repository at this point in the history
… database
  • Loading branch information
balazs1987 committed Mar 27, 2018
1 parent 5948e77 commit 8b0e033
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 109 deletions.
121 changes: 56 additions & 65 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,99 +323,90 @@ def test_constructor(self):
self.assertEqual(protein.half_life, 2)
self.assertEqual(protein.cell, None)

@unittest.skip('Work in progress')
def test_get_seq(self):
records = Bio.SeqIO.parse('tests/fixtures/seq.fna', 'fasta')
dna_seq = next(records).seq
dna = core.DnaSpeciesType(seq=dna_seq)
cell.knowledge_base = core.KnowledgeBase(translation_table=1)
cell = dna.cell = core.Cell()
dna1 = core.DnaSpeciesType(seq=dna_seq)

# MPN001
rna = core.RnaSpeciesType(dna=dna, start=692, end=1834, strand=core.PolymerStrand.positive)
self.assertEqual(rna.get_seq()[0:10], 'AUGAAAGUUU')
self.assertEqual(rna.get_seq()[-10:], 'UUCCAAGUAA')

orf = core.OpenReadingFrameLocus(polymer=rna, start=1, end=rna.get_len())
self.assertEqual(orf.get_seq()[0:10], 'AUGAAAGUUU')
self.assertEqual(orf.get_seq()[-10:], 'UUCCAAGUAA')
cell1 = dna1.cell = core.Cell()
cell1.knowledge_base = core.KnowledgeBase(translation_table=1)

prot = core.ProteinSpeciesType(orfs=[orf])
self.assertEqual(prot.get_seq()[0:10], 'MKVLINKNEL')
# MPN001
gene1 = core.GeneLocus(id='gene1', cell=cell1, polymer=dna1, start=692, end=1834)
tu1 = core.TranscriptionUnitLocus(id='tu1', gene=[gene1], polymer=dna1)
prot1 = core.ProteinSpeciesType(id='prot1', gene=gene1)
self.assertEqual(prot1.get_seq()[0:10], 'MKVLINKNEL')

# MPN011
rna = core.RnaSpeciesType(dna=dna, start=12838, end=13533, strand=core.PolymerStrand.negative)
self.assertEqual(rna.get_seq()[0:10], 'AUGAAAUUUA')
self.assertEqual(rna.get_seq()[-10:], 'AAUUGAGUAA')
gene2 = core.GeneLocus(id='gene2', cell=cell1, polymer=dna1, start=12838, end=13533, strand=core.PolymerStrand.negative)
tu2 = core.TranscriptionUnitLocus(id='tu2', gene=[gene2], polymer=dna1)
prot2 = core.ProteinSpeciesType(id='prot2', gene=gene2)
self.assertEqual(prot2.get_seq()[0:10], 'MKFKFLLTPL')

orf = core.OpenReadingFrameLocus(polymer=rna, start=1, end=rna.get_len())
self.assertEqual(orf.get_seq()[0:10], 'AUGAAAUUUA')
self.assertEqual(orf.get_seq()[-10:], 'AAUUGAGUAA')

prot = core.ProteinSpeciesType(orfs=[orf])
self.assertEqual(prot.get_seq()[0:10], 'MKFKFLLTPL')

@unittest.skip('Work in progress')
def test_get_empirical_formula(self):
# Test is based on Collagen Type IV a3 (https://pubchem.ncbi.nlm.nih.gov/compound/44511378)
dna1 = core.DnaSpeciesType(seq=Bio.Seq.Seq('TGTAATTATTATTCTAATTCTTATTCTTTTTGGTTAGCTTCTTTAAATCCTGAACGT', alphabet=Bio.Alphabet.DNAAlphabet()))
cell = dna1.cell = core.Cell()
cell.knowledge_base = core.KnowledgeBase(translation_table=1)
rna1 = core.RnaSpeciesType(dna=dna1, start=1, end=dna1.get_len(), strand=core.PolymerStrand.positive)
orf1 = core.OpenReadingFrameLocus(polymer=rna1, start=1, end=rna1.get_len())
prot1 = core.ProteinSpeciesType(orfs=[orf1])
cell1 = dna1.cell = core.Cell()
cell1.knowledge_base = core.KnowledgeBase(translation_table=1)

gene1 = core.GeneLocus(id='gene1', cell=cell1, polymer=dna1, start=1, end=dna1.get_len(), strand=core.PolymerStrand.positive)
tu1 = core.TranscriptionUnitLocus(id='tu1', gene=[gene1], polymer=dna1)
prot1 = core.ProteinSpeciesType(id='prot1', gene=gene1)
self.assertEqual(prot1.get_empirical_formula(), chem.EmpiricalFormula('C105H144N26O32S'))

# Test is based on Tuftsin (https://pubchem.ncbi.nlm.nih.gov/compounds/156080)
dna2 = core.DnaSpeciesType(seq=Bio.Seq.Seq('ACTAAACCTCGT', alphabet=Bio.Alphabet.DNAAlphabet()))
cell = dna2.cell = core.Cell()
cell.knowledge_base = core.KnowledgeBase(translation_table=1)
rna2 = core.RnaSpeciesType(dna=dna2, start=1, end=dna2.get_len(), strand=core.PolymerStrand.positive)
orf2 = core.OpenReadingFrameLocus(polymer=rna2, start=1, end=rna2.get_len())
prot2 = core.ProteinSpeciesType(orfs=[orf2])
self.assertEqual(prot2.get_empirical_formula(), chem.EmpiricalFormula('C21H40N8O6'))

@unittest.skip('Work in progress')
dna1 = core.DnaSpeciesType(seq=Bio.Seq.Seq('ACTAAACCTCGT', alphabet=Bio.Alphabet.DNAAlphabet()))
cell1 = dna1.cell = core.Cell()
cell1.knowledge_base = core.KnowledgeBase(translation_table=1)

gene1 = core.GeneLocus(id='gene1', cell=cell1, polymer=dna1, start=1, end=dna1.get_len(), strand=core.PolymerStrand.positive)
tu1 = core.TranscriptionUnitLocus(id='tu1', gene=[gene1], polymer=dna1)
prot1 = core.ProteinSpeciesType(id='prot1', gene=gene1)
self.assertEqual(prot1.get_empirical_formula(), chem.EmpiricalFormula('C21H40N8O6'))

def test_get_mol_wt(self):
# Test is based on Collagen Type IV a3 (https://pubchem.ncbi.nlm.nih.gov/compound/44511378)
dna1 = core.DnaSpeciesType(seq=Bio.Seq.Seq(
'TGTAATTATTATTCTAATTCTTATTCTTTTTGGTTAGCTTCTTTAAATCCTGAACGT', alphabet=Bio.Alphabet.DNAAlphabet()))
cell = dna1.cell = core.Cell()
cell.knowledge_base = core.KnowledgeBase(translation_table=1)
rna1 = core.RnaSpeciesType(dna=dna1, start=1, end=dna1.get_len(), strand=core.PolymerStrand.positive)
orf1 = core.OpenReadingFrameLocus(polymer=rna1, start=1, end=rna1.get_len())
prot1 = core.ProteinSpeciesType(orfs=[orf1])
cell1 = dna1.cell = core.Cell()
cell1.knowledge_base = core.KnowledgeBase(translation_table=1)

gene1 = core.GeneLocus(id='gene1', cell=cell1, polymer=dna1, start=1, end=dna1.get_len(), strand=core.PolymerStrand.positive)
tu1 = core.TranscriptionUnitLocus(id='tu1', gene=[gene1], polymer=dna1)
prot1 = core.ProteinSpeciesType(id='prot1', gene=gene1)
self.assertAlmostEqual(prot1.get_mol_wt(), 2314.517)

# Test is based on Tuftsin (https://pubchem.ncbi.nlm.nih.gov/compounds/156080)
dna2 = core.DnaSpeciesType(seq=Bio.Seq.Seq('ACTAAACCTCGT', alphabet=Bio.Alphabet.DNAAlphabet()))
cell = dna2.cell = core.Cell()
cell.knowledge_base = core.KnowledgeBase(translation_table=1)
rna2 = core.RnaSpeciesType(dna=dna2, start=1, end=dna2.get_len(), strand=core.PolymerStrand.positive)
orf2 = core.OpenReadingFrameLocus(polymer=rna2, start=1, end=rna2.get_len())
prot2 = core.ProteinSpeciesType(orfs=[orf2])
self.assertAlmostEqual(prot2.get_mol_wt(), 500.601)

@unittest.skip('Work in progress')
dna1 = core.DnaSpeciesType(seq=Bio.Seq.Seq('ACTAAACCTCGT', alphabet=Bio.Alphabet.DNAAlphabet()))
cell1 = dna1.cell = core.Cell()
cell1.knowledge_base = core.KnowledgeBase(translation_table=1)

gene1 = core.GeneLocus(id='gene1', cell=cell1, polymer=dna1, start=1, end=dna1.get_len(), strand=core.PolymerStrand.positive)
tu1 = core.TranscriptionUnitLocus(id='tu1', gene=[gene1], polymer=dna1)
prot1 = core.ProteinSpeciesType(id='prot1', gene=gene1)
self.assertAlmostEqual(prot1.get_mol_wt(), 500.601)

def test_get_charge(self):
# Test is based on Collagen Type IV a3 (https://pubchem.ncbi.nlm.nih.gov/compound/44511378)
dna1 = core.DnaSpeciesType(seq=Bio.Seq.Seq(
'TGTAATTATTATTCTAATTCTTATTCTTTTTGGTTAGCTTCTTTAAATCCTGAACGT', alphabet=Bio.Alphabet.DNAAlphabet()))
cell = dna1.cell = core.Cell()
cell.knowledge_base = core.KnowledgeBase(translation_table=1)
rna1 = core.RnaSpeciesType(dna=dna1, start=1, end=dna1.get_len(), strand=core.PolymerStrand.positive)
orf1 = core.OpenReadingFrameLocus(polymer=rna1, start=1, end=rna1.get_len())
prot1 = core.ProteinSpeciesType(orfs=[orf1])
cell1 = dna1.cell = core.Cell()
cell1.knowledge_base = core.KnowledgeBase(translation_table=1)

gene1 = core.GeneLocus(id='gene1', cell=cell1, polymer=dna1, start=1, end=dna1.get_len(), strand=core.PolymerStrand.positive)
tu1 = core.TranscriptionUnitLocus(id='tu1', gene=[gene1], polymer=dna1)
prot1 = core.ProteinSpeciesType(id='prot1', gene=gene1)
self.assertEqual(prot1.get_charge(), 0)

# Test is based on Tuftsin (https://pubchem.ncbi.nlm.nih.gov/compounds/156080)
dna2 = core.DnaSpeciesType(seq=Bio.Seq.Seq('ACTAAACCTCGT', alphabet=Bio.Alphabet.DNAAlphabet()))
cell = dna2.cell = core.Cell()
cell.knowledge_base = core.KnowledgeBase(translation_table=1)
rna2 = core.RnaSpeciesType(dna=dna2, start=1, end=dna2.get_len(), strand=core.PolymerStrand.positive)
orf2 = core.OpenReadingFrameLocus(polymer=rna2, start=1, end=rna2.get_len())
prot2 = core.ProteinSpeciesType(orfs=[orf2])
self.assertEqual(prot2.get_charge(), 2)
dna1 = core.DnaSpeciesType(seq=Bio.Seq.Seq('ACTAAACCTCGT', alphabet=Bio.Alphabet.DNAAlphabet()))
cell1 = dna1.cell = core.Cell()
cell1.knowledge_base = core.KnowledgeBase(translation_table=1)

gene1 = core.GeneLocus(id='gene1', cell=cell1, polymer=dna1, start=1, end=dna1.get_len(), strand=core.PolymerStrand.positive)
tu1 = core.TranscriptionUnitLocus(id='tu1', gene=[gene1], polymer=dna1)
prot1 = core.ProteinSpeciesType(id='prot1', gene=gene1)
self.assertEqual(prot1.get_charge(), 2)


class PolymerLocusTestCase(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def setUp(self):
dna.seq = Bio.Seq.Seq(seq)

for i_trn in range(5):
trn = dna.rnas.create(id='tu_{}_{}'.format(i_chr + 1, i_trn + 1), type=core.RnaType.mRna)
trn = dna.loci.create(id='tu_{}_{}'.format(i_chr + 1, i_trn + 1))
trn.start = random.randint(100, 200)
trn.end = ((trn.start + random.randint(1, 200) - 1) % seq_len) + 1
trn.strand = core.PolymerStrand.positive
Expand Down
86 changes: 43 additions & 43 deletions wc_kb/core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Schema to represent knowledge bases
""" Schema to represent a knowledge base to build models
:Author: Balazs Szigeti <balazs.szigeti@mssm.edu>
:Author: Jonathan Karr <jonrkarr@gmail.com>
Expand All @@ -20,7 +20,30 @@
import six


""" Base classes and enumeration classes """
""" Enumeration classes """
# Strand of a polymer. 'positive' (1) and 'negative' (-1) are the canonical
# members; '+' and '-' reuse those values, so they resolve to the same members
# when looked up by name (e.g. PolymerStrand['+'] is PolymerStrand.positive).
PolymerStrand = enum.Enum(
    'PolymerStrand',
    {
        'positive': 1,
        '+': 1,
        'negative': -1,
        '-': -1,
    },
)

class RnaType(enum.Enum):
    """ Type of RNA

    Members are numbered consecutively from 0 in declaration order:
    ``mRna``, ``rRna``, ``sRna``, ``tRna``, ``mixed``.
    """
    mRna = 0
    rRna = 1
    sRna = 2
    tRna = 3
    mixed = 4

class GeneType(enum.Enum):
    """ Type of gene

    Members are numbered consecutively from 0 in declaration order:
    ``mRna``, ``rRna``, ``sRna``, ``tRna``.
    """
    mRna = 0
    rRna = 1
    sRna = 2
    tRna = 3


""" Base classes """
class KnowledgeBaseObject(obj_model.core.Model):
""" Knowledge of a biological entity
Expand All @@ -33,7 +56,6 @@ class KnowledgeBaseObject(obj_model.core.Model):
name = obj_model.core.StringAttribute()
comments = obj_model.core.StringAttribute()


class KnowledgeBase(KnowledgeBaseObject):
""" A knowledge base
Expand All @@ -51,31 +73,6 @@ class Meta(obj_model.core.Model.Meta):
attribute_order = ('id', 'name', 'version', 'translation_table', 'comments')
tabular_orientation = obj_model.core.TabularOrientation.column


# Strand of a polymer. 'positive' (1) and 'negative' (-1) are the canonical
# members; '+' and '-' reuse those values, so they resolve to the same members
# when looked up by name (e.g. PolymerStrand['+'] is PolymerStrand.positive).
PolymerStrand = enum.Enum(
    'PolymerStrand',
    {
        'positive': 1,
        '+': 1,
        'negative': -1,
        '-': -1,
    },
)


class RnaType(enum.Enum):
    """ Type of RNA

    Members are numbered consecutively from 0 in declaration order:
    ``mRna``, ``rRna``, ``sRna``, ``tRna``, ``mixed``.
    """
    mRna = 0
    rRna = 1
    sRna = 2
    tRna = 3
    mixed = 4


class GeneType(enum.Enum):
    """ Type of gene

    Members are numbered consecutively from 0 in declaration order:
    ``mRna``, ``rRna``, ``sRna``, ``tRna``.
    """
    mRna = 0
    rRna = 1
    sRna = 2
    tRna = 3


class Cell(KnowledgeBaseObject):
""" Knowledge of a cell
Expand All @@ -95,8 +92,7 @@ class Meta(obj_model.core.Model.Meta):
tabular_orientation = obj_model.core.TabularOrientation.column

def get_species_types(self, cls=None):
""" Get the DNA species types
""" Get species of given type(s)
Args:
cls (:obj:`type` or :obj:`tuple` of :obj:`type`, optional): type(s) of species types;
if :obj:`None`, every species type will be returned
Expand All @@ -107,8 +103,21 @@ def get_species_types(self, cls=None):
if cls is None:
return self.species_types
else:
return filter(lambda species_type: isinstance(species_type, cls), self.species_types)
return list(filter(lambda species_type: isinstance(species_type, cls), self.species_types))

def get_locus_types(self, cls=None):
    """ Get the loci of the given type(s)

    Args:
        cls (:obj:`type` or :obj:`tuple` of :obj:`type`, optional): type(s) of loci;
            if :obj:`None`, every locus will be returned

    Returns:
        :obj:`list` of :obj:`PolymerLocus`: loci in :obj:`self.loci` that are
            instances of :obj:`cls`; :obj:`self.loci` itself, unfiltered, when
            :obj:`cls` is :obj:`None`
    """
    # No filter requested: hand back the loci, not the species types
    # (the unfiltered branch previously returned `self.species_types`).
    if cls is None:
        return self.loci
    return [locus for locus in self.loci if isinstance(locus, cls)]

class Compartment(KnowledgeBaseObject):
""" Knowledge of a subcellular compartment
Expand All @@ -126,7 +135,6 @@ class Compartment(KnowledgeBaseObject):
class Meta(obj_model.core.Model.Meta):
attribute_order = ('id', 'cell', 'name', 'volume', 'comments')


class SpeciesType(six.with_metaclass(obj_model.abstract.AbstractModelMeta, KnowledgeBaseObject)):
""" Knowledge of a molecular species
Expand Down Expand Up @@ -173,7 +181,6 @@ def get_mol_wt(self):
"""
pass


class PolymerSpeciesType(SpeciesType):
""" Knowledge of a polymer
Expand Down Expand Up @@ -246,7 +253,6 @@ def get_subseq(self, start, end, strand=PolymerStrand.positive):
else:
return pos_seq.reverse_complement()


class PolymerLocus(KnowledgeBaseObject):
""" Knowledge about a locus of a polymer
Expand Down Expand Up @@ -291,7 +297,7 @@ class DnaSpeciesType(PolymerSpeciesType):
seq (:obj:`Bio.Seq.Seq`): sequence
Related attributes:
transcription_units (:obj:`list` of :obj:`TranscriptionUnitLocus`): TUs
transcription_unit (:obj:`list` of :obj:`TranscriptionUnitLocus`): TUs
"""

seq = obj_model.extra_attributes.BioSeqAttribute(verbose_name='Sequence')
Expand Down Expand Up @@ -389,7 +395,6 @@ def get_mol_wt(self):
"""
return self.get_empirical_formula().get_molecular_weight()


class RnaSpeciesType(PolymerSpeciesType):
""" Knowledge of an RNA species
Expand Down Expand Up @@ -469,7 +474,6 @@ def get_mol_wt(self):
"""
return self.get_empirical_formula().get_molecular_weight()


class ProteinSpeciesType(PolymerSpeciesType):
""" Knowledge of a protein monomer
Expand All @@ -490,8 +494,8 @@ def get_seq(self):
Returns:
:obj:`Bio.Seq.Seq`: sequence
"""
orf = self.orfs[0]
return orf.get_seq().translate(orf.polymer.dna.cell.knowledge_base.translation_table)
trans_table = self.gene.transcription_unit[0].polymer.cell.knowledge_base.translation_table
return self.gene.get_seq().translate(trans_table)

def get_empirical_formula(self):
""" Get the empirical formula
Expand Down Expand Up @@ -576,7 +580,6 @@ def get_mol_wt(self):
"""
return self.get_empirical_formula().get_molecular_weight()


class MetaboliteSpeciesType(SpeciesType):
""" Knowledge of a metabolite
Expand Down Expand Up @@ -654,7 +657,6 @@ class PromoterLocus(PolymerLocus):
class Meta(obj_model.core.Model.Meta):
attribute_order = ('id', 'cell', 'polymer', 'name', 'pribnow_start', 'pribnow_end', 'comments')


class TranscriptionUnitLocus(PolymerLocus):
""" Knowledge about a transcription unit
Expand Down Expand Up @@ -691,7 +693,6 @@ def get_5_prime(self):
else:
return self.end


class GeneLocus(PolymerLocus):
""" Knowledge of a gene
Expand Down Expand Up @@ -774,7 +775,6 @@ def deserialize(cls, value, objects):
'No species type and compartment with primary attribute values "{}" and "{}"'.format(
species_type_id, compartment_id)]))


class Reaction(KnowledgeBaseObject):
""" Knowledge of reactions
Expand Down

0 comments on commit 8b0e033

Please sign in to comment.