diff --git a/tests/test_core.py b/tests/test_core.py index 9f81c83..c400e37 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -172,7 +172,8 @@ def test(self): self.tmp_dirname = tempfile.mkdtemp() filepath = os.path.join(self.tmp_dirname, 'test_seq.fasta') with open(filepath, 'w') as f: - f.write('>dna1\nACGTACGT\n') + f.write('>dna1\nACGTACGT\n' + '>dna2\nACGTACGTNNNN\n') dna = core.DnaSpeciesType(id='dna1', name='dna1', sequence_path=filepath, circular=False, double_stranded=False, ploidy=2) @@ -270,6 +271,19 @@ def test(self): self.assertAlmostEqual(dna.get_mol_wt(), exp_mol_wt, places=0) + # If there are N's in the DNA sequence + dna2 = core.DnaSpeciesType(id='dna2', sequence_path=filepath, + circular=False, double_stranded=True) + + L = dna2.get_len() + self.assertEqual(dna2.get_empirical_formula(), + chem.EmpiricalFormula('C10H12N5O6P') * 3 * 2 + + chem.EmpiricalFormula('C9H12N3O7P') * 3 * 2 + + chem.EmpiricalFormula('C10H12N5O7P') * 3 * 2 + + chem.EmpiricalFormula('C10H13N2O8P') * 3 * 2 + - chem.EmpiricalFormula('OH') * (L - 1) * 2 + ) + shutil.rmtree(self.tmp_dirname) diff --git a/wc_kb/core.py b/wc_kb/core.py index 56070d5..a8bb904 100644 --- a/wc_kb/core.py +++ b/wc_kb/core.py @@ -1594,16 +1594,25 @@ def get_empirical_formula(self): :math:`N_A * dAMP + N_C * dCMP + N_G * dGMP + N_T * dTMP - L * OH` + N's in the sequence will be distributed into the four bases by preserving the original ratio + Returns: :obj:`chem.EmpiricalFormula`: empirical formula """ seq = self.get_seq() - n_a = seq.count('A') - n_c = seq.count('C') - n_g = seq.count('G') - n_t = seq.count('T') - l = len(seq) - + n_a = seq.upper().count('A') + n_c = seq.upper().count('C') + n_g = seq.upper().count('G') + n_t = seq.upper().count('T') + n_n = seq.upper().count('N') + + l = len(seq) + known_bases = n_a + n_c + n_g + n_t + n_a += round(n_a / known_bases * n_n) + n_c += round(n_c / known_bases * n_n) + n_g += round(n_g / known_bases * n_n) + n_t = l - (n_a + n_c + n_g) + if self.double_stranded: n_a = n_a + n_t n_t = n_a