In [17]:
import json

import cogent
from numpy import allclose

import gapped

In [26]:
# Newick description of the four-taxon tree used for the small test fit.
treestring = "((AmelF2,AdorF1)twoBeesF2,BterF3,OsmaF4);"

In [27]:
# Parse the Newick string, read the DNA alignment from disk, and keep only
# the sequences whose names are tips of the tree.
tree = cogent.LoadTree(treestring=treestring)
full_aln = cogent.LoadSeqs('../data/alternate.fasta', moltype=cogent.DNA)
aln = full_aln.takeSeqs(tree.getTipNames())

In [28]:
# Show the tree topology as ASCII art (single-argument form prints the same
# text under the Python 2 print statement).
print(tree.asciiArt())

                    /-AmelF2
          /twoBeesF2
         |          \-AdorF1
-root----|
         |--BterF3
         |
          \-OsmaF4


In [29]:
# Bundle the string forms of the alignment and the tree into the input
# document handed to gapped.ml.
doc = dict(aln=str(aln), tree=str(tree))

In [30]:
# Fit the CNFGTR model with gaps modelled; omega_indep / indel_indep are
# switched off (presumably sharing omega and indel across edges -- confirm
# against gapped.ml).
ml_options = dict(
    model='CNFGTR',
    model_gaps=True,
    omega_indep=False,
    indel_indep=False,
)
cnfgtr_result = gapped.ml(doc, **ml_options)

In [31]:
# Write the 'lf' entry of the fit result as JSON, then save the alignment
# as FASTA; both files land in the current working directory.
with open('small_cnfgtr.json', 'w') as handle:
    json.dump(obj=cnfgtr_result['lf'], fp=handle)
aln.writeToFile('small_aln.fasta')

In [21]:
def model():
    """Return a new CNFGTR model with gaps modelled and motif probs optimisable.

    Passed as a zero-argument factory to gapped.inflate_likelihood_function,
    so a fresh model instance is built each time it is called.
    """
    return gapped.CNFGTR(optimise_motif_probs=True, model_gaps=True)


# Rebuild a likelihood function object from the stored 'lf' record
# (presumably the same structure that was dumped to JSON -- confirm).
cnfgtr = gapped.inflate_likelihood_function(cnfgtr_result['lf'], model)

  is_independent)
  is_independent)
  is_independent)
  is_independent)
  is_independent)


In [23]:
from numpy.testing import assert_array_less

{'AdorF2': 'ATGAAGATTCCAGCAATA---TTCGTCACGTCTCTGCTG---GTCTGG---GGATTGGCCGAGGGCCGCGTGATT---AAT------------CAGTCCCTGAAGTCGAACGTGGATATTCAAGGA---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GCAGCAGGACAAGTCGGCGATGGATCTTACGTGTTTGGCTCCTCCATAGAAAACGCCCTAAAAGTT------GCTCGAGCGTCTGAGAACGTGGGCCTGAATCTGGAA------------------------TTGAACGCAGGCGCGCGTGCTGCCAGTGTTGCCGCTGCTGCCCAGGCCAAGAATACAGAGGCTGCGGAAGCAGGAGCAAACGCCGCTCTGGCTGCCGCCATTGCCAAACGGGAGGAAGCGATTAAAGCCAGCGAGATAGCAAGCCAGTTATTGACCAATGCAGCAAAGGCGGCAGAGGCAACTGTATCGGCAACGAAGAGGGCAGCACAATTGACGGCTGCGGCGAAAGAAGCAACCAGAGCTTCTGCAGCTGCTGCTGAAGCTGCCACGGAGGCCCAGGTAAAGGCTAACGCCGATTCGATCATCACGAAACGGGCTGCGATTGCCGAGGCTCAAGCTGCAGCGGAAGCTCAAGTTAAGGCAGCAATCGCC------AGGAAATCGGCAGCGAACTTTTTGGCTAAGGCTCAAGTAGCAGCTGCCACGGAATCCGAGGCCACAAAACTCGCGGCCGAAGCCGCAGTGGCATTAACAAACGCAGAAGTCGCCGTGAACCAAGCTAGGAATGCACAGGCAACCGCCTCGACTCAAG

In [9]:
# Copy each fitted 'length' parameter onto the matching tree edge (root
# excluded), then write the tree with branch lengths to disk.
for edge in cnfgtr.tree.getEdgeVector(include_root=False):
    fitted_length = cnfgtr.getParamValue('length', edge=edge.Name)
    edge.Length = fitted_length
cnfgtr.tree.writeToFile('../results/cnfgtr.nwk', with_distances=True)

In [10]:
# Display the fitted likelihood function's parameter tables.
print(cnfgtr)

Likelihood Function Table
   A/C       A/G       A/T       C/G       C/T     indel     omega
------------------------------------------------------------------
0.8733    2.3427    1.1439    0.8136    1.8667    0.1366    0.3440
------------------------------------------------------------------
       edge         parent    length
------------------------------------
     AmelF2      twoBeesF2    0.0865
     AdorF2      twoBeesF2    0.0925
  twoBeesF2    threeBeesF2    0.5586
     BterF2    threeBeesF2    0.4722
threeBeesF2         rootF2    1.2950
     OsmaF2      twoAntsF2    0.7036
     MforF2      twoAntsF2    0.7331
  twoAntsF2         rootF2    2.2062
     rootF2           root    0.3746
     AmelF3      twoBeesF3    0.1163
     AdorF3      twoBeesF3    0.0753
  twoBeesF3    threeBeesF3    0.5753
     BterF3    threeBeesF3    0.5324
threeBeesF3         rootF3    0.6267
     OsmaF3      twoAntsF3    0.6617
     MforF3      twoAntsF3    0.5992
  twoAntsF3         rootF3    1.8686
   

In [11]:
# Strip terminal stop codons and drop any codon column containing characters
# other than the DNA alphabet or '-', then verify nothing was removed before
# attaching the alignment to the likelihood function.
raw_length = len(aln)
aln = aln.withoutTerminalStopCodons()

# Built once: characters permitted in a codon column.
allowed_chars = set(cogent.DNA).union({'-'})


def filt(column):
    """Return True when every character in the column's motifs is allowed."""
    return set(''.join(column)) <= allowed_chars


aln = aln.filtered(filt, motif_length=3)
assert len(aln) == raw_length, (
    'alignment length changed from %d to %d after removing terminal stop '
    'codons and filtering codon columns' % (raw_length, len(aln)))
cnfgtr.setAlignment(aln)

In [12]:
# Run gapped.joint on the fitted likelihood function; the result is kept as
# an alignment-like object (presumably a joint ancestral reconstruction --
# confirm against gapped.joint).
anc_aln = gapped.joint(cnfgtr)

In [13]:
# Save the result of gapped.joint as FASTA under ../results.
anc_aln.writeToFile('../results/joint.fasta')

In [14]:
# Check that the root motif probabilities are left unchanged when multiplied
# by the substitution probability matrix for edge 'rootF1' (pi . P == pi).
# NOTE(review): 'rootF1' is not a name in the treestring defined earlier in
# this notebook -- confirm the edge exists in the fitted tree.
root_probs = cnfgtr.getMotifProbsByNode()['root']
pi = root_probs.asarray()
P = cnfgtr.getPsubForEdge('rootF1')
propagated = pi.dot(P)
assert allclose(propagated, pi)

In [15]:
# Consistency checks between the rate matrix for edge 'rootF1' and the
# fitted scalar parameters.
omega = cnfgtr.getParamValue('omega')
pi = cnfgtr.getMotifProbs()
Q = cnfgtr.getRateMatrixForEdge('rootF1')

# Total probability of the four motifs CCA, CCC, CCG, CCT; it is the shared
# denominator of both conditional probabilities below.
cc_total = sum(pi['CC' + base] for base in 'ACGT')

# Reference cell: the Q['CCT']['CCG'] entry scaled by the conditional
# probability of CCG among the CC* motifs.
cond_p = pi['CCG'] / cc_total
ref_cell = Q['CCT']['CCG'] / cond_p

# The CCA/CCC entry, scaled the same way and taken relative to the reference
# cell, must equal the fitted 'A/C' parameter.
cond_p = pi['CCC'] / cc_total
ac_ratio = Q['CCA']['CCC'] / cond_p / ref_cell
assert allclose(ac_ratio, cnfgtr.getParamValue('A/C'))

# The gap/CCC entry is scaled by the plain motif probability of CCC and must
# equal the fitted 'indel' parameter.
indel_ratio = Q['---']['CCC'] / pi['CCC'] / ref_cell
assert allclose(indel_ratio, cnfgtr.getParamValue('indel'))

# Q divided element-wise by pi must be symmetric.
R = Q.asarray() / pi.asarray()
assert allclose(R.T, R)