# Joint Ancestral State Reconstruction for the Aculeate Hymenoptera Silk Fibroins
This notebook fits a CNFGTR model that includes a gap state, then performs joint ancestral state reconstruction using the algorithm of Pupko et al. (2000), Mol. Biol. Evol. 17(6):890–896.
### Setup
Note that the `cogent` package required here is not the stock release; it must be installed from [a special PyCogent](https://github.com/BenKaehler/pycogent/tree/codon_gaps) (the `codon_gaps` branch of that fork).

`gapped` is the Python module in this repo.

In [1]:
import cogent
from numpy import allclose

import gapped

### Load the alignment and the tree

In [2]:
# Inputs: the tree from ../data and the DNA alignment from ../results.
# The two loads are independent of each other.
tree = cogent.LoadTree('../data/tree.nwk')
aln = cogent.LoadSeqs(
    '../results/aligned_dna.fasta',
    moltype=cogent.DNA)

In [3]:
# Text rendering of the tree topology, to check the edge names.
print(tree.asciiArt())

                                        /-AmelF2
                              /twoBeesF2
                    /threeBeesF2        \-AdorF2
                   |         |
          /rootF2--|          \-BterF2
         |         |
         |         |          /-OsmaF2
         |          \twoAntsF2
         |                    \-MforF2
         |
         |                              /-AmelF3
         |                    /twoBeesF3
         |          /threeBeesF3        \-AdorF3
         |         |         |
         |-rootF3--|          \-BterF3
         |         |
-root----|         |          /-OsmaF3
         |          \twoAntsF3
         |                    \-MforF3
         |
         |                                        /-AmelF1
         |                              /twoBeesF1
         |                    /threeBeesF1        \-AdorF1
         |                   |         |
         |          /rootF1--|          \-BterF1
         |         |         |
         |

### Fit CNFGTR with gaps

In [4]:
# gapped.ml is handed string forms of the alignment and tree, keyed by
# 'aln' and 'tree'.
doc = dict(aln=str(aln), tree=str(tree))

In [5]:
# Maximum-likelihood fit of CNFGTR with the gap state switched on.
# NOTE(review): omega_indep=False / indel_indep=False presumably mean a single
# omega and a single indel parameter shared across all edges (the fitted
# table printed later reports one value for each) -- confirm in gapped.ml.
cnfgtr_result = gapped.ml(
    doc,
    model='CNFGTR',
    model_gaps=True,
    omega_indep=False,
    indel_indep=False)

In [6]:
# Display the raw result of the fit. Its 'lf' entry is what a later cell
# passes to gapped.inflate_likelihood_function.
cnfgtr_result

{'gc': 'Standard Nuclear',
 'indel_indep': False,
 'lf': {'EN': {'AdorF1': 0.084610422698122231,
   'AdorF2': 0.10284210923051362,
   'AdorF3': 0.070887674377659554,
   'AdorF4': 0.064114585272755376,
   'AmelF1': 0.077731254245136577,
   'AmelF2': 0.068178047568438149,
   'AmelF3': 0.11176782658153327,
   'AmelF4': 0.096903348066619849,
   'BterF1': 0.27285797126168326,
   'BterF2': 0.44013850362981222,
   'BterF3': 0.47020768677609204,
   'BterF4': 1.0002048983767631,
   'MforF1': 1.027998843134216,
   'MforF2': 0.72270893316495544,
   'MforF3': 0.62603668314975258,
   'MforF4': 0.73312605893240823,
   'OsmaF1': 0.29916015094249315,
   'OsmaF2': 0.73158821179844091,
   'OsmaF3': 0.60497233657212324,
   'OsmaF4': 0.92607426120785885,
   'rootF1': 0.67023525251193894,
   'rootF1F4': 0.20576537998762029,
   'rootF2': 0.59517840445787074,
   'rootF3': 0.88487988980347965,
   'rootF4': 0.37499999687030805,
   'threeBeesF1': 1.7510150581038295,
   'threeBeesF2': 1.3528631021156061,
   'thr

In [7]:
def model():
    """Return a new gap-aware CNFGTR model with optimisable motif probabilities.

    Written as a named function rather than a lambda bound to a name, so the
    factory has a real ``__name__`` in tracebacks and can carry this docstring.
    """
    return gapped.CNFGTR(optimise_motif_probs=True, model_gaps=True)


# Rebuild a likelihood function object from the fitted values stored under
# 'lf' in the result, using the factory above to construct the model.
cnfgtr = gapped.inflate_likelihood_function(cnfgtr_result['lf'], model)

  is_independent)
  is_independent)
  is_independent)
  is_independent)
  is_independent)
  is_independent)
  is_independent)


### Write out the CNFGTR tree for plotting using another application

In [8]:
# Copy each fitted 'length' parameter onto the matching tree edge (the root
# is excluded), so that the file written below carries the branch lengths.
for edge in cnfgtr.tree.getEdgeVector(include_root=False):
    edge.Length = cnfgtr.getParamValue('length', edge=edge.Name)

cnfgtr.tree.writeToFile('../results/cnfgtr.nwk', with_distances=True)

In [9]:
# Tabular summary of the fitted likelihood function (rate terms and
# per-edge lengths).
print(cnfgtr)

Likelihood Function Table
   A/C       A/G       A/T       C/G       C/T     indel     omega
------------------------------------------------------------------
0.8673    2.3580    1.1493    0.8258    1.9826    0.1308    0.3365
------------------------------------------------------------------
       edge         parent    length
------------------------------------
     AmelF2      twoBeesF2    0.0682
     AdorF2      twoBeesF2    0.1028
  twoBeesF2    threeBeesF2    0.5313
     BterF2    threeBeesF2    0.4401
threeBeesF2         rootF2    1.3529
     OsmaF2      twoAntsF2    0.7316
     MforF2      twoAntsF2    0.7227
  twoAntsF2         rootF2    1.8245
     rootF2           root    0.5952
     AmelF3      twoBeesF3    0.1118
     AdorF3      twoBeesF3    0.0709
  twoBeesF3    threeBeesF3    0.5863
     BterF3    threeBeesF3    0.4702
threeBeesF3         rootF3    0.9185
     OsmaF3      twoAntsF3    0.6050
     MforF3      twoAntsF3    0.6260
  twoAntsF3         rootF3    1.4745
   

### Perform the joint ancestral state reconstruction

In [10]:
# Attach the loaded alignment to the fitted likelihood function.
# NOTE(review): presumably gapped.joint needs the alignment set first, so keep
# these two calls in this order -- confirm against gapped.joint.
cnfgtr.setAlignment(aln)
# Joint ancestral state reconstruction; the result is written out as an
# alignment in the following section.
anc_aln = gapped.joint(cnfgtr)

### Write out the ancestral state reconstructions as an alignment

In [11]:
# Save the reconstruction under ../results, alongside cnfgtr.nwk.
anc_aln.writeToFile('../results/joint.fasta')