In [24]:
from collections import Counter

import numpy as np
from cogent3.app import io
from cogent3.evolve import distance
from cogent3.phylo import nj
from cogent3.evolve.models import JC69
from cogent3.evolve.fast_distance import ParalinearPair
from cogent3 import DNA

In [5]:
dstore = io.get_data_store("../data/horse_pig_bats-filtered.tinydb")
loader = io.load_db()
dstore.describe

record type,number
completed,878
incomplete,122
logs,1


In [7]:
trees = []
for aln_name in dstore:
    aln = loader(aln_name)
    d = distance.EstimateDistances(aln[2::3], submodel=JC69())
    d.run(show_progress=False)
    tree = nj.nj(d.get_pairwise_distances(), show_progress=False)
    tree = tree.root_at_midpoint()
    trees.append(tree)

In [8]:
len(trees)

878

In [11]:
ghb_siblings = Counter()
ordered_siblings = []
for tree in trees:
    tree = tree.unrooted()
    for sibling in tree.get_node_matching_name('Greater horseshoe bat').parent.children:
        if sibling.name in ('Microbat', 'Pig', 'Horse'):
            ghb_siblings[sibling.name] += 1
            ordered_siblings.append(sibling.name)

In [12]:
ghb_siblings

Counter({'Microbat': 638, 'Pig': 114, 'Horse': 126})

In [13]:
ghb_siblings['Microbat']/len(trees)

0.7266514806378133

In [25]:
trees = []
for aln_name in dstore:
    aln = loader(aln_name)
    d = ParalinearPair(moltype=DNA, alignment=aln[2::3])
    d.run(show_progress=False)
    if np.isnan(d.get_pairwise_distances()).any():
        continue
    tree = nj.nj(d.get_pairwise_distances(), show_progress=False)
    tree = tree.root_at_midpoint()
    trees.append(tree)

In [26]:
len(trees)

872

In [27]:
ghb_siblings = Counter()
ordered_siblings = []
for tree in trees:
    tree = tree.unrooted()
    for sibling in tree.get_node_matching_name('Greater horseshoe bat').parent.children:
        if sibling.name in ('Microbat', 'Pig', 'Horse'):
            ghb_siblings[sibling.name] += 1
            ordered_siblings.append(sibling.name)

In [28]:
ghb_siblings

Counter({'Microbat': 637, 'Pig': 116, 'Horse': 119})

In [30]:
ghb_siblings['Microbat']/878

0.7255125284738041