In [244]:
import pandas as pd
import tskit as tsk
from IPython.display import Markdown, display

# Execute the helper script inside this notebook's namespace (-i), so the names it
# defines become available to the cells below.
# NOTE(review): presumably this is where `isRecipMonophyletic` comes from — confirm against the script.
%run -i "../isRecipMonophyletic.py"

# Load the tree sequence analysed in the rest of the notebook (path is relative to the notebook's working directory).
ts = tsk.load("../../data/sim/IM_m0_T1_chr1_1e4.trees")


In [245]:
# One row per sample node, in the order returned by ts.samples(); the "pop" column holds
# the population ID of that node.
# The per-node accessor ts.node() is used instead of ts.tables.nodes[...]: looking up
# `ts.tables` inside the comprehension can rebuild a copy of every table for each sample
# (tskit version dependent), which is wasted work.
# NOTE(review): the frame keeps pandas' default positional index (0..n-1), which matches
# node IDs only when the samples are nodes 0..n-1 — confirm this is what isRecipMonophyletic expects.
pop_by_node = pd.DataFrame({
    "pop": [ts.node(leaf).population for leaf in ts.samples()],
})

In [246]:
# Build a standalone pandas DataFrame with one row per tree (indexed by tree index),
# which is easier to manipulate than tskit's tables.
#   span    : genomic length covered by the tree
#   bounds  : (left, right) coordinates of the tree's interval
#   treeObj : the tree object itself
# The trees are materialised once with ts.aslist() and every column is derived from that
# single list, rather than re-iterating ts.trees() separately for each column.

tree_list = ts.aslist()

pd_sequence = pd.DataFrame(
    {
        'span': [tree.span for tree in tree_list],
        'bounds': [(tree.interval.left, tree.interval.right) for tree in tree_list],
        'treeObj': tree_list,
    },
    index=[tree.index for tree in tree_list],
)

In [247]:
# Apply the reciprocal-monophyly test to every stored tree and keep each tree's result
# in the 'monophyletic' column (same row order and index as pd_sequence).

pd_sequence['monophyletic'] = pd_sequence['treeObj'].map(
    lambda tree: isRecipMonophyletic(tree, pop_by_node)
)

In [251]:
# Share of the total sequence length covered by trees flagged as monophyletic.
# Note: `percentage` holds a fraction in [0, 1]; it is scaled by 100 only for display.
is_mono = pd_sequence['monophyletic'].eq(True)
mono_span = pd_sequence.loc[is_mono, 'span'].sum()
total_span = pd_sequence['span'].sum()
percentage = mono_span / total_span

# Render the result as a Markdown heading
result_text = "# Result : the sequence is monophyletic in **" + str(percentage*100) + " %** of the sequence."
display(Markdown(result_text))

# Result : the sequence is monophyletic in **9.013 %** of the sequence.