# `pulsar`
The

Strand Symmetric Spectral

Method

In [1]:
from itertools import combinations
from collections import Counter, defaultdict

from cogent3 import load_aligned_seqs, PhyloNode
import numpy as np
import networkx as nx
from sklearn.cluster import SpectralClustering

In [2]:
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors

tf.executing_eagerly()  # need to check whether this is the default for tensorflow > 2

2021-11-12 06:56:11.503331: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1


True

In [3]:
# this stops tensorflow from snaffling all of the GPU
# thanks https://stackoverflow.com/questions/34199233/how-to-prevent-tensorflow-from-allocating-the-totality-of-a-gpu-memory
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
  tf.config.experimental.set_memory_growth(gpu, True)

2021-11-12 06:56:14.498878: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2021-11-12 06:56:14.556556: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-12 06:56:14.556819: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2080 with Max-Q Design computeCapability: 7.5
coreClock: 1.095GHz coreCount: 46 deviceMemorySize: 7.79GiB deviceMemoryBandwidth: 357.69GiB/s
2021-11-12 06:56:14.556842: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-11-12 06:56:14.558044: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2021-11-12 06:56:14.559138: I tensorflow/s

## Data import
Reads an alignments and creates a list of 4 x 4 x 4 joint frequencies tensors.

In [4]:
def get_triples(aln, nuc_order='ACGT', codon_position=None, verbose=False):
    if codon_position:
        aln = aln[codon_position - 1::3]
    aln = aln.no_degenerates()
    if verbose:
        print(f'Got {len(aln)} positions')
    assert len(aln) <= np.iinfo(np.int32).max
    triples = []
    nuc_map = {n:i for i, n in enumerate(nuc_order)}
    for triple in combinations(range(aln.num_seqs), 3):
        F = np.zeros([4, 4, 4], dtype=np.int32)
        subaln = aln.get_sub_alignment(seqs=triple)
        for a, b, c in subaln:
            F[nuc_map[a], nuc_map[b], nuc_map[c]] += 1
        triples.append([tuple(subaln.names), F])
    return triples

## Triple fitting functions
Collection of functions for concurrent fitting of many triples on CPUs and GPUs. Model is rooted, continuous-time, and strand-symmetric.

Also some functions for using Akaike-ish weights to build Semple and Steel-ish graphs.

In [10]:
@tf.function()
def transform_P_matrix(params, t_param):
    params = tf.exp(params)
    t = tf.exp(t_param)
    Q0 = tf.concat([[-tf.reduce_sum(params[0])], params[0]],
                   axis=0)
    Q1 = tf.concat([[params[1,0]], [-tf.reduce_sum(params[1])], params[1,1:]],
                   axis=0)
    Q2 = tf.concat([params[2,0:2], [-tf.reduce_sum(params[2])], [params[2,2]]],
                   axis=0)
    Q3 = tf.concat([params[3], [-tf.reduce_sum(params[3])]],
                   axis=0)
    Q = tf.concat([[Q0], [Q1], [Q2], [Q3]], axis=0)
    return tf.linalg.expm(Q)

@tf.function()
def transform(params):
    zero = tf.constant(0, dtype=tf.float32)
    pi = tfb.SoftmaxCentered()(params[0])
    Pa = transform_P_matrix(params[1:5], params[5,0])
    Pm = transform_P_matrix(params[1:5], zero)
    Pb = transform_P_matrix(params[1:5], params[5,1])
    Pc = transform_P_matrix(params[1:5], params[5,2])
    return pi, Pa, Pm, Pb, Pc
    
@tf.function()
def _loss(params_data):
    params, data = params_data
    pi, Pa, Pm, Pb, Pc = transform(params)
    J = tf.einsum('i,ij,ik,ku,kv', pi, Pa, Pm, Pb, Pc)
    loss = tf.reduce_sum(tf.keras.losses.KLDivergence()(J, data))
    return loss
    
@tf.function()
def loss(params, data):
    return tf.reduce_sum(tf.vectorized_map(_loss, (params, data)))

@tf.function()
def training_step(parameters, data, optimizer):
    with tf.GradientTape() as tape:
        loss_value = loss(parameters, data)
    gradients = tape.gradient(loss_value, parameters)
    return loss_value, gradients

# thanks https://github.com/mlgxmez/thelongrun_notebooks/blob/master/MLE_tutorial.ipynb
def mle_fit(data, loss, parameters, optimizer, steps=500, verbose=False):
    for i in range(steps):
        loss_value, gradients = training_step(parameters, data, optimizer)
        optimizer.apply_gradients([(gradients, parameters)])
        
        if i % 100 == 0:
            if verbose:
                iter_info = f"Step: {optimizer.iterations.numpy()}, initial loss: {loss_value.numpy()}"
                print(iter_info)

def fit_triples(triples, learning_rate=0.01, steps=3000, verbose=False):
    K = 3*len(triples)
    cherry_loc = {}
    data = []
    for names, F in triples:
        J = (F/F.sum()).astype(np.float32)
        for ix in [[0, 1, 2], [1, 2, 0], [2, 0, 1]]:
            data.append(J.transpose(ix))

    normal_initializer = tf.random_normal_initializer()
    parameters = tf.Variable(normal_initializer(shape=[K, 6, 3], dtype=tf.float32), name='params')
    data = tf.constant(data)
    
    if verbose:
        print(f'Fitting {data.shape[0]} triples')
    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
    mle_fit(data, loss, parameters, optimizer, steps=steps, verbose=verbose)
    
    losses = tf.vectorized_map(_loss, (parameters, data)).numpy()
    losses = [losses[i:i+3] for i in range(0, len(losses), 3)]
    return losses

def cherry_weights(ls, N):
    ls = N*ls
    delta = ls - ls.min()
    weights = np.exp(-delta)
    weights = weights/weights.sum()
#    return weights
    h = (-weights*np.log(weights))[weights != 0.].sum()
#     print(ls)
#     print(weights)
#     print((ls == ls.min())/(h + 1))
#     print()
#     return (ls == ls.min())/(h + 1)
    return weights/(h + 1)  # this was my favourite
    if h < 1:
        return weights
#        return ls == ls.min()
    return np.zeros(3, dtype=bool)

def get_edges(triples, losses):
    edges = Counter()
    for losses, (names, F) in zip(losses, triples):
        weights = cherry_weights(losses, F.sum())
        for name, weight in zip(names, weights):
            edges[frozenset(names) - {name}] += weight
    # for edge in edges:
    #   edges[edge] = np.exp(edges[edge])
    return edges

def get_Ps(cherry_names, triples, fits):
    Ps = {}
    ixes = [[0, 1, 2], [1, 2, 0], [2, 0, 1]]
    for (names, _), triple_fit in zip(triples, fits):
        if set(cherry_names) < set(names):
            for name, ix, fit in zip(names, ixes, triple_fit):
                if name not in cherry_names:
                    if names[ix[1]] == cherry_names[0]:
                        return fit[-2], fit[-1]
                    return fit[-1], fit[-2]

## Tree building algorithm
Where the magic happens.

In [11]:
def pulsar(triples, learning_rate=0.01, steps=3000, verbose=False):
    losses = fit_triples(triples, learning_rate=learning_rate,
                         steps=steps, verbose=verbose)
    tree = pulsar_tree(triples, losses, verbose=verbose)
    return tree

def edges_to_graph(edges):
    G = nx.Graph()
    for edge, weight in edges.items():
        G.add_edge(*edge, weight=weight)
    return G

def normalised_cut(edges, verbose=False):
    tips = np.unique([t for p in edges.keys() for t in p])
    affinity = np.zeros((len(tips),)*2)
    for i, tipi in enumerate(tips):
        for j, tipj in enumerate(tips):
            if i == j:
                break
            affinity[i, j] = edges[frozenset((tipi, tipj))]
    affinity += affinity.T
    sc = SpectralClustering(2, affinity='precomputed',  # random_state=0,
                            assign_labels='discretize')
    ix = sc.fit_predict(affinity).astype(bool)
    partition = list(tips[ix]), list(tips[np.logical_not(ix)])
    if verbose:
        G = edges_to_graph(edges)
        cut_value = nx.cut_size(G, partition[0], weight='weight')
        print(f'Cut value: {cut_value}, Partition:\n{partition}')
    return partition
    
def min_cut(edges, verbose=False):
    G = edges_to_graph(edges)
    cut_value, partition = nx.stoer_wagner(G)
    if verbose:
        print(f'Cut value: {cut_value}, Partition:\n{partition}')
    return partition

def pulsar_tree(triples, losses, verbose=False):
    edges = get_edges(triples, losses)
    if verbose:
        print('Graph:')
        for edge, weight in edges.items():
            print(edge, weight)
    partition = normalised_cut(edges, verbose)
    assert len(partition) == 2, 'polytomy detected. bailing'
    this_node = PhyloNode()
    for part in partition:
        if len(part) <= 1:
            this_node.append(PhyloNode(part.pop()))
            continue
        elif len(part) == 2:
            child = PhyloNode()
            for grandchild in part:
                child.append(PhyloNode(grandchild))
            this_node.append(child)
            continue
    
        part = set(part)
        part_losses = []
        part_triples = []
        for losses_for_names, (names, F) in zip(losses, triples):
            if set(names) <= part:
                part_losses.append(losses_for_names)
                part_triples.append((names, F))
        this_node.append(pulsar_tree(part_triples, part_losses, verbose=verbose))
    return this_node

# Some examples
## Example 1
Fit a rooted phylogeny of 5 mammals.

In [12]:
# aln = load_aligned_seqs('/home/ben/Data/pentads/ENSG00000197102.fa.gz', moltype="dna")
aln = load_aligned_seqs('/home/ben/Data/pentads/ENSG00000131018.fa.gz', moltype="dna")
# aln = load_aligned_seqs('/home/ben/Data/pentads/ENSG00000179869.fa.gz', moltype="dna")
# aln = load_aligned_seqs('brca1.fasta', moltype='dna')

In [30]:
subaln = aln.take_seqs(['SpermWhale', 'HumpbackW', 'Rhino'])
subaln

0,1
,0
Rhino,TGTGGCACGAATACTCATGCCAGCTCATTGCAGCATGAGAACAGCAGTGTATTACTCACT
SpermWhale,........AG...................A........A.........T...........
HumpbackW,........AG...................A..A.....A.........T...........


In [27]:
subaln = aln.get_similar(aln.take_seqs(['Human']).seqs[0], min_similarity=0.82)
#                         min_similarity=0.84)
subaln

0,1
,0
Dog,TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACT
FreeTaile,.........G.........................................C........
LittleBro,.........G.........................................C.......C
LeafNose,..........................T.....TT.....C....................
Horse,.............................G..............................
Rhino,........G....................G..................G...........
Pangolin,............................................................
Cat,....C...........G...........................................
Llama,.........G..................................................


### All at once
First run `pulsar` all the way through.

In [13]:
%%time
triples = get_triples(aln, codon_position=2, verbose=True)
tree = pulsar(triples, verbose=True)
print(tree.ascii_art())

Got 1379 positions
Fitting 30 triples


2021-11-12 06:59:18.719070: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2021-11-12 06:59:18.869455: I tensorflow/core/kernels/cuda_solvers.cc:180] Creating CudaSolver handles for stream 0x55b40a1ef020
2021-11-12 06:59:18.869588: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2021-11-12 06:59:19.035456: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10


Step: 1, initial loss: 9.560134887695312
Step: 101, initial loss: 4.206894874572754
Step: 201, initial loss: 2.543050765991211
Step: 301, initial loss: 2.3533973693847656
Step: 401, initial loss: 2.2965641021728516
Step: 501, initial loss: 2.2717273235321045
Step: 601, initial loss: 2.258497714996338
Step: 701, initial loss: 2.2505571842193604
Step: 801, initial loss: 2.2454094886779785
Step: 901, initial loss: 2.2418975830078125
Step: 1001, initial loss: 2.23939847946167
Step: 1101, initial loss: 2.2375776767730713
Step: 1201, initial loss: 2.23620867729187
Step: 1301, initial loss: 2.235171318054199
Step: 1401, initial loss: 2.234363555908203
Step: 1501, initial loss: 2.233720541000366
Step: 1601, initial loss: 2.2332069873809814
Step: 1701, initial loss: 2.2327933311462402
Step: 1801, initial loss: 2.232449531555176
Step: 1901, initial loss: 2.232171058654785
Step: 2001, initial loss: 2.2319326400756836
Step: 2101, initial loss: 2.231731414794922
Step: 2201, initial loss: 2.23154687

### Min Cut
```
          /-Orangutan
---------|
         |          /-HairyArma
          \--------|
                   |          /-Sloth
                    \--------|
                             |          /-Pangolin
                              \--------|
                                       |          /-Chimpanzee
                                        \--------|
                                                 |          /-Gorilla
                                                  \--------|
                                                           |          /-FlyingLem
                                                            \--------|
                                                                     |          /-Rhesus
                                                                      \--------|
                                                                               |          /-Human
                                                                                \--------|
                                                                                         |          /-Llama
                                                                                          \--------|
                                                                                                   |          /-HowlerMon
                                                                                                    \--------|
                                                                                                             |          /-HumpbackW
                                                                                                              \--------|
                                                                                                                       |          /-Horse
                                                                                                                        \--------|
                                                                                                                                 |          /-Rhino
                                                                                                                                  \--------|
                                                                                                                                            \-SpermWhale
```

### Normalised Cut
```
                              /-Pangolin
                    /--------|
                   |         |          /-Rhino
                   |          \--------|
                   |                   |          /-HumpbackW
          /--------|                    \--------|
         |         |                              \-SpermWhale
         |         |
         |         |          /-Horse
         |          \--------|
         |                    \-Llama
---------|
         |                    /-HairyArma
         |          /--------|
         |         |         |          /-FlyingLem
         |         |          \--------|
         |         |                    \-Sloth
          \--------|
                   |                    /-Gorilla
                   |          /--------|
                   |         |          \-HowlerMon
                   |         |
                    \--------|                    /-Human
                             |          /--------|
                             |         |          \-Orangutan
                              \--------|
                                       |          /-Chimpanzee
                                        \--------|
                                                  \-Rhesus
```

### More Tips
```
                              /-LittleBro
                    /--------|
                   |         |          /-FreeTaile
                   |          \--------|
                   |                    \-LeafNose
                   |
                   |                              /-Hippo
          /--------|                    /--------|
         |         |                   |          \-Pangolin
         |         |          /--------|
         |         |         |         |          /-Pig
         |         |         |          \--------|
         |         |         |                   |          /-HumpbackW
         |         |         |                    \--------|
         |          \--------|                              \-SpermWhale
         |                   |
         |                   |                    /-Cat
         |                   |          /--------|
         |                   |         |          \-Rhino
         |                    \--------|
         |                             |                    /-Horse
         |                             |          /--------|
         |                              \--------|          \-Llama
         |                                       |
         |                                        \-Dog
---------|
         |                                        /-Mole
         |                              /--------|
         |                             |         |          /-FlyingSqu
         |                             |          \--------|
         |                    /--------|                    \-Gorilla
         |                   |         |
         |                   |         |          /-FlyingLem
         |                   |          \--------|
         |                   |                    \-Galago
         |          /--------|
         |         |         |                    /-Orangutan
         |         |         |          /--------|
         |         |         |         |         |          /-Chimpanzee
         |         |         |         |          \--------|
         |         |          \--------|                    \-Human
         |         |                   |
         |         |                   |          /-HowlerMon
         |         |                    \--------|
          \--------|                              \-Rhesus
                   |
                   |                              /-AfricanEl
                   |                    /--------|
                   |                   |         |          /-Dugong
                   |                   |          \--------|
                   |          /--------|                    \-Manatee
                   |         |         |
                   |         |         |          /-Aardvark
                    \--------|          \--------|
                             |                    \-AsianElep
                             |
                             |          /-NineBande
                              \--------|
                                       |          /-Anteater
                                        \--------|
                                                 |          /-HairyArma
                                                  \--------|
                                                            \-Sloth
CPU times: user 10h 25min 10s, sys: 45min 20s, total: 11h 10min 31s
Wall time: 5h 45min 42s
```

## Example 2
### A single iteration
Now run through a single iteration of the algorithm.
#### Fit triples
Fits rooted, strand-symmetric, continuous-time models to every taxon triple.

In [15]:
triples = get_triples(aln, codon_position=2, verbose=True)
losses = fit_triples(triples, verbose=True, steps=1000)

Got 1379 positions
Fitting 30 triples
Step: 1, initial loss: 9.558014869689941
Step: 101, initial loss: 7.475912094116211
Step: 201, initial loss: 4.035521030426025
Step: 301, initial loss: 2.48883056640625
Step: 401, initial loss: 2.0289595127105713
Step: 501, initial loss: 1.8453056812286377
Step: 601, initial loss: 1.7498201131820679
Step: 701, initial loss: 1.6919828653335571
Step: 801, initial loss: 1.6521508693695068
Step: 901, initial loss: 1.6159394979476929


#### Create $S_\mathcal{T}\left/E^\text{max}_\mathcal{T}\right.$
Creates the edges in Semple and Steel's $S_\mathcal{T}\left/E^\text{max}_\mathcal{T}\right.$ graph, at least as I understand it.

In [16]:
edges = get_edges(triples, losses)
edges

Counter({frozenset({'Human', 'Mouse'}): 2.364182233810425,
         frozenset({'Dog', 'Mouse'}): 0.22597325593233109,
         frozenset({'Dog', 'Human'}): 0.0945690888513866,
         frozenset({'Human', 'Opossum'}): 2.2734654247760773,
         frozenset({'Dog', 'Opossum'}): 0.026277747842121002,
         frozenset({'Human', 'Platypus'}): 1.008799790404737,
         frozenset({'Dog', 'Platypus'}): 0.8131145063358076,
         frozenset({'Mouse', 'Opossum'}): 1.1069583594799042,
         frozenset({'Mouse', 'Platypus'}): 0.18988226354122162,
         frozenset({'Opossum', 'Platypus'}): 1.8967775851488113})

#### Find the root by partitioning on the minimum cut
Perform the minimum cut to partition our tip set into two, one either side of the root.

In [17]:
G = nx.Graph()
for edge, weight in edges.items():
    G.add_edge(*edge, weight=weight)
cut_value, partition = nx.stoer_wagner(G)
print(f'Cut value: {cut_value}, Partition:\n{partition}')

Cut value: 1.1599345989616463, Partition:
(['Dog'], ['Mouse', 'Platypus', 'Human', 'Opossum'])


In [18]:
from sklearn.cluster import SpectralClustering

tips = list(set(t for p in edges.keys() for t in p))
affinity = np.zeros((len(tips),)*2)
for i, tipi in enumerate(tips):
    for j, tipj in enumerate(tips):
        if i == j:
            break
        affinity[i, j] = edges[frozenset((tipi, tipj))]
affinity += affinity.T
affinity
sc = SpectralClustering(2, affinity='precomputed',
                        assign_labels='discretize')
print
print(sc.fit_predict(affinity))
print(tips)

[0 0 1 1 0]
['Human', 'Opossum', 'Platypus', 'Dog', 'Mouse']


The rest is left as an exercise for the reader (or just look in `pulsar` above) - the algorithm continues recursively.

In [37]:
w = np.ones(3)/3
(-w*np.log(w)).sum()

1.0986122886681096

## Example 3
### Let's get systematic
For future reference: joblib and tensorflow do not play nice together.

In [14]:
from cogent3.app import io

In [15]:
dstore = io.get_data_store("../data/horse_pig_bats-filtered.tinydb")
loader = io.load_db()
dstore.describe

record type,number
completed,878
incomplete,122
logs,1


In [16]:
num_alns = len(dstore)
all_triples = []
for aln_name in dstore:
    aln = loader(aln_name)
    triples = get_triples(aln, codon_position=2, verbose=False)
    all_triples.extend(triples)
losses = fit_triples(all_triples, steps=4000, verbose=True)

Fitting 10536 triples




Step: 1, initial loss: 5544.88623046875
Step: 101, initial loss: 2365.966796875
Step: 201, initial loss: 1211.08447265625
Step: 301, initial loss: 1039.599609375
Step: 401, initial loss: 980.8960571289062
Step: 501, initial loss: 953.0887451171875
Step: 601, initial loss: 937.4171142578125
Step: 701, initial loss: 927.705322265625
Step: 801, initial loss: 921.2186889648438
Step: 901, initial loss: 916.57958984375
Step: 1001, initial loss: 913.166748046875
Step: 1101, initial loss: 910.42578125
Step: 1201, initial loss: 908.3472290039062
Step: 1301, initial loss: 906.527099609375
Step: 1401, initial loss: 905.133056640625
Step: 1501, initial loss: 904.0201416015625
Step: 1601, initial loss: 903.0006103515625
Step: 1701, initial loss: 902.0267333984375
Step: 1801, initial loss: 901.1062622070312
Step: 1901, initial loss: 900.1434326171875
Step: 2001, initial loss: 899.4541625976562
Step: 2101, initial loss: 898.8173217773438
Step: 2201, initial loss: 898.2379150390625
Step: 2301, initial

In [17]:
trees = []
for i in range(0, len(losses), 4):
    tree = pulsar_tree(all_triples[i:i+4], losses[i:i+4])
    trees.append(tree)



In [18]:
ghb_siblings = Counter()
for tree in trees:
    tree = tree.unrooted()
    for sibling in tree.get_node_matching_name('Greater horseshoe bat').parent.children:
        if sibling.name in ('Microbat', 'Pig', 'Horse'):
            ghb_siblings[sibling.name] += 1

In [19]:
ghb_siblings

Counter({'Horse': 277, 'Microbat': 326, 'Pig': 275})

In [20]:
assert sum(ghb_siblings.values()) == len(trees)
1 - ghb_siblings['Microbat']/len(trees)

0.6287015945330297

In [122]:
print(tree.ascii_art())

                    /-Horse
          /--------|
         |         |          /-Microbat
---------|          \--------|
         |                    \-Pig
         |
          \-Greater horseshoe bat


In [235]:
pulsar_tree(all_triples[0:4], losses[0:4], verbose=True)

Graph:
frozenset({'Pig', 'Microbat'}) 0.6001753807067871
frozenset({'Pig', 'Greater horseshoe bat'}) 0.6071698367595673
frozenset({'Greater horseshoe bat', 'Microbat'}) 1.024835467338562
frozenset({'Horse', 'Microbat'}) 1.0389626026153564
frozenset({'Horse', 'Greater horseshoe bat'}) 0.3026467395318093
frozenset({'Pig', 'Horse'}) 0.6000426709651947
Cut value: 2.527700258542353, Partition:
(['Greater horseshoe bat', 'Pig'], ['Horse', 'Microbat'])


Tree("((Greater_horseshoe_bat,Pig),(Horse,Microbat));")

In [237]:
pulsar_tree(all_triples[0:4], losses[0:4], verbose=True)

Graph:
frozenset({'Pig', 'Microbat'}) 0.6592081189155579
frozenset({'Pig', 'Greater horseshoe bat'}) 0.6670434474945068
frozenset({'Greater horseshoe bat', 'Microbat'}) 0.8335337042808533
frozenset({'Horse', 'Microbat'}) 0.8486621677875519
frozenset({'Horse', 'Greater horseshoe bat'}) 0.33249066902624236
frozenset({'Pig', 'Horse'}) 0.6590619087219238
Cut value: 2.4842944009445773, Partition:
(['Greater horseshoe bat', 'Pig'], ['Horse', 'Microbat'])


Tree("((Greater_horseshoe_bat,Pig),(Horse,Microbat));")

In [165]:
len(trees)

878

In [173]:
tree.unrooted()

Tree("(Greater_horseshoe_bat,Horse,(Microbat,Pig));")