In [2]:
from ete3 import Tree
import csv
import os.path
import random
from ete3 import NCBITaxa
ncbi = NCBITaxa()

In [12]:
def process_tree(tree, pfam):
    """processes a tree based on their pfams, annotates them, removes wrong annotations, filters out trees without prokaryotes, splits them up by acquisition groups and writes these subtrees to a directory"""
    annotate(tree)
    delete_wrong_annotations(tree)
    if not tree.search_nodes(prok_euk = 'Prokaryote'):
        print("No Prokaryotes in tree")
        return None 
    ac_groups = get_acgroups(tree)
    for ac_group in ac_groups:
        subtree = tree.copy()
        ac_group  = subtree.search_nodes(name = ac_group.name)[0]
        process_ac_group(ac_group, subtree, pfam)

def process_ac_group(ac_group, subtree, pfam):
    """roots the main tree on a random prokaryote, processes an acquisition group within, takes a sister group and prunes the tree, roots this tree on the farthest leaf and writes subtree to a directory """
    root_random_prok(subtree)
    print(ac_group.up.name)
    if ac_group.up.name != "":
        print("AC GROUP:",ac_group.up.name)
        print("AC GROUP FIRST INDEX",ac_group.up.name[0])
        while ac_group.up.name != "" and ac_group.up.name[0] == 'U':
            ac_group = ac_group.up

    if len(subtree.search_nodes(prok_euk = 'Prokaryote')) > 1:
        lcas = get_prokaryotic_sister(ac_group, subtree)
        ancestries = []
        for lca in lcas:
            pot_ancestry = classify_sister(lca[0])
            ancestries.append(pot_ancestry)
        if ancestries[0] == ancestries[1]:
            farthest_leaf = reroot(ac_group, subtree) # Root on the farthest leaf
        else:
            order = ('Alphaproteobacteria', 'Asgard archaea', 'ABG proteobacteria', 'Asgard+TACK group', 'Betaproteobacteria', 'Gammaproteobacteria', 'TACK archaea')
            for ancestry in order:
                if ancestry in ancestries:
                    index = ancestries.index(ancestry)
                    lca_sel = lcas[index][1]
                    if index == 1:
                        root = ac_group.get_sisters()[0] # Root on the old sister
                        subtree.set_outgroup(root)
                    break
            else:
                old_sister_leaves = [leaf.name for leaf in ac_group.get_sisters()[0]]
                farthest_leaf = reroot(ac_group, subtree) # Root on the farthest leaf
    sister = get_sister(ac_group)
    prune_tree(subtree, sister, ac_group)
#     save_tree(pfam, subtree, ac_group)


In [3]:
def annotate(tree):
    "adds annotations for prokaryotes, eukaryotes and LECAs"
    for leaf in tree:
        if leaf.name[0].isdigit():
            taxid = int(leaf.name[:leaf.name.find('.')])
            leaf.add_features(taxid = taxid, prok_euk = 'Prokaryote')
        else:
            taxid = leaf.name[0:4]
            leaf.add_features(taxid = taxid, prok_euk = 'Eukaryote')
    for node in tree.traverse('postorder'):
        if not node.name:
            pass
        else:
            if node.name[0] == "D" and node.name[1].isdigit():
                node.add_features(event = 'Duplication')
            if node.name[0] == "U" and node.name[1].isdigit():
                node.add_features(event = 'Unknown')
            if node.name[0] == "O" and node.name[2].isdigit():
                node.add_features(LECA = 'RegExPlaceholder')

In [4]:
def get_acgroups(tree):
    """returns a list of acquisition groups per given tree"""
    ac_groups = []
    for node in tree.traverse():
        if is_acgroup_name(node.name):
            ac_groups.append(node)
    return ac_groups

def is_acgroup_name(name):
    """determines whether a node is an acquisition group or not"""
    if name:
        return name[0] == "D" and name[1].isdigit() and "." in name and name.split(".")[1] == "1"  
    else:
        return False

In [5]:
def root_random_prok(subtree):
    """roots the tree on a random prokaryote"""
    root = random.choice(subtree.search_nodes(prok_euk = 'Prokaryote')) # Root on random prokaryotic sequence
    subtree.set_outgroup(root)

In [6]:
def get_prokaryotic_sister(euk_clade, tree):
    """Determines both possible prokaryotic sister groups in an unrooted way or a rooted way using the rooting on the farthest leaf"""
    sister = euk_clade.get_sisters() # Should be checked if there are any eukaryotic sequences in the sister group
    if len(sister) == 1:
        prok_leaves_sister = sister[0].search_nodes(prok_euk = 'Prokaryote')
    else: # In case of multifurcation: take all sisters (written out for clarity, but does not have to be split between bifurcating and multifurcating)
        prok_leaves_sister = []
        for sis in sister:
            prok_leaves_sister.extend(sis.search_nodes(prok_euk = 'Prokaryote'))
    other_prok_leaves = set(tree.search_nodes(prok_euk = 'Prokaryote')) - set(prok_leaves_sister)
    lcas = []
    for i, group in enumerate([prok_leaves_sister, other_prok_leaves]):
        prok_taxids = []
        for prok in group: # Collect tax ids of prokaryotic sister leaves
            prok_taxids.append(prok.taxid)
        sp_tree = ncbi.get_topology(prok_taxids) # Get NCBI species tree, to get the identity of the LCA
        lca = sp_tree.taxid
        if lca == 1224: # Proteobacteria
            for proteo in sp_tree:
                lineage = ncbi.get_lineage(proteo.name)
                if not 28211 in lineage and not 1236 in lineage and not 28216 in lineage: # So, not an alpha/gamma/beta
                    lca_name = 'Proteobacteria'
                    break
            else: # So, only alpha/beta/gamma proteobacteria
                lca = 'abgprot'
                lca_name = 'ABG proteobacteria'
        elif lca == 2157: # Archaea
            for arch in sp_tree:
                lineage = ncbi.get_lineage(arch.name)
                if not 1935183 in lineage and not 1783275 in lineage: # So, not an Asgard or TACK
                    lca_name = 'Archaea'
                    break
            else: # So, only Asgards + TACK
                lca = 'asgtack'
                lca_name = 'Asgard+TACK group'
        else:
            lca_name = ncbi.translate_to_names([lca])[0]
        lcas.append((lca, lca_name))
    return lcas

def classify_sister(lca): # Added alpha/beta/gamma proteo superclass and TACK+Asgard supersuperphylum
    """Classifies the prokaryotic sister-group"""
    if lca == 'abgprot':
        return 'ABG proteobacteria'
    elif lca == 'asgtack':
        return 'Asgard+TACK group'
    ancestors = ncbi.get_lineage(lca)
    if 28211 in ancestors:
        return 'Alphaproteobacteria'
    elif 1935183 in ancestors:
        return 'Asgard archaea'
    elif 28216 in ancestors:
        return 'Betaproteobacteria'
    elif 1236 in ancestors:
        return 'Gammaproteobacteria'
    elif 1783275 in ancestors:
        return 'TACK archaea'
    else:
        desired = 'phylum'
        if 1224 in ancestors:
            desired = 'class'
        ranks = ncbi.get_rank(ancestors)
        names = ncbi.get_taxid_translator(ancestors)
        for taxon in ranks: # Return phylum and if that is not present, then lowest group
            if ranks[taxon] == desired:
                return names[taxon]
        else:
            return names[ancestors[-1]]
        
def reroot(euk_clade, tree):
    """Reroots the tree on the farthest leaf from the eukaryotic clade"""
    tree.set_outgroup(euk_clade) # Root on this eukaryotic clade
    sister = euk_clade.get_sisters()[0]
    farthest = sister.get_farthest_leaf()[0]
    tree.set_outgroup(farthest) # Root on the leaf farthest from this eukaryotic clade (can be a false positive for example)
    return farthest.name

In [7]:
def get_sister(ac_group):
    """takes the sistergroup to the acquisition node, checks whether this contains prokaryotes, then takes up to five organisms from it"""
    sister_list = []
    sister_group = ac_group.get_sisters()[0]
    prok_sister_leaves = sister_group.search_nodes(prok_euk = 'Prokaryote')
    sister_group = prok_sister_leaves
    for leaf in sister_group:
        sister_list.append(leaf)
    sister_list = sister_list[:5]
    return sister_list

In [8]:
def prune_tree(subtree, sister_list, ac_group):
    """prunes a copy of the main tree by the given acquisition and sister group"""
    keep_sequences = sister_list
    for leaf in ac_group.get_leaves():
          keep_sequences.append(leaf)
    subtree.prune(keep_sequences)

In [9]:
def root_farthest_leaf(subtree, ac_group): #TODO: make sure it only roots on the sister group
    "roots the subtree on the farthest leaf from the node of the acquisition group"
    farthest_leaf = subtree.get_farthest_leaf(ac_group)
    subtree.set_outgroup(farthest_leaf[0])
    
#     root on sister group

In [10]:
def save_tree(pfam, subtree, ac_group):
    "writes the subtree and its sequences to a directory"
    print("WRITING FILE")
    name = ac_group.name.split(".")[0]
    filename = "Untitled Folder/" + pfam + "_" + name + ".nw" 
    print(filename)
    subtree.write(features = ["LECA"], format=8, outfile=filename)
    seqs = []
    for leaf in subtree:
        seqs.append(leaf.name)
    textfile = open("seqs", "w")
    for element in seqs:
        textfile.write(element + "\n")
    textfile.close()
    seqsname = pfam + "_" + name + "_seqs.txt"
    os.rename(r'./seqs',r'./%s' % seqsname)
    os.replace(r"./%s" % seqsname, "Untitled Folder/%s" % seqsname)


In [11]:
def delete_wrong_annotations(tree):
    "Deletes wrongful annotations combining different leaves that cause issues for MCMCTree"
    for node in tree.traverse():
        if node.name != "" and not node.is_leaf() and node.name[0].isdigit():
            node.name=""

In [66]:
# t=Tree("../Data/Full/full_trees/PF00071_annotated_tree.nw", format=1)
# process_tree(t, "PF00071")

for file in os.listdir('../Data/Full/full_trees'):
    pfam = file.split("_")[0]
    print(pfam)
    tree = Tree( "../Data/Full/full_trees/" + file, format=1)
    process_tree(tree,pfam)




PF00027

WRITING FILE
Untitled Folder/PF00027_D10.nw

WRITING FILE
Untitled Folder/PF00027_D7.nw
U4.1
AC GROUP: U4.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00027_U4.nw
U5.6
AC GROUP: U5.6
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00027_U5.nw
U6.1
AC GROUP: U6.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00027_U6.nw
PF00028
PF00030
PF00031
PF00032
PF00033
PF00034
PF00035

WRITING FILE
Untitled Folder/PF00035_D2.nw
PF00041

WRITING FILE
Untitled Folder/PF00041_D8.nw
U9.1
AC GROUP: U9.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00041_U9.nw
PF00042
PF00043
PF00044
PF00046
No Prokaryotes in tree
PF00051
No Prokaryotes in tree
PF00052
PF00056
PF00059

WRITING FILE
Untitled Folder/PF00059_D1.nw
PF00060
No Prokaryotes in tree
PF00063
No Prokaryotes in tree
PF00067

WRITING FILE
Untitled Folder/PF00067_D3.nw
PF00068
PF00069

WRITING FILE
Untitled Folder/PF00069_D1.nw

WRITING FILE
Untitled Folder/PF00069_D2.nw
U10.2
AC GROUP: U10.2
AC GROUP FIRS

WRITING FILE
Untitled Folder/PF00270_D2.nw

WRITING FILE
Untitled Folder/PF00270_D3.nw
U14.2
AC GROUP: U14.2
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00270_U14.nw

WRITING FILE
Untitled Folder/PF00270_D4.nw

WRITING FILE
Untitled Folder/PF00270_D20.nw

WRITING FILE
Untitled Folder/PF00270_D19.nw

WRITING FILE
Untitled Folder/PF00270_D8.nw
PF00271

WRITING FILE
Untitled Folder/PF00271_D6.nw
U4.3
AC GROUP: U4.3
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00271_U4.nw
U1.4
AC GROUP: U1.4
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00271_U1.nw

WRITING FILE
Untitled Folder/PF00271_D23.nw

WRITING FILE
Untitled Folder/PF00271_D13.nw
U21.6
AC GROUP: U21.6
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00271_U21.nw
U38.1
AC GROUP: U38.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00271_U38.nw

WRITING FILE
Untitled Folder/PF00271_D12.nw

WRITING FILE
Untitled Folder/PF00271_D33.nw
U25.1
AC GROUP: U25.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitl

WRITING FILE
Untitled Folder/PF00610_D1.nw
PF00611
No Prokaryotes in tree
PF00613
No Prokaryotes in tree
PF00615
No Prokaryotes in tree
PF00616
No Prokaryotes in tree
PF00617
U1.1
AC GROUP: U1.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00617_U1.nw
PF00618
No Prokaryotes in tree
PF00620
U1.1
AC GROUP: U1.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00620_U1.nw
PF00621

WRITING FILE
Untitled Folder/PF00621_D1.nw
PF00622

WRITING FILE
Untitled Folder/PF00622_D3.nw
U2.3
AC GROUP: U2.3
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00622_U2.nw
PF00623

WRITING FILE
Untitled Folder/PF00623_D2.nw
PF00625
PF00626

WRITING FILE
Untitled Folder/PF00626_D2.nw

WRITING FILE
Untitled Folder/PF00626_D4.nw
U1.4
AC GROUP: U1.4
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF00626_U1.nw
PF00628
No Prokaryotes in tree
PF00629
PF00630
No Prokaryotes in tree
PF00631
No Prokaryotes in tree
PF00632
No Prokaryotes in tree
PF00635
No Prokaryotes in tree
PF00636
PF00637
No 


WRITING FILE
Untitled Folder/PF01131_D1.nw
PF01132
PF01133
No Prokaryotes in tree
PF01134
PF01135

WRITING FILE
Untitled Folder/PF01135_D1.nw
PF01136
PF01137

WRITING FILE
Untitled Folder/PF01137_D1.nw
PF01138

WRITING FILE
Untitled Folder/PF01138_D3.nw

WRITING FILE
Untitled Folder/PF01138_D5.nw
PF01139
PF01142

WRITING FILE
Untitled Folder/PF01142_D1.nw
PF01144
PF01145
U8.2
AC GROUP: U8.2
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF01145_U8.nw
PF01148
PF01149
PF01150
U1.3
AC GROUP: U1.3
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF01150_U1.nw
PF01151
No Prokaryotes in tree
PF01152
PF01154
PF01155
PF01156
PF01157
PF01158
No Prokaryotes in tree
PF01159
No Prokaryotes in tree
PF01161
U1.1
AC GROUP: U1.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF01161_U1.nw
PF01163
PF01165
PF01167
No Prokaryotes in tree
PF01168
PF01169
PF01170
PF01171
PF01172
PF01174
PF01175
PF01176
PF01177
PF01179
PF01180
PF01182
PF01183
PF01184
PF01186
PF01187
PF01189
PF01191
PF01193
PF0

PF01791
PF01793
No Prokaryotes in tree
PF01794
U3.6
AC GROUP: U3.6
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF01794_U3.nw
PF01795
PF01798

WRITING FILE
Untitled Folder/PF01798_D1.nw
PF01799
U1.2
AC GROUP: U1.2
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF01799_U1.nw
PF01804
PF01805
No Prokaryotes in tree
PF01807
PF01808
PF01809
PF01810
PF01812
PF01813
PF01814
PF01817
PF01820
PF01822
PF01823
PF01828
PF01832
PF01833
U21.1
AC GROUP: U21.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF01833_U21.nw

WRITING FILE
Untitled Folder/PF01833_D14.nw

WRITING FILE
Untitled Folder/PF01833_D15.nw

WRITING FILE
Untitled Folder/PF01833_D11.nw
PF01834
No Prokaryotes in tree
PF01835
PF01841
PF01842
PF01843
No Prokaryotes in tree
PF01846
No Prokaryotes in tree
PF01847
PF01849

WRITING FILE
Untitled Folder/PF01849_D1.nw
PF01852
U1.2
AC GROUP: U1.2
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF01852_U1.nw
PF01853
No Prokaryotes in tree
PF01855
PF01857
No Prokaryotes in tr

WRITING FILE
Untitled Folder/PF02798_U2.nw
PF02799
No Prokaryotes in tree
PF02800

WRITING FILE
Untitled Folder/PF02800_D1.nw
PF02801
PF02803
PF02805
PF02806
PF02807
No Prokaryotes in tree
PF02811
PF02812
PF02814
PF02815
No Prokaryotes in tree
PF02816
No Prokaryotes in tree
PF02820
No Prokaryotes in tree
PF02823
PF02824
U2.5
AC GROUP: U2.5
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF02824_U2.nw
PF02825
No Prokaryotes in tree
PF02826
PF02830
PF02833
PF02834
PF02836
PF02837
PF02838
PF02840
No Prokaryotes in tree
PF02841
No Prokaryotes in tree
PF02843
PF02844
PF02847
No Prokaryotes in tree
PF02852
PF02854
No Prokaryotes in tree
PF02861
PF02862
No Prokaryotes in tree
PF02866
PF02867
PF02868
PF02870
PF02872
PF02873
PF02874
PF02875
PF02877
No Prokaryotes in tree
PF02878
PF02879
PF02880
PF02881
PF02882

WRITING FILE
Untitled Folder/PF02882_D1.nw
PF02883
No Prokaryotes in tree
PF02885
PF02886
No Prokaryotes in tree
PF02887

WRITING FILE
Untitled Folder/PF02887_D1.nw
PF02889

WRITING 


WRITING FILE
Untitled Folder/PF03725_D3.nw
PF03726
PF03727
PF03729
PF03730
No Prokaryotes in tree
PF03731
No Prokaryotes in tree
PF03732
No Prokaryotes in tree
PF03733
PF03734
PF03735
No Prokaryotes in tree
PF03737
PF03738
PF03739
PF03740
PF03741
PF03743
PF03746
PF03747
PF03748
PF03749
PF03755
PF03762
PF03764
U2.1
AC GROUP: U2.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF03764_U2.nw
PF03765
No Prokaryotes in tree
PF03767
PF03770
No Prokaryotes in tree
PF03772
PF03773
PF03775
PF03776
PF03781
PF03786
PF03788
PF03795
PF03796
PF03797
PF03798
No Prokaryotes in tree
PF03799
PF03800
No Prokaryotes in tree
PF03801
No Prokaryotes in tree
PF03803
PF03806
PF03807
PF03808
PF03810
No Prokaryotes in tree
PF03813
No Prokaryotes in tree
PF03815
PF03816
PF03819
PF03820
No Prokaryotes in tree
PF03822
No Prokaryotes in tree
PF03824
PF03825
PF03828
No Prokaryotes in tree
PF03831
PF03834
No Prokaryotes in tree
PF03835
No Prokaryotes in tree
PF03836
No Prokaryotes in tree
PF03839
No Prokaryotes 

WRITING FILE
Untitled Folder/PF04675_D1.nw
PF04676
No Prokaryotes in tree
PF04677

WRITING FILE
Untitled Folder/PF04677_D1.nw
PF04679

WRITING FILE
Untitled Folder/PF04679_D2.nw
PF04683
No Prokaryotes in tree
PF04685
PF04690
No Prokaryotes in tree
PF04695
No Prokaryotes in tree
PF04696
No Prokaryotes in tree
PF04699
No Prokaryotes in tree
PF04703
PF04706
PF04707
No Prokaryotes in tree
PF04712
No Prokaryotes in tree
PF04715
PF04716
No Prokaryotes in tree
PF04718
No Prokaryotes in tree
PF04719
No Prokaryotes in tree
PF04722
No Prokaryotes in tree
PF04724
PF04727
No Prokaryotes in tree
PF04729
No Prokaryotes in tree
PF04733
PF04734
PF04739
No Prokaryotes in tree
PF04749
No Prokaryotes in tree
PF04750
PF04752
PF04754
PF04756

WRITING FILE
Untitled Folder/PF04756_D1.nw
PF04757
No Prokaryotes in tree
PF04758
PF04760
PF04768
PF04773
PF04774
No Prokaryotes in tree
PF04775
PF04777
No Prokaryotes in tree
PF04784
PF04791
No Prokaryotes in tree
PF04794
PF04795
No Prokaryotes in tree
PF04800
PF0480

PF05866
PF05869
PF05870
PF05871
PF05872
PF05873
No Prokaryotes in tree
PF05875
PF05876
PF05879
No Prokaryotes in tree
PF05889
PF05890
No Prokaryotes in tree
PF05891
No Prokaryotes in tree
PF05899
PF05903
No Prokaryotes in tree
PF05907
No Prokaryotes in tree
PF05908
PF05914
No Prokaryotes in tree
PF05915
No Prokaryotes in tree
PF05916
U2.1
AC GROUP: U2.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF05916_U2.nw

WRITING FILE
Untitled Folder/PF05916_D1.nw
PF05918
No Prokaryotes in tree
PF05922
PF05935
PF05936
PF05940
PF05943
PF05947
PF05954
PF05958
PF05960
PF05962
PF05964
No Prokaryotes in tree
PF05965
No Prokaryotes in tree
PF05970

WRITING FILE
Untitled Folder/PF05970_D1.nw
PF05971
PF05977
PF05978
No Prokaryotes in tree
PF05981
PF05983
No Prokaryotes in tree
PF05985
PF05988
PF05990
PF05995
PF05997
No Prokaryotes in tree
PF06003
No Prokaryotes in tree
PF06011
No Prokaryotes in tree
PF06017
No Prokaryotes in tree
PF06025
No Prokaryotes in tree
PF06026
PF06027
No Prokaryotes in tr

WRITING FILE
Untitled Folder/PF07724_D2.nw
PF07726
PF07727
No Prokaryotes in tree
PF07728

WRITING FILE
Untitled Folder/PF07728_D2.nw

WRITING FILE
Untitled Folder/PF07728_D1.nw
PF07729
PF07730
PF07731
U1.3
AC GROUP: U1.3
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF07731_U1.nw
PF07732

WRITING FILE
Untitled Folder/PF07732_D2.nw
PF07733
PF07738

WRITING FILE
Untitled Folder/PF07738_D1.nw
PF07739
PF07741
No Prokaryotes in tree
PF07742
No Prokaryotes in tree
PF07743
PF07744
No Prokaryotes in tree
PF07745
PF07748
U2.1
AC GROUP: U2.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF07748_U2.nw
PF07749
No Prokaryotes in tree
PF07750
PF07751
PF07757
No Prokaryotes in tree
PF07766
PF07767
No Prokaryotes in tree
PF07773
No Prokaryotes in tree
PF07774
No Prokaryotes in tree
PF07779
No Prokaryotes in tree
PF07780
No Prokaryotes in tree
PF07786
PF07787
PF07792
No Prokaryotes in tree
PF07793
PF07798
PF07799
PF07802
No Prokaryotes in tree
PF07804
PF07807
No Prokaryotes in tree
PF07808

PF08760
PF08766
No Prokaryotes in tree
PF08767
No Prokaryotes in tree
PF08768
PF08771
No Prokaryotes in tree
PF08772
No Prokaryotes in tree
PF08773
No Prokaryotes in tree
PF08774
PF08777
No Prokaryotes in tree
PF08781
No Prokaryotes in tree
PF08783
No Prokaryotes in tree
PF08784
PF08785
No Prokaryotes in tree
PF08797

WRITING FILE
Untitled Folder/PF08797_D1.nw
PF08801
No Prokaryotes in tree
PF08803
PF08806
No Prokaryotes in tree
PF08811
PF08818
PF08825
No Prokaryotes in tree
PF08827
PF08837
PF08839
No Prokaryotes in tree
PF08840
PF08843
PF08857
PF08867
PF08877
PF08881
PF08883
PF08892
PF08894
PF08903
PF08906
PF08907
PF08908
PF08910
No Prokaryotes in tree
PF08911
No Prokaryotes in tree
PF08914
No Prokaryotes in tree
PF08920
No Prokaryotes in tree
PF08923
PF08939
PF08953
No Prokaryotes in tree
PF08969
No Prokaryotes in tree
PF08975
PF08982
PF08991
No Prokaryotes in tree
PF08996
No Prokaryotes in tree
PF09011
No Prokaryotes in tree
PF09032
No Prokaryotes in tree
PF09066
No Prokaryotes in t

WRITING FILE
Untitled Folder/PF10294_D2.nw

WRITING FILE
Untitled Folder/PF10294_D1.nw
U4.1
AC GROUP: U4.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF10294_U4.nw

WRITING FILE
Untitled Folder/PF10294_D5.nw
PF10296
No Prokaryotes in tree
PF10300
PF10307
No Prokaryotes in tree
PF10309
No Prokaryotes in tree
PF10312
No Prokaryotes in tree
PF10335
PF10342
PF10343
PF10345
PF10347
No Prokaryotes in tree
PF10350
No Prokaryotes in tree
PF10351
No Prokaryotes in tree
PF10354
PF10356
No Prokaryotes in tree
PF10357
No Prokaryotes in tree
PF10358
No Prokaryotes in tree
PF10363
No Prokaryotes in tree
PF10366
No Prokaryotes in tree
PF10367
No Prokaryotes in tree
PF10369
PF10371
PF10373
No Prokaryotes in tree
PF10374
No Prokaryotes in tree
PF10377
No Prokaryotes in tree
PF10382
No Prokaryotes in tree
PF10385
PF10390
No Prokaryotes in tree
PF10391
No Prokaryotes in tree
PF10392
No Prokaryotes in tree
PF10394
No Prokaryotes in tree
PF10396
PF10397
PF10403
No Prokaryotes in tree
PF10404
No Pr

No Prokaryotes in tree
PF12660
No Prokaryotes in tree
PF12678
No Prokaryotes in tree
PF12679
PF12680
PF12682
PF12689
PF12694
PF12695
PF12696
PF12697
PF12698
PF12701
No Prokaryotes in tree
PF12704
PF12705
PF12706
PF12708
PF12710
PF12717

WRITING FILE
Untitled Folder/PF12717_D1.nw
PF12719
No Prokaryotes in tree
PF12721
PF12722
No Prokaryotes in tree
PF12724
PF12726
No Prokaryotes in tree
PF12727
PF12728
PF12733
PF12738
No Prokaryotes in tree
PF12740
PF12742
No Prokaryotes in tree
PF12745
No Prokaryotes in tree
PF12746
PF12755
No Prokaryotes in tree
PF12752
No Prokaryotes in tree
PF12756
No Prokaryotes in tree
PF12762
PF12763
No Prokaryotes in tree
PF12766
PF12767
No Prokaryotes in tree
PF12769
PF12770
PF12771
PF12774
No Prokaryotes in tree
PF12775
No Prokaryotes in tree
PF12777
No Prokaryotes in tree
PF12780
No Prokaryotes in tree
PF12781
No Prokaryotes in tree
PF12783
No Prokaryotes in tree
PF12784
PF12787
PF12790
PF12796

WRITING FILE
Untitled Folder/PF12796_D102.nw
U95.1
AC GROUP: U95

WRITING FILE
Untitled Folder/PF13656_D1.nw
PF13657
PF13660
PF13661
PF13662
PF13663
PF13664
PF13667
PF13668
PF13669
PF13671
PF13672
PF13673
PF13675
PF13676
PF13677
PF13679
PF13683
PF13688
PF13691
PF13692
PF13695
No Prokaryotes in tree
PF13700
PF13704
PF13710
PF13715
PF13714
PF13716
No Prokaryotes in tree
PF13718
PF13720
PF13722
PF13725
PF13726
PF13727
PF13728
PF13732
PF13733
No Prokaryotes in tree
PF13737
PF13738
PF13740
PF13742
PF13746
PF13759
PF13761
PF13768
U1.4
AC GROUP: U1.4
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF13768_U1.nw
PF13771
No Prokaryotes in tree
PF13772
PF13774
No Prokaryotes in tree
PF13778
PF13787
PF13793
PF13802
PF13806
PF13807
PF13810
PF13812
No Prokaryotes in tree
PF13813
PF13815
No Prokaryotes in tree
PF13821
No Prokaryotes in tree
PF13826
PF13832
No Prokaryotes in tree
PF13833
U10.1
AC GROUP: U10.1
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF13833_U10.nw

WRITING FILE
Untitled Folder/PF13833_D9.nw
U11.2
AC GROUP: U11.2
AC GROUP FIRST INDEX 

No Prokaryotes in tree
PF14778
No Prokaryotes in tree
PF14779
No Prokaryotes in tree
PF14780
No Prokaryotes in tree
PF14781
No Prokaryotes in tree
PF14782
No Prokaryotes in tree
PF14783
PF14785
PF14791

WRITING FILE
Untitled Folder/PF14791_D1.nw
PF14792
U1.6
AC GROUP: U1.6
AC GROUP FIRST INDEX U
WRITING FILE
Untitled Folder/PF14792_U1.nw
PF14793
PF14796
No Prokaryotes in tree
PF14801
PF14802
No Prokaryotes in tree
PF14805
PF14806
No Prokaryotes in tree
PF14807
No Prokaryotes in tree
PF14808
No Prokaryotes in tree
PF14811
PF14815
PF14817
No Prokaryotes in tree
PF14821
PF14822
No Prokaryotes in tree
PF14825
No Prokaryotes in tree
PF14826
No Prokaryotes in tree
PF14832
PF14833
PF14836
No Prokaryotes in tree
PF14838
No Prokaryotes in tree
PF14842
PF14841
PF14844
No Prokaryotes in tree
PF14845
No Prokaryotes in tree
PF14846
No Prokaryotes in tree
PF14849
PF14850
PF14853
PF14858
No Prokaryotes in tree
PF14863
PF14864
PF14870
PF14868
No Prokaryotes in tree
PF14874

WRITING FILE
Untitled Folde

WRITING FILE
Untitled Folder/PF17171_U1.nw
PF17172
PF17175
No Prokaryotes in tree
PF17177
No Prokaryotes in tree
PF17180
No Prokaryotes in tree
PF17184
No Prokaryotes in tree
PF17186
PF17188
PF17189
PF17191
PF17200
PF17201
PF17203
PF17207

WRITING FILE
Untitled Folder/PF17207_D6.nw
PF17210
PF17215
No Prokaryotes in tree
PF17216
No Prokaryotes in tree
PF17221
No Prokaryotes in tree
PF17244
No Prokaryotes in tree
PF17246
No Prokaryotes in tree
PF17284
PF17286
PF17285
No Prokaryotes in tree
PF17291
PF17292
No Prokaryotes in tree
PF17297
PF17384
PF17389
PF17390
PF17391
PF17392
PF17403
No Prokaryotes in tree
PF17404
No Prokaryotes in tree
PF17405
No Prokaryotes in tree
PF17406
No Prokaryotes in tree
PF17407
No Prokaryotes in tree
PF17408
PF17432
PF17433
PF17450
PF17517
PF00001
No Prokaryotes in tree
PF00002
No Prokaryotes in tree
PF00003
No Prokaryotes in tree
PF00004

WRITING FILE
Untitled Folder/PF00004_D31.nw

WRITING FILE
Untitled Folder/PF00004_D38.nw

WRITING FILE
Untitled Folder/PF00

In [11]:
t=Tree("../Data/Full/full_trees/PF00071_annotated_tree.nw", format=1)
i = 0
for leaf in t:
    i = i + 1
    if t.search_nodes(event = 'Duplication'):
        print(i)