In [1]:
from ete3 import PhyloTree, NCBITaxa
import numpy as np
import pandas as pd

ncbi = NCBITaxa()

In [2]:
from ete3 import PhyloTree, Tree, NCBITaxa, TreeStyle
import numpy as np
import pandas as pd
ncbi = NCBITaxa()

# Load the tree

In [3]:
COG_name = "MtrH"

In [4]:

COG = PhyloTree(f"../Data/LGTAnalysis/{COG_name}/{COG_name}.nwk", format=1, sp_naming_function=lambda name: ncbi.get_lineage(name.split('.')[0])[3])
R = COG.get_midpoint_outgroup()
# and set it as tree outgroup
COG.set_outgroup(R)
COG.ladderize()

ts = TreeStyle()
ts.mode = "c"
ts.show_leaf_name = False

## Add taxonomic feature annotations 
To each of the leaves, add the taxonomic clade and kingdom of the species having the gene

In [5]:
for lf in COG.iter_leaves():
    lf.add_feature("kingdom", ncbi.get_lineage(lf.name.split('.')[0])[2])
    lf.add_feature("clade", ncbi.get_lineage(lf.name.split('.')[0])[3])

## Name all internal nodes

In [56]:
counter = 0
intnodes = []
for node in COG.traverse():
    if node.is_leaf()==False:
        counter += 1
        node.name = "I"+str(counter)
        intnodes.append(node.name)

In [57]:
intnodes = pd.DataFrame(index=list(set(intnodes)))
intnodes["Kingdom"] = ""
intnodes["Clade"] = ""

for node in COG.traverse():
    if node.is_leaf()==False:
        unique_kingdom = list(set([leaf.kingdom for leaf in node.iter_leaves()]))
        unique_clade = list(set([leaf.clade for leaf in node.iter_leaves()]))
        intnodes.Kingdom.loc[node.name] = unique_kingdom[0]
        intnodes.Clade.loc[node.name] = unique_clade[0]

intnodes

Unnamed: 0,Kingdom,Clade
I69,2157,28890
I8,2157,1783275
I34,2157,28890
I55,2157,28890
I63,2157,28890
...,...,...
I66,2157,28890
I40,2,1783272
I18,2,200940
I78,2157,28890


## Write as iTOL annotation file (https://itol.embl.de/help.cgi)

In [59]:
Colours = pd.DataFrame(data = ["#DFF1DE","#DDDBEA"], index = list(set(intnodes.Kingdom)), columns=["ColourCode"]) #make an index df matching colours to kingdoms

intnodes["annot"] = "range"
intnodes = intnodes.join(Colours, on='Kingdom')[["annot","ColourCode","Kingdom"]]
intnodes

Unnamed: 0,annot,ColourCode,Kingdom
I69,range,#DDDBEA,2157
I8,range,#DDDBEA,2157
I34,range,#DDDBEA,2157
I55,range,#DDDBEA,2157
I63,range,#DDDBEA,2157
...,...,...,...
I66,range,#DDDBEA,2157
I40,range,#DFF1DE,2
I18,range,#DFF1DE,2
I78,range,#DDDBEA,2157


In [69]:
COG.write(format = 1, outfile=f"../Data/LGTAnalysis/{COG_name}/{COG_name}_AnnotIntNodes.nwk")
with open('file.csv', 'a') as file:
    file.write('TREE_COLORS\nSEPARATOR TAB\nDATA\n')
    intnodes.to_csv(f"../Data/LGTAnalysis/{COG_name}/{COG_name}_AnnotIntNodes.csv", sep='\t', header = False)
