In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import seaborn as sns

import networkx as nx
import ete3
import dendropy

from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import squareform

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
#define colours
cmap = {
    #Source 
    'Diseased': '#FF0000',
    'Cecal': '#0000FF',
    'Environmental': '#00FF00',
    #Outbreak
    'F2402': '#BEBEBE',
    'F9619': '#7FFFD4',
    'F6041': '#F0FFFF',
    'F9226': '#0000FF',
    'F9413': '#A52A2A',
    'F4957': '#DEB887',
    'F0012': '#7FFF00',
    'F8630': '#FF7F50',
    'F3862': '#FFF8DC',
    'F6245': '#00FFFF',
    'F7578': '#B8860B',
    'F2655': '#BDB76B',
    'F7360': '#8B008B',
    'F0731': '#556B2F',
    'F23315': '#FF8C00',
    'F4984': '#9932CC',
    'F9503': '#8B0000',
    'F0205': '#E9967A',
    'F0038':'#8FBC8F',
    #Phylogroup
    'A':'#483D8B',
    'B1': '#2F4F4F',
    'B2': '#00CED1',
    'C': '#9400D3',
    'cryptic': '#000000',
    'D': '#FF1493',
    'E': '#00BFFF',
    'F': '#1E90FF',
    'G': '#B22222',
    'U': '#000000',
    np.nan: '#FFFFFF'
}

In [3]:
genome_labels = pd.read_table('genome_label.csv', sep=',', index_col=0)


In [4]:
genome_labels

Unnamed: 0_level_0,Source,Outbreak,Phylogroup
Genome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
EC_0012_1H1_D,Diseased,F0012,G
EC_0012_2H1_D,Diseased,F0012,G
EC_0012_2L1_D,Diseased,F0012,B2
EC_0012_3S1_D,Diseased,F0012,G
EC_0012_C1_H,Cecal,F0012,A
...,...,...,...
EC_E9DN_1_E,Environmental,,A
EC_E9DN_2_E,Environmental,,A
EC_E9FP_1_E,Environmental,,A
EC_E9L_1_E,Environmental,,D


In [5]:
source = {k:v for k, v in cmap.items() if k in genome_labels['Source'].unique()}
#outbreak = {k:v for k, v in cmap.items() if k in genome_labels['Outbreak'].unique()}
#phylo = {k:v for k, v in cmap.items() if k in genome_labels['Phylogroup'].unique()}

In [9]:
t = ete3.Tree('/media/haley/My Book/Ecoli_results/Ecoli_ARETE_Results_Organized/panaroo_results/results/tree_Ecoli/Ecoli_tree_file_gtr_gamma.treefile', format=1)
t.set_outgroup('EF')

# Define the annotation process for annotating the tree.
def rects_layout(node):
    if node.name in genome_labels.index:
        node.add_face(ete3.RectFace(height=10, width=40, 
                                    fgcolor=cmap[genome_labels.loc[node.name, 'Source']], 
                                    bgcolor=cmap[genome_labels.loc[node.name, 'Source']]),
                                    column=0, position='aligned')
        #node.add_face(ete3.RectFace(height=10, width=40, 
                            #fgcolor=cmap[genome_labels.loc[node.name, 'Outbreak']], 
                            #bgcolor=cmap[genome_labels.loc[node.name, 'Outbreak']]),
                           # column=1, position='aligned')
        #node.add_face(ete3.RectFace(height=10, width=40, 
                            #fgcolor=cmap[genome_labels.loc[node.name, 'Phylogroup']], 
                            #bgcolor=cmap[genome_labels.loc[node.name, 'Phylogroup']]),
                            #column=1, position='aligned')
 



In [10]:
def rects_layout(node):
    if node.name in genome_labels.index:
        node.add_face(ete3.RectFace(height=10, width=40, 
                                    fgcolor=cmap[genome_labels.loc[node.name, 'Source']], 
                                    bgcolor=cmap[genome_labels.loc[node.name, 'Source']]),
                                    column=0, position='aligned')
        #node.add_face(ete3.RectFace(height=10, width=40, 
                                    #fgcolor=cmap[genome_labels.loc[node.name, 'Outbreak']], 
                                    #bgcolor=cmap[genome_labels.loc[node.name, 'Outbreak']]),
                                    #column=1, position='aligned')
        #node.add_face(ete3.RectFace(height=10, width=40, 
                            #fgcolor=cmap[genome_labels.loc[node.name, 'Phylogroup']], 
                            #bgcolor=cmap[genome_labels.loc[node.name, 'Phylogroup']]),
                            #column=1, position='aligned')


In [13]:
circ_ts1 = ete3.TreeStyle()
circ_ts1.optimal_scale_level="full"
circ_ts1.root_opening_factor=1
circ_ts1.show_leaf_name=False
circ_ts1.mode='c'
circ_ts1.allow_face_overlap = True
circ_ts1.layout_fn = rects_layout


#for k, v in source.items():
 #   circ_ts1.legend.add_face(ete3.RectFace(16, 16, "black", v), column=0)
  #  circ_ts1.legend.add_face(ete3.TextFace(k, fsize=12, ftype='Arial', tight_text=True), column=1)

#for k, v in phylo.items():
 #   circ_ts1.legend.add_face(ete3.RectFace(16, 16, "black", v), column=4)
  #  circ_ts1.legend.add_face(ete3.TextFace(k, fsize=12, ftype='Arial', tight_text=True), column=5)


t.render("%%inline", tree_style=circ_ts1)
t.render('circular_phylo_tree_vertical_sourceonly.png', tree_style=circ_ts1, dpi=600)


{'nodes': [[199.0110169099628,
   198.247486543392,
   204.28874125412594,
   203.52521088755515,
   0,
   None],
  [317.55569736019044,
   198.27854058422838,
   321.55569736019044,
   202.27854058422838,
   1,
   None],
  [228.35947393153046,
   231.5476059084168,
   234.00953603021262,
   237.19766800709897,
   2,
   None],
  [252.45299426151595,
   200.61163068820036,
   256.62725551685577,
   204.78589194354018,
   3,
   None],
  [317.46672258231695,
   201.26596891075155,
   321.56729597013157,
   205.3665422985662,
   4,
   None],
  [262.0921156300494,
   202.2320868258526,
   266.33854153385613,
   206.47851272965934,
   5,
   None],
  [317.30172677329995,
   204.25275668360894,
   321.5002134699699,
   208.45124338027887,
   6,
   None],
  [317.0608169674097,
   207.23696634662022,
   321.3544933767285,
   211.53064275593903,
   7,
   None],
  [194.9890400793282,
   257.7221046910377,
   199.19700478731835,
   261.93006939902784,
   8,
   None],
  [254.78849803234368,
   230.7

In [42]:
t = ete3.Tree('/media/haley/My Book/Ecoli_results/Ecoli_ARETE_Results_Organized/panaroo_results/results/tree_Ecoli/Ecoli_tree_file_gtr_gamma.treefile', format=1)
t.set_outgroup('EF')

rect_ts = ete3.TreeStyle()
def rects_layout(node):
    if node.name in genome_labels.index:
        node.add_face(ete3.RectFace(height=5, width=50, 
                                    fgcolor=cmap[genome_labels.loc[node.name, 'Source']], 
                                    bgcolor=cmap[genome_labels.loc[node.name, 'Source']]),
                                    column=0, position='aligned')
        node.add_face(ete3.RectFace(height=5, width=50, 
                            fgcolor=cmap[genome_labels.loc[node.name, 'Outbreak']], 
                            bgcolor=cmap[genome_labels.loc[node.name, 'Outbreak']]),
                            column=1, position='aligned')
        node.add_face(ete3.RectFace(height=5, width=50, 
                            fgcolor=cmap[genome_labels.loc[node.name, 'Phylogroup']], 
                            bgcolor=cmap[genome_labels.loc[node.name, 'Phylogroup']]),
                            column=2, position='aligned')
 

rect_ts.layout_fn = rects_layout

for k, v in source.items():
    rect_ts.legend.add_face(ete3.RectFace(16,16, "black", v), column=0)
    rect_ts.legend.add_face(ete3.TextFace(k, fsize=12, ftype='Arial', tight_text=True), column=1)
for k, v in outbreak.items():
    rect_ts.legend.add_face(ete3.RectFace(16,16, "black", v), column=2)
    rect_ts.legend.add_face(ete3.TextFace(k, fsize=12, ftype='Arial', tight_text=True), column=3)
for k, v in phylo.items():
    rect_ts.legend.add_face(ete3.RectFace(16,16, "black", v), column=4)
    rect_ts.legend.add_face(ete3.TextFace(k, fsize=12, ftype='Arial', tight_text=True), column=5)



t.render("%%inline", tree_style=rect_ts)
t.render('rec_phylo_tree_vertical.png', tree_style=rect_ts, dpi=600)

{'nodes': [[0.5, 336.59800136089325, 4.5, 340.59800136089325, 0, None],
  [104.13879912244832, 9.0, 108.13879912244832, 13.0, 1, None],
  [104.13879912244832,
   664.1960027217865,
   108.13879912244832,
   668.1960027217865,
   2,
   None],
  [120.0346101325214, 44.0, 124.0346101325214, 48.0, 3, None],
  [140.86330177277313, 29.0, 144.86330177277313, 33.0, 4, None],
  [139.4909403809647, 59.0, 143.4909403809647, 63.0, 5, None],
  [143.1800913209599, 49.0, 147.1800913209599, 53.0, 6, None],
  [143.96698314311567, 69.0, 147.96698314311567, 73.0, 7, None],
  [133.0344096504092,
   1284.392005443573,
   137.0344096504092,
   1288.392005443573,
   8,
   None],
  [144.6488535204275,
   421.6914978027344,
   148.6488535204275,
   425.6914978027344,
   9,
   None],
  [169.14646519884298, 264.625, 173.14646519884298, 268.625, 10, None],
  [178.3517171185224, 130.25, 182.3517171185224, 134.25, 11, None],
  [188.8257464447199, 99.0, 192.8257464447199, 103.0, 12, None],
  [192.49972217168292, 89.