In [1]:
import Bio
from Bio import Phylo
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
from Bio import AlignIO
import pandas as pd
import numpy as np
from jw_utils import jw_draw_tree
from plotly import graph_objects as go
import plotly.offline as pyo
from jw_utils import parse_fasta as pfa
import logomaker 
from matplotlib import pyplot as plt
import plotly.io as pio

In [12]:
# Load the csrA tree from its Newick file, colour a hand-picked set of internal
# clades (their internal nodes and their leaves), and draw the result with plotly.
csra_nj_tree = Phylo.read('csrA_trimmed99_njtree.nwk', "newick")

# Pick clades to get representative sequence for
clade_names = ['Inner531', 'Inner524', 'Inner535', 'Inner539', 'Inner521', 'Inner541',
               'Inner552', 'Inner551','Inner550', 'Inner545']

# NOTE: the palette holds 11 colours for 10 clades; zip() below pairs them
# positionally, so the final colour is never used.
colors = [ 'rgba(144,174,178,1)','rgba(100,199,250,1)','rgba(150,230,200,1)',
          'rgba(55,81,77,1)','rgba(100,121,124,1)', 'rgba(200,200,100,1)',
          'rgba(220,100,100,1)','rgba(180,132,100,1)','rgba(58,151,110,1) ',
          'rgba(94,109,174,1)','rgba(182,89,76,1)']

# {clade name: colour}
clade_color_d = dict(zip(clade_names, colors))

# Index the internal nodes by name once; every lookup below reuses this mapping
# instead of re-traversing the tree.
non_term_clades = {cl.name: cl for cl in csra_nj_tree.get_nonterminals()}

# Fail early, naming the offenders, if a requested clade is absent from the tree.
# (Without this the leaf lookup further down dies with an AttributeError on None.)
missing_clades = [name for name in clade_names if name not in non_term_clades]
if missing_clades:
    raise ValueError(
        f"clade(s) not found among the tree's internal nodes: {missing_clades}"
    )

# make dict {root_clade_name:all_nonterminal child nodes}
# Iterating non_term_clades (not clade_names) keeps the keys in tree-traversal order.
selected_names = set(clade_names)
in_node_d = {
    name: [cl.name for cl in clade.get_nonterminals()]
    for name, clade in non_term_clades.items()
    if name in selected_names
}

# Same internal-node lists, keyed by the colour of the clade they belong to.
in_node_color_d = {
    clade_color_d[name]: inner_names
    for name, inner_names in in_node_d.items()
    if name in clade_color_d
}

# get terminals of each clade
clade_leaves = {}   # {clade name: [leaf names]}
clade_colors = {}   # {colour: [leaf names]}
for clade, color in zip(clade_names, colors):
    leaf_names = [leaf.name for leaf in non_term_clades[clade].get_terminals()]
    clade_leaves[clade] = leaf_names
    # Separate list object so the two dicts never alias each other.
    clade_colors[color] = list(leaf_names)

# Hand the colour maps to the project's tree-drawing helper and wrap whatever it
# returns in a plotly Figure so it can be shown and exported.
fig = jw_draw_tree.create_plotly_tree_t(csra_nj_tree, t_nodes_color_dict=clade_colors,
                                        height=1500, i_node_color_dict=in_node_color_d)
fig = go.Figure(fig)
# Returns the path of the HTML file it writes, which becomes the cell's output.
pyo.plot(fig)

'temp-plot.html'

In [3]:
# Save the coloured-cluster tree twice: a static PDF and a standalone HTML page.
# `fig` must already exist, i.e. the tree-drawing cell has to be run first.
# NOTE(review): static export presumably needs plotly's image backend
# (e.g. kaleido) installed in the kernel environment — confirm.
fig.write_image('colored_clusters.pdf')
fig.write_html('colored_clusters.html')

In [16]:
# make csv cols = proteins, clusters, colors
# One row per leaf (protein) of every selected clade, tagged with the clade it
# belongs to and that clade's colour.
#
# Reads `clade_leaves` and `clade_color_d` from the tree cell. The per-protein
# colour list is deliberately NOT called `colors`: rebinding that name would
# overwrite the palette, and re-running this cell would then pair clades with
# the wrong colours.
proteins = [protein for members in clade_leaves.values() for protein in members]
clusters = [cluster for cluster, members in clade_leaves.items() for _ in members]
protein_colors = [clade_color_d[cluster] for cluster in clusters]

df = pd.DataFrame({
    'proteins': proteins,
    'clusters': clusters,
    'colors': protein_colors,
})
# The default integer index is written as the first (unnamed) CSV column.
df.to_csv('./protein_clusters.csv')
        