In [None]:
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
from Bio import AlignIO
from Bio import Phylo

In [None]:
# Load the multiple sequence alignment of rplB sequences (FASTA format,
# obtained by MUSCLE) and show a summary of it.
AlignmentFile = 'rplB.afa'
aln = AlignIO.read(AlignmentFile, format='fasta')
print(aln)

In [None]:
# Pairwise distances between the aligned sequences, scored with the
# 'blosum62' model.
calculator = DistanceCalculator(model='blosum62')
dm = calculator.get_distance(aln)
# Uncomment to inspect the distance matrix:
# print(dm)

In [None]:
# Join nodes into a tree: the constructor is configured with the distance
# calculator defined above and the 'nj' method, then run on the alignment.
constructor = DistanceTreeConstructor(distance_calculator=calculator, method='nj')
tree = constructor.build_tree(aln)



In [5]:
# Name the common ancestor of each major taxonomic group so that a tidy
# annotation file for GraPhlAn can refer to those clades by label.
def _label_clade(taxa, label):
    """Rename the common ancestor of `taxa` in `tree` to `label` and return it."""
    clade = tree.common_ancestor(taxa)
    clade.name = label
    return clade


alphas = [
    'Rhodopseudomonas_palustris',
    'Agrobacterium_tumefaciens',
    'Acidiphilium_cryptum',
    'Erythrobacter_HL-111',
    'Paracoccus_denitrificans',
    'Roseibaca_HL-91',
]
alpha_node = _label_clade(alphas, "Alpha-proteobacteria")

bacilli = [
    'Bacillus_subtilis_NCIB3610',
    'Bacillus_subtilis_168',
    'Bacillus_cereus',
    'Listeria_monocytogenes',
    'Paenibacillus_polymyxa',
    'Streptococcus_agalactiae',
    'Lactobacillus_casei',
]
bacilli_node = _label_clade(bacilli, "Bacilli")

bacteroidetes = [
    'Bacteroides_thetaiotaomicron',
    'Bacteroides_fragilis',
    'Prevotella_ruminicola',
    'Chryseobacterium_indologenes',
    'Cellulophaga_baltica',
    'Algoriphagus_marincola',
]
bacteroidetes_node = _label_clade(bacteroidetes, "Bacteroidetes")

clostridia = [
    'Clostridium_ljungdahlii',
    'Faecalibacterium_prausnitzii',
    'Coprococcus_comes',
    'Ruminococcus_gnavus',
    'Dorea_formicigenerans',
    'Sulfobacillus_thermosulfidooxidans',
]
clostridia_node = _label_clade(clostridia, "Clostridia")

actinos = [
    'Mycobacterium_smegmatis',
    'Rhodococcus_jostii',
    'Cellulomonas_gilvus',
    'Streptomyces_griseorubens',
    'Streptomyces_venezuelae',
    'Micrococcus_luteus',
    'Bifidobacterium_bifidum',
    'Bifidobacterium_longum',
]
actinos_node = _label_clade(actinos, "Actinobacteria")

# It is well known that the beta-proteobacteria branch from within the gammas,
# because some gammas (e.g. Francisella) are very distinct from E. coli.
gammas = [
    'Pseudomonas_putida',
    'Halomonas_HL-93',
    'Halomonas_HL-48',
    'Citrobacter_freundii',
    'Legionella_pneumophila',
    'Francisella_novicida',
]
gammas_node = _label_clade(gammas, "Gamma-proteobacteria")

In [6]:
### Write out the annotation file.
OutFileName = 'annotation_1_tree.txt'

# (clade label, background color) pairs, in the order they are written.
# The labels are the names assigned to the group ancestors in the cell above.
clade_backgrounds = [
    ('Bacilli', '#0000ff'),
    ('Alpha-proteobacteria', 'r'),
    ('Bacteroidetes', 'k'),
    ('Clostridia', 'g'),
    ('Actinobacteria', 'y'),
    ('Gamma-proteobacteria', 'm'),
]

# `with` guarantees the file is flushed and closed even if a write fails.
with open(OutFileName, 'w') as Handle:
    Handle.write("#put a pretty background for my large taxa groups\n")
    for clade_label, color in clade_backgrounds:
        Handle.write('%s\tannotation\t%s\n' % (clade_label, clade_label))
        Handle.write('%s\tannotation_background_color\t%s\n' % (clade_label, color))

    Handle.write('#getting ride of the circles for interior nodes\n')
    for node in tree.get_nonterminals():
        # Only internal nodes whose name starts with 'Inner' get a marker size
        # of 0 (presumably these are the constructor's default names for
        # unnamed internal nodes -- confirm). The write must stay inside the
        # `if`: otherwise a renamed clade would re-emit the previous node's
        # line, or hit an unbound `line` if it came first. The truthiness
        # check also skips nodes whose name is None.
        if node.name and node.name.startswith('Inner'):
            Handle.write('%s\tclade_marker_size\t0\n' % (node.name,))


In [7]:
# Save the tree as phyloXML. The "_0" suffix indicates that this is the first
# XML file; the number is incremented as the file is annotated by GraPhlAn.
OutFileName = "rplB_0.xml"
Phylo.write(tree, OutFileName, format='phyloxml')

1