## Load in modules

In [1]:
import pandas as pd
import math
from shapely.geometry import Point
from pyproj import Proj, transform
import dendropy
import tqdm
import json

import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap, Normalize
import matplotlib.patches as mpatches
from matplotlib import cm


## Process phylogenetic tree

In [None]:
# Path to the annotated tree (NEXUS). The edge loop below reads a 'height'
# annotation from both nodes of every edge.
treefile = '../AL_loop_files/annotated_output/annotated_tree.nexus'
# Use the `treefile` variable defined above; the name `mcc_treefile` is not
# defined in this cell and raised a NameError on a fresh kernel.
tree = dendropy.Tree.get(path=treefile, schema='nexus')

## name nodes: every node visited in pre-order gets a 1-based integer label
for count, node in enumerate(tree.preorder_node_iter(), start=1):
    node.label = count

## store information from tree
# Naming note: in the records below 'head_*' comes from dendropy's
# edge.tail_node and 'tail_*' from dendropy's edge.head_node, i.e. the
# table's head/tail naming is the reverse of dendropy's.
edge_info = []
for edge in tqdm.tqdm(tree.postorder_edge_iter()):
    # only edges that have a node at their dendropy "tail" end are recorded
    if edge.tail_node:
        head_node = edge.tail_node
        head_height = float(head_node.annotations['height'].value)

        tail_node = edge.head_node
        tail_height = float(tail_node.annotations['height'].value)

        # locations are not populated in this cell; kept as None placeholders
        # so the record has these keys
        head_location = None
        tail_location = None

        edge_info.append(
            {
                'head_node': head_node.label,
                'tail_node': tail_node.label,
                'length': edge.length,
                'head_location': head_location,
                'tail_location': tail_location,
                'head_height': head_height,
                'tail_height': tail_height
            }
        )


532it [00:24, 21.27it/s]

In [132]:
# Turn the list of per-edge records into a table (one row per recorded edge)
# and show it as the cell's output.
tree_edge_df = pd.DataFrame(data=edge_info)
tree_edge_df


Unnamed: 0,head_node,tail_node,length,geo_distance,head_lat_3395,head_long_3395,tail_lat_3395,tail_long_3395,head_lat_4326,head_long_4326,...,head_ltla,head_ltla_amb,tail_ltla,tail_ltla_amb,head_pc,tail_pc,head_date,head_dec_date,tail_date,tail_dec_date
0,2,3,0.015732,12.997507,6.669138e+06,-25058.272272,6.661902e+06,-44630.806402,51.45725531929582,-0.22510228975326144,...,E09000032,False,E07000213,False,SW15,TW16,2021-11-19,2021.882898,2021-11-25,2021.898630
1,7,8,0.012987,5.521659,6.684455e+06,-6786.849197,6.675715e+06,-5331.218560,51.54313851233109,-0.06096730364605962,...,E09000012,False,E09000028,False,E8,SE16,2021-12-01,2021.915780,2021-12-05,2021.928767
2,7,9,0.002028,4.714154,6.684455e+06,-6786.849197,6.692001e+06,-6150.981362,51.54313851233109,-0.06096730364605962,...,E09000012,False,E09000014,False,E8,N17,2021-12-01,2021.915780,2021-12-02,2021.917808
3,6,7,0.018557,12.443570,6.666111e+06,-14663.809979,6.684455e+06,-6786.849197,51.440268162594215,-0.13172724627236,...,E09000022,False,E09000012,False,SW2,E8,2021-11-24,2021.897223,2021-12-01,2021.915780
4,6,10,0.039763,28.726557,6.666111e+06,-14663.809979,6.620275e+06,-16813.461133,51.440268162594215,-0.13172724627236,...,E09000022,False,E07000211,False,SW2,RH6,2021-11-24,2021.897223,2021-12-09,2021.936986
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1419,1419,1425,0.005012,3.453906,6.677134e+06,-22946.929024,6.675458e+06,-28240.882460,51.50210823642505,-0.20613577065995106,...,E09000020,False,E09000018,False,W14,W4,2021-12-04,2021.923755,2021-12-05,2021.928767
1420,1417,1419,0.017300,2.952364,6.678773e+06,-18491.834367,6.677134e+06,-22946.929024,51.51129884578125,-0.16611497443124973,...,E09000033,False,E09000020,False,W2,W14,2021-11-27,2021.906455,2021-12-04,2021.923755
1421,1347,1417,0.003675,2.943354,6.677966e+06,-23156.617808,6.678773e+06,-18491.834367,51.50677606244757,-0.20801943705340428,...,E09000020,False,E09000033,False,W11,W2,2021-11-26,2021.902780,2021-11-27,2021.906455
1422,19,1347,0.020887,3.030715,6.673623e+06,-20969.632321,6.677966e+06,-23156.617808,51.48241880422508,-0.1883734121661239,...,E09000020,False,E09000020,False,SW10,W11,2021-11-18,2021.881893,2021-11-26,2021.902780


In [225]:
# Save the edge table as a tab-separated file, without the row index column.
tree_edge_df.to_csv(
    path_or_buf='../AL_loop_files/annotated_tree_df.tsv',
    index=False,
    sep='\t',
)


In [226]:
# Write the tree out in Newick format to a file in the working directory.
tree.write(
    schema='newick',
    path='node_labelled.mcc.tree',
)
