In [1]:
import sys

import matplotlib.pyplot as plt
from collections import Counter
from collections import defaultdict
import numpy as np
import datetime as dt
import geopandas as gpd
import dendropy
import csv
from shapely.geometry import Point
import matplotlib.path as mpath
import matplotlib.patches as mpatches
from pyproj import Proj,transform
import tqdm
import pandas as pd
from matplotlib import cm
import matplotlib
import matplotlib.ticker as plticker
import os
from math import radians, cos, sin, asin, sqrt




In [2]:
# Every map layer is reprojected to EPSG:3395, the same CRS as outProj below,
# which make_mcc_tree_file uses when transforming tree coordinates.

# UK map; `england` keeps only the rows whose NAME_1 is "England".
uk_map = gpd.read_file("../../Data/UK_geog_data/UK_map.json").to_crs("EPSG:3395")
england = uk_map.loc[uk_map["NAME_1"] == "England"]

# UTLA areas; `england_utla` keeps only the rows whose CODE starts with "E".
utla_map = gpd.read_file("../../Data/UK_geog_data/UTLA_administrative_areas.json").to_crs("EPSG:3395")
england_utla = utla_map.loc[utla_map['CODE'].str.startswith("E")]

# Postcode districts: the CRS is assigned explicitly (EPSG:27700) before reprojecting.
pc_map = gpd.read_file("../../Data/UK_geog_data/England_postcode_districts.json")
pc_map.crs = "epsg:27700"
pc_map = pc_map.to_crs("epsg:3395")

# Source (EPSG:4326) and target (EPSG:3395) projections passed to pyproj.transform.
inProj, outProj = Proj(init='epsg:4326'), Proj(init='epsg:3395')



  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))


In [4]:
def true_date(date):
    """Convert a decimal year (e.g. ``2020.5``) into a ``YYYY-MM-DD`` string.

    The fractional part of ``date`` is scaled by the number of days in that
    year and added to 1 January. Any partial day is dropped, because adding a
    ``timedelta`` to a ``datetime.date`` ignores its sub-day component.

    Parameters
    ----------
    date : float, int or str
        Decimal year. Its ``str()`` form is split on ``"."``; a value without
        a fractional part is treated as 1 January of that year.

    Returns
    -------
    str
        The calendar date formatted as ``YYYY-MM-DD``.
    """
    # partition() tolerates a missing "." (e.g. an int year), unlike split(".")[1].
    year_text, _, fraction_text = str(date).partition(".")
    year = int(year_text)
    fraction = float(f"0.{fraction_text}") if fraction_text else 0.0

    start = dt.date(year=year, month=1, day=1)
    # Actual length of this year (365 or 366) instead of special-casing 2020 only.
    days_in_year = (dt.date(year=year + 1, month=1, day=1) - start).days

    tr_date = start + dt.timedelta(fraction * days_in_year)

    return str(tr_date)

def decimal_date(date):
    """Convert a calendar date into a decimal year.

    Parameters
    ----------
    date : str, datetime.date or datetime.datetime
        Either a ``YYYY-MM-DD`` string or a date object. A ``datetime`` is
        reduced to its calendar day.

    Returns
    -------
    float
        ``year + (days elapsed since 1 January) / (days in that year)``.

    Raises
    ------
    ValueError
        If a string does not match the ``%Y-%m-%d`` format.
    """
    if isinstance(date, str):
        date = dt.datetime.strptime(date, "%Y-%m-%d").date()
    elif isinstance(date, dt.datetime):
        # A datetime cannot be subtracted from a date, so keep only the day.
        date = date.date()

    year = date.year
    start = dt.date(year=year, month=1, day=1)
    # Actual length of this year (365 or 366) instead of special-casing 2020 only.
    days_in_year = (dt.date(year=year + 1, month=1, day=1) - start).days

    dec_date = year + (date - start).days / days_in_year
    return dec_date

## Process MCC trees

In [5]:
def _find_containing(records, start_point, end_point):
    """Find the map records whose polygons contain the two ends of a branch.

    ``records`` is a sequence of tuples whose first item is the area name and
    whose second item is the polygon; any further items are carried along
    untouched. Returns ``(start_record, end_record)``, each ``None`` when no
    polygon contains the corresponding point.
    """
    start_record = None
    end_record = None
    for record in records:
        polygon = record[1]
        if polygon.contains(start_point):
            start_record = record
        if polygon.contains(end_point):
            end_record = record
        # Stop scanning once both points have been given a non-empty area name.
        if start_record and start_record[0] and end_record and end_record[0]:
            break
    return start_record, end_record


def make_mcc_tree_file(mcc_file, lineage_name,most_recent_sample,outdir):
    """Write one CSV row per branch of an MCC tree, with locations and dates.

    The tree is read from ``mcc_file`` (nexus schema), every node is labelled
    with a running integer in preorder, and each edge that has a tail node is
    written to ``<outdir>/<lineage_name>_mcc_data.csv``.

    For both ends of an edge the row holds: the node labels, the edge length,
    the ``coordinates1``/``coordinates2`` annotations (read as latitude and
    longitude) both as given and transformed with the module-level
    ``inProj``/``outProj``, the names of the containing polygons in the
    module-level ``england``, ``pc_map`` and ``england_utla`` layers, and the
    node date as a decimal year and as a calendar date.

    Parameters
    ----------
    mcc_file : str
        Path of the nexus tree file.
    lineage_name : str
        Used as the prefix of the output file name.
    most_recent_sample : float
        Decimal date from which each node's ``height`` annotation is subtracted.
    outdir : str
        Directory in which the CSV is written.

    Returns
    -------
    None
    """
    tree = dendropy.Tree.get(path=mcc_file, schema="nexus")

    #first, name all of the nodes - currently all internal nodes don't have name
    for number, node in enumerate(tree.preorder_node_iter(), start=1):
        node.label = number

    # The map layers do not change between edges, so pair names with polygons once.
    adm2_records = list(zip(england["Multi_loc"], england["geometry"]))
    pc_records = list(zip(pc_map["PostDist"], pc_map["geometry"]))
    utla_records = list(zip(england_utla["FILE_NAME"], england_utla["geometry"], england_utla["CODE"]))

    fieldnames = ["node1","node2","length","start_long_3395","start_lat_3395","end_lat_3395", "end_long_3395", "start_lat_4326", "start_long_4326","end_long_4326","end_lat_4326", "start_adm2", "end_adm2", "start_pc", "end_pc", "start_utla", "end_utla", "start_code", "end_code", "start_dec_date", "end_dec_date", "start_real_date", "end_real_date"]

    #then extract info from the tree
    #NB head is child of tail, head is end going forwards in time
    write_file = os.path.join(outdir, f'{lineage_name}_mcc_data.csv')
    # newline='' lets the csv module control line endings (avoids blank rows on Windows).
    with open(write_file, 'w', newline='') as fw:
        writer = csv.DictWriter(fw, fieldnames=fieldnames)
        writer.writeheader()

        for edge in tqdm.tqdm(tree.postorder_edge_iter()):
            # Edges without a tail node are not written.
            if not edge.tail_node:
                continue

            node1 = edge.tail_node
            node2 = edge.head_node

            original_start_lat = node1.annotations['coordinates1'].value
            original_start_long = node1.annotations['coordinates2'].value
            original_end_lat = node2.annotations['coordinates1'].value
            original_end_long = node2.annotations['coordinates2'].value

            start_dec_date = most_recent_sample - float(node1.annotations['height'].value)
            end_dec_date = most_recent_sample - float(node2.annotations['height'].value)
            start_real_date = true_date(start_dec_date)
            end_real_date = true_date(end_dec_date)

            start_long,start_lat = transform(inProj,outProj, original_start_long, original_start_lat)
            end_long, end_lat = transform(inProj,outProj, original_end_long, original_end_lat)

            start_point = Point(float(start_long),float(start_lat))
            end_point = Point(float(end_long),float(end_lat))

            start_adm2, end_adm2 = _find_containing(adm2_records, start_point, end_point)
            start_district, end_district = _find_containing(pc_records, start_point, end_point)
            start_area, end_area = _find_containing(utla_records, start_point, end_point)

            start_loc = start_adm2[0] if start_adm2 is not None else None
            end_loc = end_adm2[0] if end_adm2 is not None else None
            start_pc = start_district[0] if start_district is not None else None
            end_pc = end_district[0] if end_district is not None else None

            # UTLA records also carry the area code as their third item.
            start_utla, start_code = (start_area[0], start_area[2]) if start_area is not None else (None, None)
            end_utla, end_code = (end_area[0], end_area[2]) if end_area is not None else (None, None)

            # Greater London is reported with a fixed pair of codes.
            if start_utla == "GREATER_LONDON_AUTHORITY":
                start_code="E13000001|E13000002"
            if end_utla == "GREATER_LONDON_AUTHORITY":
                end_code="E13000001|E13000002"

            writer.writerow({
                "node1": node1.label,
                "node2": node2.label,
                "length": edge.length,
                "start_lat_3395": start_lat,
                "start_long_3395": start_long,
                "end_lat_3395": end_lat,
                "end_long_3395": end_long,
                "start_lat_4326": original_start_lat,
                "start_long_4326": original_start_long,
                "end_lat_4326": original_end_lat,
                "end_long_4326": original_end_long,
                "start_adm2": start_loc,
                "end_adm2": end_loc,
                "start_pc": start_pc,
                "end_pc": end_pc,
                "start_dec_date": start_dec_date,
                "end_dec_date": end_dec_date,
                "start_real_date": start_real_date,
                "end_real_date": end_real_date,
                "start_utla": start_utla,
                "end_utla": end_utla,
                "start_code": start_code,
                "end_code": end_code,
            })

                
def get_most_recent_tips(mrts, input_dir):
    """Record the most recent tip date found in each CSV file of ``input_dir``.

    Each file whose name ends in ``csv`` is read with ``csv.DictReader``; the
    second ``|``-separated field of every row's ``name`` column is converted
    with ``decimal_date`` and the maximum is stored in ``mrts`` under the file
    name without its extension.

    Parameters
    ----------
    mrts : dict
        Mapping that is updated in place and also returned.
    input_dir : str
        Directory containing the CSV files.

    Returns
    -------
    dict
        The same ``mrts`` object, with one entry added or replaced per file.

    Raises
    ------
    ValueError
        If a CSV file contains no data rows.
    """
    for file in tqdm.tqdm(os.listdir(input_dir)):
        if not file.endswith("csv"):
            continue

        # splitext removes just the extension; str.strip(".csv") would also
        # eat any '.', 'c', 's' or 'v' characters at either end of the name.
        name = os.path.splitext(file)[0]
        path = os.path.join(input_dir, file)

        with open(path, newline='') as f:
            dates = [
                decimal_date(row['name'].split("|")[1])
                for row in csv.DictReader(f)
            ]

        if not dates:
            raise ValueError(f"no sequences listed in {path}")

        mrts[name] = max(dates)
    return mrts
          

In [6]:
# Collect the most recent tip date of every tip-list file (small and large
# lineages) into one dictionary keyed by file name.
mrts = {}
for tip_list_dir in (
    "../input_files/small_lineages/jclusterfunk_lists/",
    "../input_files/large_lineages/",
):
    mrts = get_most_recent_tips(mrts, tip_list_dir)

100%|██████████| 280/280 [00:00<00:00, 390.73it/s]
100%|██████████| 21/21 [00:00<00:00, 29.86it/s]


In [9]:
unwanted_mccs = ["0_64", "1_218", "2_437", "0_74", "2_431", "0_204", "0_316", "0_75", "1_357", "1_460", "0_421", "0_266", "0_335", "0_342", "0_381", "0_103", "0_355", "1_10", "0_260", "0_372", "0_261", "0_34"]
#these did not converge properly due to lack of data and so have been removed from the analysis

overall_dict = defaultdict(dict)
# Write an MCC data CSV for every small-lineage tree that is not excluded above.
for mcc_file in os.listdir("../results/small_lineages/mcc_trees/"):
    if mcc_file.endswith(".mcc"):
        # Remove only the ".mcc" suffix; str.strip(".mcc") would also eat any
        # '.', 'm' or 'c' characters at either end of the lineage name.
        name = mcc_file[:-len(".mcc")]
        print(name)
        if name not in unwanted_mccs:
            mcc_file = f'../results/small_lineages/mcc_trees/{mcc_file}'
            make_mcc_tree_file(mcc_file, name,mrts[name], "../results/MCC_files_all/")


1it [00:00,  5.66it/s]

0_101


11it [00:01,  6.71it/s]
1it [00:00,  7.05it/s]

0_103
0_105


11it [00:01,  7.53it/s]
0it [00:00, ?it/s]

0_107


47it [00:07,  6.30it/s]
1it [00:00,  6.10it/s]

0_108


21it [00:03,  6.43it/s]
1it [00:00,  5.34it/s]

0_110


9it [00:01,  6.65it/s]
0it [00:00, ?it/s]

0_126


21it [00:03,  6.53it/s]
1it [00:00,  5.50it/s]

0_131


7it [00:01,  6.68it/s]
1it [00:00,  6.22it/s]

0_137


11it [00:01,  6.61it/s]
1it [00:00,  6.40it/s]

0_138


9it [00:01,  6.37it/s]
1it [00:00,  5.83it/s]

0_139


21it [00:03,  6.14it/s]
1it [00:00,  6.46it/s]

0_148


29it [00:04,  6.63it/s]
1it [00:00,  6.64it/s]

0_149


55it [00:08,  6.49it/s]
1it [00:00,  6.21it/s]

0_150


9it [00:01,  6.84it/s]


0_151


265it [00:38,  6.86it/s]
0it [00:00, ?it/s]

0_152


135it [00:19,  6.77it/s]
0it [00:00, ?it/s]

0_154


71it [00:11,  6.18it/s]
1it [00:00,  6.22it/s]

0_156


31it [00:04,  6.66it/s]
0it [00:00, ?it/s]

0_160


57it [00:08,  6.84it/s]
1it [00:00,  6.75it/s]

0_163


17it [00:02,  7.43it/s]
1it [00:00,  5.91it/s]

0_177


33it [00:04,  7.00it/s]
1it [00:00,  5.76it/s]

0_185


7it [00:00,  7.34it/s]
1it [00:00,  7.36it/s]

0_187


25it [00:03,  6.63it/s]
1it [00:00,  5.45it/s]

0_189


9it [00:01,  7.38it/s]
1it [00:00,  7.09it/s]

0_191


53it [00:07,  6.69it/s]
1it [00:00,  6.18it/s]

0_201


11it [00:01,  7.25it/s]
1it [00:00,  6.35it/s]

0_204
0_207


17it [00:02,  6.84it/s]
0it [00:00, ?it/s]

0_210


65it [00:09,  6.61it/s]
1it [00:00,  6.23it/s]

0_212


11it [00:01,  7.18it/s]
1it [00:00,  5.68it/s]

0_216


11it [00:01,  6.84it/s]
1it [00:00,  6.37it/s]

0_217


33it [00:05,  6.45it/s]
1it [00:00,  6.59it/s]

0_218


13it [00:02,  6.29it/s]
0it [00:00, ?it/s]

0_219


13it [00:01,  6.78it/s]
0it [00:00, ?it/s]

0_222


209it [00:29,  7.07it/s]
1it [00:00,  6.29it/s]

0_227


39it [00:05,  6.92it/s]
0it [00:00, ?it/s]

0_228


107it [00:16,  6.65it/s]
1it [00:00,  6.39it/s]

0_230


17it [00:02,  6.63it/s]
1it [00:00,  6.36it/s]

0_231


49it [00:07,  6.77it/s]
1it [00:00,  7.60it/s]

0_232


69it [00:10,  6.90it/s]
1it [00:00,  6.29it/s]

0_235


15it [00:02,  6.85it/s]
1it [00:00,  6.38it/s]

0_237


9it [00:01,  6.52it/s]
1it [00:00,  5.74it/s]

0_238


9it [00:01,  7.34it/s]


0_239


2825it [06:52,  6.84it/s]
1it [00:00,  5.78it/s]

0_24


13it [00:02,  6.20it/s]
1it [00:00,  6.34it/s]

0_243


11it [00:01,  6.82it/s]
1it [00:00,  7.47it/s]

0_245


17it [00:02,  7.45it/s]
1it [00:00,  7.42it/s]

0_247


11it [00:01,  8.33it/s]


0_248


1609it [04:02,  6.64it/s]
1it [00:00,  5.43it/s]

0_253


11it [00:01,  6.49it/s]
0it [00:00, ?it/s]

0_256


71it [00:10,  6.60it/s]
0it [00:00, ?it/s]

0_257


59it [00:08,  6.57it/s]
1it [00:00,  6.43it/s]

0_258


33it [00:05,  5.83it/s]


0_259


383it [01:00,  6.36it/s]
1it [00:00,  6.93it/s]

0_260
0_261
0_262


21it [00:02,  7.04it/s]
1it [00:00,  5.71it/s]

0_263


23it [00:03,  6.50it/s]
1it [00:00,  5.70it/s]

0_264


9it [00:01,  6.78it/s]
1it [00:00,  6.62it/s]

0_266
0_275


11it [00:01,  7.10it/s]
1it [00:00,  5.75it/s]

0_286


23it [00:03,  5.92it/s]
1it [00:00,  6.14it/s]

0_287


9it [00:01,  7.03it/s]
0it [00:00, ?it/s]

0_288


115it [00:16,  7.03it/s]


0_293


515it [01:17,  6.63it/s]
0it [00:00, ?it/s]

0_297


141it [00:21,  6.47it/s]
0it [00:00, ?it/s]

0_303


165it [00:24,  6.62it/s]


0_304


1129it [02:51,  6.57it/s]
1it [00:00,  6.38it/s]

0_305


25it [00:03,  6.34it/s]


0_309


445it [01:06,  6.72it/s]
1it [00:00,  6.06it/s]

0_310


9it [00:01,  7.71it/s]
0it [00:00, ?it/s]

0_313


85it [00:11,  7.33it/s]
0it [00:00, ?it/s]

0_316
0_321


149it [00:22,  6.62it/s]
1it [00:00,  6.16it/s]

0_327


11it [00:01,  6.97it/s]
1it [00:00,  6.05it/s]

0_329


9it [00:01,  7.24it/s]
1it [00:00,  6.58it/s]

0_330


13it [00:01,  7.27it/s]
1it [00:00,  6.76it/s]

0_332


19it [00:02,  7.01it/s]


0_333


287it [00:41,  6.87it/s]
1it [00:00,  5.94it/s]

0_335
0_341


11it [00:01,  7.05it/s]
1it [00:00,  6.45it/s]

0_342
0_347


21it [00:03,  6.62it/s]
1it [00:00,  5.47it/s]

0_348


9it [00:01,  7.09it/s]
1it [00:00,  6.74it/s]

0_35


11it [00:01,  7.46it/s]
1it [00:00,  5.25it/s]

0_351


9it [00:01,  6.53it/s]
1it [00:00,  6.59it/s]

0_352


9it [00:01,  7.49it/s]
1it [00:00,  5.54it/s]

0_355
0_356


17it [00:02,  6.21it/s]
0it [00:00, ?it/s]

0_358


49it [00:08,  5.85it/s]
0it [00:00, ?it/s]

0_359


49it [00:08,  6.10it/s]
0it [00:00, ?it/s]

0_366


87it [00:13,  6.54it/s]


0_368


415it [01:07,  6.15it/s]
1it [00:00,  6.07it/s]

0_370


21it [00:03,  6.79it/s]
1it [00:00,  6.22it/s]

0_372
0_373


17it [00:02,  6.67it/s]
1it [00:00,  6.19it/s]

0_374


9it [00:01,  6.93it/s]


0_378


743it [01:50,  6.75it/s]
1it [00:00,  5.85it/s]

0_379


11it [00:01,  7.24it/s]
0it [00:00, ?it/s]

0_381
0_384


59it [00:09,  6.49it/s]
1it [00:00,  5.98it/s]

0_387


27it [00:03,  6.87it/s]
0it [00:00, ?it/s]

0_388


91it [00:14,  6.43it/s]
1it [00:00,  6.45it/s]

0_391


27it [00:03,  6.99it/s]
1it [00:00,  6.12it/s]

0_393


33it [00:05,  6.11it/s]
1it [00:00,  6.11it/s]

0_398


11it [00:01,  6.40it/s]
1it [00:00,  6.74it/s]

0_40


15it [00:02,  6.63it/s]
1it [00:00,  7.00it/s]

0_400


33it [00:04,  6.61it/s]
1it [00:00,  6.56it/s]

0_402


11it [00:01,  6.56it/s]
1it [00:00,  6.95it/s]

0_405


11it [00:01,  7.20it/s]


0_407


483it [01:23,  5.76it/s]
1it [00:00,  7.16it/s]

0_410


15it [00:01,  7.88it/s]
1it [00:00,  7.25it/s]

0_415


9it [00:01,  7.45it/s]
1it [00:00,  6.71it/s]

0_418


11it [00:01,  7.67it/s]


0_421
0_51


327it [00:48,  6.77it/s]
1it [00:00,  6.89it/s]

0_56


37it [00:05,  7.33it/s]
0it [00:00, ?it/s]

0_61


77it [00:11,  6.74it/s]
0it [00:00, ?it/s]

0_62


89it [00:12,  7.14it/s]
1it [00:00,  6.19it/s]

0_64
0_70


19it [00:02,  6.68it/s]
0it [00:00, ?it/s]

0_71


31it [00:04,  6.47it/s]
1it [00:00,  7.09it/s]

0_73


9it [00:01,  7.82it/s]
1it [00:00,  7.10it/s]

0_74
0_75
0_76


51it [00:07,  6.93it/s]
0it [00:00, ?it/s]

0_77


49it [00:07,  6.97it/s]
1it [00:00,  7.48it/s]

0_78


13it [00:01,  7.27it/s]


0_79


1283it [03:28,  6.17it/s]
1it [00:00,  5.97it/s]

0_81


37it [00:05,  6.49it/s]
1it [00:00,  6.62it/s]

0_88


15it [00:02,  7.02it/s]
1it [00:00,  6.01it/s]

0_89


21it [00:03,  6.43it/s]


0_95


397it [00:56,  6.99it/s]


0_97


267it [00:38,  7.02it/s]
1it [00:00,  6.02it/s]

0_98


31it [00:04,  6.79it/s]
1it [00:00,  6.34it/s]

0_99


9it [00:01,  7.90it/s]


1_10
1_105


457it [01:11,  6.35it/s]
0it [00:00, ?it/s]

1_11


15it [00:02,  5.06it/s]
1it [00:00,  7.44it/s]

1_122


15it [00:01,  7.73it/s]
0it [00:00, ?it/s]

1_127


101it [00:14,  6.77it/s]


1_142


269it [00:43,  6.24it/s]
1it [00:00,  5.58it/s]

1_145


9it [00:01,  6.09it/s]
1it [00:00,  6.87it/s]

1_15


9it [00:01,  7.64it/s]
1it [00:00,  6.10it/s]

1_159


9it [00:01,  7.13it/s]
1it [00:00,  6.43it/s]

1_171


53it [00:07,  6.89it/s]
0it [00:00, ?it/s]

1_180


171it [00:26,  6.55it/s]
1it [00:00,  6.17it/s]

1_187


23it [00:03,  6.78it/s]
1it [00:00,  6.34it/s]

1_188


11it [00:01,  7.40it/s]
1it [00:00,  6.39it/s]

1_189


7it [00:00,  7.42it/s]
1it [00:00,  6.76it/s]

1_194


21it [00:02,  7.23it/s]
0it [00:00, ?it/s]

1_218
1_222


15it [00:02,  6.34it/s]
1it [00:00,  5.91it/s]

1_226


19it [00:02,  6.64it/s]
0it [00:00, ?it/s]

1_240


89it [00:12,  6.85it/s]
1it [00:00,  6.33it/s]

1_243


11it [00:01,  7.22it/s]
1it [00:00,  6.32it/s]

1_244


45it [00:06,  6.81it/s]
1it [00:00,  7.41it/s]

1_259


41it [00:06,  6.44it/s]


1_26


2315it [05:49,  6.62it/s]
0it [00:00, ?it/s]

1_271


71it [00:10,  6.59it/s]


1_277


535it [01:20,  6.68it/s]
0it [00:00, ?it/s]

1_278


87it [00:13,  6.65it/s]
1it [00:00,  6.55it/s]

1_283


11it [00:01,  7.50it/s]
1it [00:00,  6.21it/s]

1_29


9it [00:01,  7.15it/s]
1it [00:00,  6.17it/s]

1_300


9it [00:01,  7.50it/s]
1it [00:00,  6.42it/s]

1_302


41it [00:06,  6.83it/s]
1it [00:00,  6.25it/s]

1_303


7it [00:00,  8.00it/s]
0it [00:00, ?it/s]

1_304


231it [00:35,  6.59it/s]


1_306


391it [00:58,  6.71it/s]


1_308


597it [01:25,  6.94it/s]
1it [00:00,  6.78it/s]

1_309


13it [00:01,  7.33it/s]
1it [00:00,  6.73it/s]

1_310


61it [00:09,  6.55it/s]


1_311


511it [01:16,  6.66it/s]
1it [00:00,  6.78it/s]

1_312


13it [00:01,  6.99it/s]


1_314


423it [01:02,  6.75it/s]
0it [00:00, ?it/s]

1_315


23it [00:04,  5.70it/s]
1it [00:00,  6.83it/s]

1_316


31it [00:04,  6.37it/s]
0it [00:00, ?it/s]

1_319


111it [00:16,  6.59it/s]
0it [00:00, ?it/s]

1_321


221it [00:32,  6.82it/s]
1it [00:00,  5.79it/s]

1_323


29it [00:04,  6.72it/s]
1it [00:00,  5.76it/s]

1_327


9it [00:01,  7.41it/s]
1it [00:00,  6.26it/s]

1_335


33it [00:05,  6.01it/s]
1it [00:00,  6.56it/s]

1_336


27it [00:04,  6.45it/s]
0it [00:00, ?it/s]

1_337


109it [00:17,  6.33it/s]
1it [00:00,  6.16it/s]

1_34


7it [00:00,  7.42it/s]
1it [00:00,  6.04it/s]

1_344


7it [00:01,  6.90it/s]


1_347


297it [00:40,  7.33it/s]
1it [00:00,  6.01it/s]

1_35


17it [00:02,  6.69it/s]
0it [00:00, ?it/s]

1_350


109it [00:15,  7.01it/s]
1it [00:00,  7.13it/s]

1_351


15it [00:02,  6.87it/s]
1it [00:00,  7.25it/s]

1_352


41it [00:05,  7.05it/s]
1it [00:00,  6.06it/s]

1_353


29it [00:04,  6.68it/s]
0it [00:00, ?it/s]

1_356


125it [00:16,  7.60it/s]
1it [00:00,  6.20it/s]

1_357
1_364


11it [00:01,  6.48it/s]
0it [00:00, ?it/s]

1_366


149it [00:22,  6.58it/s]


1_367


149it [00:21,  6.90it/s]


1_369


297it [00:42,  6.93it/s]
1it [00:00,  7.33it/s]

1_372


23it [00:03,  7.29it/s]
1it [00:00,  6.39it/s]

1_385


27it [00:04,  6.74it/s]
1it [00:00,  6.42it/s]

1_389


21it [00:02,  7.02it/s]
0it [00:00, ?it/s]

1_418


39it [00:06,  5.72it/s]
1it [00:00,  6.54it/s]

1_43


33it [00:04,  6.80it/s]
1it [00:00,  7.29it/s]

1_435


13it [00:01,  8.00it/s]
1it [00:00,  6.07it/s]

1_438


11it [00:01,  7.16it/s]
1it [00:00,  6.13it/s]

1_459


11it [00:01,  7.24it/s]
1it [00:00,  6.36it/s]

1_460
1_463


11it [00:01,  7.26it/s]
0it [00:00, ?it/s]

1_47


37it [00:05,  6.79it/s]
1it [00:00,  6.04it/s]

1_484


17it [00:02,  6.91it/s]
0it [00:00, ?it/s]

1_485


85it [00:12,  6.77it/s]
1it [00:00,  6.37it/s]

1_487


13it [00:01,  6.94it/s]
1it [00:00,  6.62it/s]

1_5


47it [00:07,  6.70it/s]
1it [00:00,  6.76it/s]

1_52


41it [00:06,  6.83it/s]


1_56


621it [01:31,  6.76it/s]
1it [00:00,  5.46it/s]

1_57


11it [00:01,  6.62it/s]
1it [00:00,  6.18it/s]

1_59


9it [00:01,  7.09it/s]
1it [00:00,  6.81it/s]

1_6


59it [00:09,  6.12it/s]
0it [00:00, ?it/s]

1_73


7it [00:01,  5.68it/s]


1_90


423it [01:02,  6.72it/s]
1it [00:00,  5.80it/s]

2_111


9it [00:01,  6.94it/s]
1it [00:00,  5.77it/s]

2_117


25it [00:04,  6.24it/s]
1it [00:00,  5.95it/s]

2_118


11it [00:01,  7.21it/s]
1it [00:00,  6.53it/s]

2_119


5it [00:00,  8.03it/s]
1it [00:00,  6.22it/s]

2_129


7it [00:00,  7.26it/s]
1it [00:00,  6.34it/s]

2_130


9it [00:01,  7.38it/s]


2_167


1839it [04:41,  6.53it/s]
1it [00:00,  6.70it/s]

2_170


43it [00:06,  6.48it/s]
1it [00:00,  6.37it/s]

2_171


15it [00:02,  6.79it/s]
1it [00:00,  6.37it/s]

2_176


9it [00:01,  7.70it/s]
1it [00:00,  6.13it/s]

2_183


33it [00:05,  6.55it/s]
0it [00:00, ?it/s]

2_218


45it [00:07,  6.35it/s]
1it [00:00,  6.17it/s]

2_234


17it [00:02,  6.47it/s]
0it [00:00, ?it/s]

2_249


39it [00:07,  5.22it/s]
0it [00:00, ?it/s]

2_252


3it [00:00,  6.05it/s]
0it [00:00, ?it/s]

2_265


21it [00:03,  5.92it/s]
1it [00:00,  6.63it/s]

2_267


19it [00:02,  6.68it/s]
1it [00:00,  5.78it/s]

2_276


9it [00:01,  7.18it/s]
1it [00:00,  6.41it/s]

2_279


9it [00:01,  7.71it/s]
1it [00:00,  6.12it/s]

2_300


5it [00:00,  8.23it/s]
1it [00:00,  6.36it/s]

2_307


13it [00:01,  6.87it/s]
1it [00:00,  6.13it/s]

2_308


27it [00:04,  5.80it/s]


2_309


399it [01:03,  6.32it/s]
1it [00:00,  7.10it/s]

2_320


17it [00:02,  7.77it/s]
1it [00:00,  6.24it/s]

2_323


9it [00:01,  6.35it/s]
0it [00:00, ?it/s]

2_332


245it [00:37,  6.49it/s]


2_335


257it [00:37,  6.79it/s]
1it [00:00,  7.16it/s]

2_34


9it [00:01,  8.21it/s]


2_340


27it [00:04,  6.72it/s]
1it [00:00,  6.38it/s]

2_342


9it [00:01,  6.76it/s]
1it [00:00,  5.50it/s]

2_344


7it [00:00,  7.52it/s]
1it [00:00,  6.70it/s]

2_345


19it [00:02,  6.95it/s]
1it [00:00,  6.29it/s]

2_346


11it [00:01,  7.13it/s]
0it [00:00, ?it/s]

2_347


57it [00:08,  6.47it/s]
1it [00:00,  6.22it/s]

2_349


29it [00:04,  7.11it/s]
1it [00:00,  6.25it/s]

2_367


11it [00:01,  7.14it/s]


2_370


301it [00:45,  6.68it/s]
1it [00:00,  6.44it/s]

2_371


9it [00:01,  7.39it/s]


2_396


771it [01:48,  7.08it/s]
0it [00:00, ?it/s]

2_404


233it [00:34,  6.72it/s]
1it [00:00,  6.03it/s]

2_405


21it [00:03,  6.53it/s]
0it [00:00, ?it/s]

2_407


75it [00:11,  6.71it/s]


2_429


267it [00:43,  6.19it/s]
1it [00:00,  7.06it/s]

2_431
2_434


17it [00:02,  7.12it/s]
1it [00:00,  5.27it/s]

2_437
2_439


9it [00:01,  7.11it/s]
0it [00:00, ?it/s]

2_440


65it [00:09,  6.64it/s]
1it [00:00,  5.45it/s]

2_442


11it [00:01,  6.30it/s]
0it [00:00, ?it/s]

2_447


79it [00:11,  6.61it/s]
1it [00:00,  6.64it/s]

2_448


9it [00:01,  7.18it/s]
1it [00:00,  5.68it/s]

2_457


9it [00:01,  7.29it/s]
1it [00:00,  6.27it/s]

2_50


21it [00:03,  6.90it/s]
0it [00:00, ?it/s]

2_518


99it [00:14,  6.76it/s]
1it [00:00,  6.82it/s]

2_523


9it [00:01,  6.87it/s]


2_530


427it [01:00,  7.06it/s]
1it [00:00,  5.77it/s]

2_543


11it [00:01,  6.37it/s]
1it [00:00,  6.50it/s]

2_61


43it [00:06,  6.56it/s]


2_88


799it [02:08,  6.24it/s]
1it [00:00,  6.64it/s]

2_89


27it [00:04,  6.59it/s]


In [None]:
# Write an MCC data CSV for every large-lineage tree.
for mcc_file in os.listdir("../results/large_lineages/"):
    if mcc_file.endswith(".mcc"):
        # Remove only the ".mcc" suffix; str.strip(".mcc") would also eat any
        # '.', 'm' or 'c' characters at either end of the lineage name.
        name = mcc_file[:-len(".mcc")]
        print(name)
        mcc_file = f'../results/large_lineages/{mcc_file}'
        make_mcc_tree_file(mcc_file, name, mrts[name], "../results/MCC_files_all/")

  start_long,start_lat = transform(inProj,outProj, original_start_long, original_start_lat)
  end_long, end_lat = transform(inProj,outProj, original_end_long, original_end_lat)
1336it [00:44, 28.90it/s]

## Process distances

In [10]:
def calc_distance(long1,lat1,long2,lat2, radius=6371):
    """Great-circle distance between two points, using the haversine formula.

    Parameters
    ----------
    long1, lat1 : float
        Longitude and latitude of the first point, in degrees.
    long2, lat2 : float
        Longitude and latitude of the second point, in degrees.
    radius : float, optional
        Sphere radius; the result is in the same unit. The default 6371 is
        the Earth's radius in kilometres (use 3956 for miles).

    Returns
    -------
    float
        Distance along the sphere's surface, in the unit of ``radius``.
    """
    # Convert every coordinate from degrees to radians.
    long1, lat1, long2, lat2 = (radians(value) for value in (long1, lat1, long2, lat2))

    # Haversine formula
    dlong = long2 - long1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlong / 2)**2

    # Rounding can push `a` marginally above 1 for near-antipodal points,
    # which would make asin() raise a domain error.
    c = 2 * asin(sqrt(min(1.0, a)))

    return c * radius
     

In [11]:
#to check the function works like I"m expecting
london = (-0.1257,51.51)
manchester = (-2.244, 53.484)

print(calc_distance(london[0], london[1], manchester[0], manchester[1]))

262.162946805854


In [12]:
# For every MCC data CSV, write a "<name>_movements.csv" listing the start date
# and great-circle distance of each branch whose start and end postcode
# districts differ.
for file in os.listdir("../results/MCC_files_all/"):
    if file.endswith(".csv"):
        # Name = first two "_"-separated tokens of the file name. The suffix is
        # sliced off because str.strip(".csv") removes a character set from
        # both ends rather than the extension.
        name = "_".join(file[:-len(".csv")].split("_")[0:2])

        # Both files are managed by `with`, so they are closed even if a row
        # fails to parse.
        with open(f"../results/movements/{name}_movements.csv", 'w') as fw, \
                open(os.path.join("../results/MCC_files_all/", file)) as f:
            fw.write("date,distance\n")
            data = csv.DictReader(f)
            for line in data:
                if line["start_pc"] != line["end_pc"]:

                    start = (float(line['start_long_4326']),float(line["start_lat_4326"]))
                    end = (float(line['end_long_4326']),float(line["end_lat_4326"]))

                    distance = calc_distance(start[0],start[1],end[0],end[1])

                    fw.write(f'{line["start_real_date"]},{distance}\n')
                    
                    
