# Map of NBC Strains
This notebook utilise folium to map where the strain was collected. Requires GPS coordinates.

In [1]:
# Load Library
import pandas as pd
import folium
from folium.features import DivIcon
#from IPython.display import display, IFrame
import re

In [2]:
# function to convert dms to dd
# https://stackoverflow.com/questions/33997361/how-to-convert-degree-minute-second-to-degree-decimal


def dms2dd(degrees, minutes, seconds, direction):
    dd = float(degrees) + float(minutes)/60 + float(seconds)/(60*60);
    if direction == 'E' or direction == 'N':
        dd *= -1
    return dd;

def dd2dms(deg):
    d = int(deg)
    md = abs(deg - d) * 60
    m = int(md)
    sd = (md - m) * 60
    return [d, m, sd]

def parse_dms(dms):
    parts = re.split('[^\d\w]+', dms)
    lat = dms2dd(parts[0], parts[1], parts[2], parts[3])

    return (lat)

In [3]:
# load data
df_nbc = pd.read_csv('../data/df_strains.csv')
df_nbc.head()

Unnamed: 0,Strain ID,Soil sample name,Country of origin,Color of colony,Colony morphology,WT halo against AB?,WT halo against E. coli?,Frozen on:,Sent through co-ALE?,Sequenced?,...,Bioactive cmpd identified?,Comments,Strain,Description of soil sample,Location,GPS coordinates,Date of collection,Collected by:,Media for Isolation,Soil meta Comments
0,NBC 00001,Jernbanevej,Denmark,white,wrinkly,No,No,,,,...,,,NBC 00001,Backyard where Pep used to live in Lyngby,Lyngby,"55 16' 18"" N, 12 30' 4"" E",2016-07-25,Pep Charusanti,,
1,NBC 00002,Jernbanevej,Denmark,white,wrinkly,reduced ZOI,reduzed ZOI,,Yes,Yes,...,,,NBC 00002,Backyard where Pep used to live in Lyngby,Lyngby,"55 16' 18"" N, 12 30' 4"" E",2016-07-25,Pep Charusanti,,
2,NBC 00003,Lyngby lake,Denmark,white/brown,wrinkly,No,No,10/17/2016,,Yes,...,,,NBC 00003,Under a log near the water,Lyngby Sø,"55 46' 24"" N, 12 29' 34"" E",2016-07-25,Pep Charusanti,,
3,NBC 00005,Lyngby lake,Denmark,"black base, white aerial mycelia",bumpy,No,No,10/17/2016,,Yes,...,,,NBC 00005,Under a log near the water,Lyngby Sø,"55 46' 24"" N, 12 29' 34"" E",2016-07-25,Pep Charusanti,,
4,NBC 00006,Lyngby lake,Denmark,beige/cream,wrinkly,No,No,10/17/2016,Yes,,...,,,NBC 00006,Under a log near the water,Lyngby Sø,"55 46' 24"" N, 12 29' 34"" E",2016-07-25,Pep Charusanti,,


In [4]:
# cleaning gps data into dd format
data = []
for num, i in enumerate(df_nbc['GPS coordinates']):
    try:
        # split df
        parts = re.split('[^\d\w]+', i)
        #print(len(parts) == 4)
        
        # dd format
        if len(parts) == 4:
            gps = [float(str(parts[0])+'.'+str(parts[1])), 
                   float(str(parts[2])+'.'+str(parts[3])),
                  'dd']
            #print(gps)
        
        elif len(parts) == 6:
            gps = [float(str(parts[0])+'.'+str(parts[1])), 
                   float(str(parts[3])+'.'+str(parts[4])),
                  'dd']
            #print(gps, parts[2], parts[-1])
        # dmm
        
        elif len(parts) == 8:
            gps = [(str(parts[0])+'°'+str(parts[1])+"'"+str(parts[2])+" "+str(parts[3])),
                   (str(parts[4])+'°'+str(parts[5])+"'"+str(parts[6])+" "+str(parts[7])),
                  'dms']
        
        elif len(parts) == 10:
            gps = [(str(parts[0])+'°'+str(parts[1])+"'"+str(parts[2])+"."+str(parts[3])+" "+str(parts[4])),
                   (str(parts[5])+'°'+str(parts[6])+"'"+str(parts[7])+"."+str(parts[8])+" "+str(parts[9])),
                  'dms']
        
        elif len(parts) == 12:
            gps = [(str(parts[0])+'°'+str(parts[1])+"'"+str(parts[2])+" "+str(parts[3])),
                   (str(parts[4])+'°'+str(parts[5])+"'"+str(parts[6])+" "+str(parts[7])), 
                  'dms']
        
        else:
            i #print(len(gps[0]))
        #print(gps)
        if gps[2] == 'dms':
            gps[0] = parse_dms(gps[0])
            gps[1] = parse_dms(gps[1])
        #print(gps)
        
        data.append([num, gps[0], gps[1]])
        
    except TypeError:
        num, i
        
# add data to df
df_latlong = pd.DataFrame(data=data, columns=['index', 'lat', 'long']).set_index('index')
df_nbc = df_nbc.merge(df_latlong, left_index=True, right_index=True)
df_nbc.to_csv('../tables/df_cleaned.csv')

In [5]:
# drop data without GPS
df = df_nbc[df_nbc['lat'].notna()]
df

# notes that some of the coordinates in negative are wrong

Unnamed: 0,Strain ID,Soil sample name,Country of origin,Color of colony,Colony morphology,WT halo against AB?,WT halo against E. coli?,Frozen on:,Sent through co-ALE?,Sequenced?,...,Strain,Description of soil sample,Location,GPS coordinates,Date of collection,Collected by:,Media for Isolation,Soil meta Comments,lat,long
0,NBC 00001,Jernbanevej,Denmark,white,wrinkly,No,No,,,,...,NBC 00001,Backyard where Pep used to live in Lyngby,Lyngby,"55 16' 18"" N, 12 30' 4"" E",2016-07-25,Pep Charusanti,,,-55.271667,-12.501111
1,NBC 00002,Jernbanevej,Denmark,white,wrinkly,reduced ZOI,reduzed ZOI,,Yes,Yes,...,NBC 00002,Backyard where Pep used to live in Lyngby,Lyngby,"55 16' 18"" N, 12 30' 4"" E",2016-07-25,Pep Charusanti,,,-55.271667,-12.501111
2,NBC 00003,Lyngby lake,Denmark,white/brown,wrinkly,No,No,10/17/2016,,Yes,...,NBC 00003,Under a log near the water,Lyngby Sø,"55 46' 24"" N, 12 29' 34"" E",2016-07-25,Pep Charusanti,,,-55.773333,-12.492778
3,NBC 00005,Lyngby lake,Denmark,"black base, white aerial mycelia",bumpy,No,No,10/17/2016,,Yes,...,NBC 00005,Under a log near the water,Lyngby Sø,"55 46' 24"" N, 12 29' 34"" E",2016-07-25,Pep Charusanti,,,-55.773333,-12.492778
4,NBC 00006,Lyngby lake,Denmark,beige/cream,wrinkly,No,No,10/17/2016,Yes,,...,NBC 00006,Under a log near the water,Lyngby Sø,"55 46' 24"" N, 12 29' 34"" E",2016-07-25,Pep Charusanti,,,-55.773333,-12.492778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1565,NBC 01571,Stenbjerg marsh,Denmark,,,,,,,,...,NBC 01571,marsh,Stenbjerg,"56°54'49.2""N 8°22'51.6""E",2021-03-06,Julie Nielsen,,,56.913611,8.380833
1566,NBC 01572,Stenbjerg marsh,Denmark,,,,,,,,...,NBC 01572,marsh,Stenbjerg,"56°54'49.2""N 8°22'51.6""E",2021-03-06,Julie Nielsen,,,56.913611,8.380833
1567,NBC 01573,Stenbjerg marsh,Denmark,,,,,,,,...,NBC 01573,marsh,Stenbjerg,"56°54'49.2""N 8°22'51.6""E",2021-03-06,Julie Nielsen,,,56.913611,8.380833
1568,NBC 01574,Stenbjerg marsh,Denmark,,,,,,,,...,NBC 01574,marsh,Stenbjerg,"56°54'49.2""N 8°22'51.6""E",2021-03-06,Julie Nielsen,,,56.913611,8.380833


In [7]:
# initiate map using a gps location as a starting point
m = folium.Map(location=[55.47166666666667, 10.654444444444445], zoom_start=8, tiles="openstreetmap")

# fill in data points for the map
for num, i in enumerate(data): # need to use the df rather than the raw coordinates
    # uses circles as sample points
    folium.Circle(
        i[1:], popup="<i>needtofillwithsamplenames</i>", radius=2500, color='red', fill=True).add_to(m) # other description can be added later in popup, set size with radius
# save output as html    
m.save('../output/strainmap.html')

# show map
m