In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd

import folium 
import folium.plugins as plugins
import folium.folium as Map

from ipyleaflet import Map, basemaps, basemap_to_tiles,DrawControl,ScaleControl,MarkerCluster,Marker,Polyline,Heatmap,LayersControl,AntPath
from shapely.geometry import Polygon


# Load Metadata From the different ERDDAP Servers

In [85]:
# Load the ERDDAP server tables made by "Create_Cache_Metadata-SelectionTool" and convert them to a GeoDataFrame
# CSV lists available as of now (we can easily make this more flexible)
file_list = ['Server_List_catalogue_hakai_org_erddap.csv',
            'Server_List_dap_onc_uvic_ca_erddap.csv',
            'Server_List_data_cioospacific_ca_erddap.csv']

# Combine all the tables into one DataFrame.
# Every CSV is read first and concatenated once: DataFrame.append was removed in
# pandas 2.0 and re-copied the whole frame on each iteration. ignore_index gives
# every row a unique label instead of repeating each file's own 0..n index.
df = pd.concat([pd.read_csv(file) for file in file_list], ignore_index=True)

# Convert to geopandas: one point per row, x = longitude and y = latitude
gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df['longitude degrees_east'], df['latitude degrees_north']))

print(str(len(gdf))+' distinct location is associated with data')

84296 distinct location is associated with data


# Select Data Spatially through all the servers and datasets

In [79]:
# Create a map with ipyleaflet for the user to draw a polygon on it
center_map = (45, -127)  # Somewhere around the center of the BC Pacific Coast

# Start by defining the map
# (center must use center_map defined above; the previously referenced
#  center_lat / center_long names are never defined in this notebook)
m = Map(basemap=basemaps.Esri.WorldTopoMap,
        center=center_map,
        zoom=3)

# Add controls for the polygon
draw_control = DrawControl()

# Add Heatmap of all the data available (locations are [latitude, longitude] pairs)
heatmap = Heatmap(name='Data Location HeatMap',
                  locations=gdf[['latitude degrees_north','longitude degrees_east']].values.tolist(),
                  radius=5, blur=2,min_opacity=0.2,max=1)

# Add control and layers to the map
m.add_control(draw_control)
m.add_control(ScaleControl(position='topright')) #Scale on the top right corner
m.add_layer(heatmap)
control = LayersControl(position='bottomright')
m.add_control(control)

# Show Map
print('Draw a polygon on the map!')
m

Draw a polygon on the map!


Map(center=[45, -127], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_t…

In [81]:
# Find datasets in this polygon
# Retrieve the polygon from the map. Fail with a clear message when nothing
# (or something other than a polygon) has been drawn yet, instead of an opaque
# TypeError/IndexError from indexing the missing geometry.
drawn_geometry = draw_control.last_draw.get('geometry')
if not drawn_geometry or drawn_geometry.get('type') != 'Polygon':
    raise ValueError('No polygon found: draw a polygon on the map above, then re-run this cell.')

# GeoJSON polygon coordinates are a list of rings; ring 0 is the exterior ring
polygone_matrix = np.asarray(drawn_geometry['coordinates'])
polygon_region = gpd.GeoSeries({'selected':Polygon(polygone_matrix[0,:,:])})

# Filter only the datasets within that polygon and plot them
# (adds one boolean column per named polygon, here just 'selected')
gdf = gdf.assign(**{key: gdf.within(geom) for key, geom in polygon_region.items()})
gdf_selected = gdf[gdf['selected']]
print(str(len(gdf_selected))+' datasets are within this polygon!')


157 datasets are within this polygon!


In [82]:
# Show the selected data on the map above: a Marker Cluster by default, or an
# AntPath when there are more than 3000 points (otherwise ipyleaflet is slow)
selected_locations = gdf_selected[['latitude degrees_north','longitude degrees_east']].values.tolist()

if len(gdf_selected) <= 3000:
    # Few enough points: one marker per location, grouped in a cluster layer
    markers = [Marker(location=loc) for loc in selected_locations]
    marker_cluster = MarkerCluster(name='Selected Data', markers=markers)
    m += marker_cluster
else:
    # Too many points for individual markers: draw them as a single animated path
    lines = AntPath(name='Selected Data',
                    locations=selected_locations,
                    dash_array=[1, 10],
                    delay=1000,
                    color='#7590ba',
                    pulse_color='#3f6fba',
                    use='polygon')
    m.add_layer(lines)

# Get the range of data to be queried through the different ERDDAP servers and dataset IDs
<ul>
<li>Latitude range (min/max)</li>
<li>Longitude range (min/max)</li>
<li>Depth (One day...)</li>
<li>Time (One day...)</li>
</ul>

In [83]:
# Regroup each dataset in its erddap dataset ID and get the min/max of every column.
# The geometry column is dropped explicitly first: points have no ordering, so
# min/max cannot be computed on it. Older pandas silently discarded such columns,
# recent pandas raises instead.
dataset_ranges = (
    pd.DataFrame(gdf_selected.drop(columns='geometry'))
    .groupby(['server', 'Dataset ID'])
    .agg(['min', 'max'])
)
dataset_ranges


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,Unnamed: 0,latitude degrees_north,latitude degrees_north,longitude degrees_east,longitude degrees_east,sea_water_practical_salinity,sea_water_practical_salinity,sea_water_temperature,sea_water_temperature,cdm_data_type,cdm_data_type,selected,selected
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,min,max,min,max,min,max,min,max,min,max,min,max
server,Dataset ID,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
https://data.cioospacific.ca/erddap,IOS_BOT_Profiles,151.0,260.0,32.4855,40.99633,-165.06,-155.0483,"PSALST01,PSALBST01,PSALST02","PSALST01,PSALBST01,PSALST02","TEMPRTN1,TEMPST01,TEMPS901,TEMPS602,TEMPS601,T...","TEMPRTN1,TEMPST01,TEMPS901,TEMPS602,TEMPS601,T...",Profile,Profile,True,True
https://data.cioospacific.ca/erddap,IOS_CTD_Profiles,183.0,683.0,31.015,40.9985,-167.7517,-155.0,"PSALST01,PSALST02","PSALST01,PSALST02","TEMPST01,TEMPS602,TEMPS601,TEMPS902","TEMPST01,TEMPS602,TEMPS601,TEMPS902",Profile,Profile,True,True


# Next steps
<ul>
<li>Query every dataset ID for those ranges and variables</li>
<li>Filter the resulting data with the polygon above </li>
<li>Merge the different datasets together</li>
</ul>

# EXTRA

## FastMarkerCluster Example with Folium

In [None]:
# Example of map with Folium which has a faster tool to handle LOTS of points. 
# This section is however not used in the steps below

def get_folium_map(df, type_dict, lat_col='latitude', lon_col='longitude', zoom_start=5,
                   plot_points=False, pt_radius=15):
    """Build a folium map with one FastMarkerCluster layer per distinct value of a column.

    Parameters
    ----------
    df : DataFrame
        Table holding the point coordinates and the grouping column.
    type_dict : str
        Name of the column whose distinct values each get their own layer.
        The values are upper-cased for the layer names, so they must be strings.
    lat_col, lon_col : str
        Names of the latitude and longitude columns.
    zoom_start : int
        Initial zoom level of the map.
    plot_points, pt_radius
        Accepted but not used by this function.

    Returns
    -------
    folium.Map
        The map, centered on the median latitude/longitude of ``df``.
    """
    # Center the map on the median position of all the points
    map_center = [df[lat_col].median(), df[lon_col].median()]

    # Base map plus an extra terrain tile layer
    # NOTE(review): recent folium releases may no longer bundle the Stamen tiles - confirm
    curr_map = folium.Map(location=map_center, zoom_start=zoom_start)
    folium.TileLayer('Stamen Terrain').add_to(curr_map)

    # Mini map
    curr_map.add_child(plugins.MiniMap(zoom_level_offset=-4, width=300, height=300))

    # One FastMarkerCluster per group (this is fast and can handle >10k points);
    # each layer is toggleable in the layer control and hidden by default
    for group_value in sorted(df[type_dict].unique()):
        group_points = df.loc[df[type_dict] == group_value, [lat_col, lon_col]]
        cluster = plugins.FastMarkerCluster(group_points.values.tolist(),
                                            name=group_value.upper(),
                                            control=True,
                                            show=False)
        curr_map.add_child(cluster)

    folium.LayerControl(collapsed=False).add_to(curr_map)
    return curr_map

# Let's create an example of a webpage with Folium
# Get Marker Cluster map with folium: one layer per ERDDAP server.
# get_folium_map (defined above) is the function that builds it; the previously
# called apply_heat_map is not defined anywhere in this notebook.
curr_map = get_folium_map(df,'server','latitude degrees_north','longitude degrees_east')
curr_map.save('Folium_CIOOS_dataset_available.html')