This notebook generates a map to answer the following question:
* Find industrial facilities that are upstream from surface water bodies with concerning PFAS sample results (e.g., > 4 ppt).

# Setup


In [None]:
%%capture
!pip install mapclassify
!pip install SPARQLWrapper
!pip install rdflib

In [None]:
# from branca.element import Figure                                  # For controlling the size of the final map
import folium                                                      # For map layer control
import geopandas as gpd                                            # For geospatial dataframes
import pandas as pd                                                # For dataframes
from shapely import wkt                                            # For working with WKT coordinates in a GeoDataFrame
from SPARQLWrapper import SPARQLWrapper2, JSON, GET, POST, DIGEST  # For querying SPARQL endpoints
import rdflib                                                      # For working with URIs

def convertToDataframe(results):
    """Turn a SPARQL result object into a pandas DataFrame.

    Args:
        results: Object exposing ``bindings``, an iterable of mappings from
            variable name to a value object that has ``value`` and
            ``datatype`` attributes.

    Returns:
        A DataFrame with one row per binding. Each cell is the value wrapped
        in an rdflib ``Literal`` with its datatype and converted with
        ``toPython()``. A row only carries the variables present in its
        binding.
    """
    def _to_python(term):
        # Let rdflib choose the Python representation based on the datatype.
        return rdflib.term.Literal(term.value, datatype=term.datatype).toPython()  # type: ignore[no-untyped-call]

    records = [
        {name: _to_python(binding[name]) for name in binding}
        for binding in results.bindings
    ]
    return pd.DataFrame(records)

def convertS2ListToQueryString(s2list):
    """Build a space-separated list of prefixed S2 cell names for a SPARQL VALUES clause.

    Args:
        s2list: Iterable of S2 cell URI strings. Duplicates are allowed.

    Returns:
        A single string in which every distinct URI appears once, with the
        ``http://stko-kwg.geog.ucsb.edu/lod/resource/`` namespace replaced by
        the ``kwgr:`` prefix. Tokens keep the order of first appearance, so
        the generated query text is the same on every run. An empty input
        yields an empty string.
    """
    # dict.fromkeys de-duplicates while preserving insertion order; a plain
    # set() would make the token order vary between interpreter runs.
    unique_cells = dict.fromkeys(s2list)
    s2list_short = [
        s2cell.replace("http://stko-kwg.geog.ucsb.edu/lod/resource/", "kwgr:")
        for s2cell in unique_cells
    ]
    return " ".join(s2list_short)


In [None]:
# for interactive widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

In [None]:
# SPARQL endpoint that every query in this notebook is sent to.
frink_fed_endpoint = 'https://frink.apps.renci.org/federation/sparql'

# Shared query client: the query cells call setQuery()/query() on this object.
# NOTE(review): despite the "GET" in its name, the client is configured to use POST below.
sparqlGET = SPARQLWrapper2(frink_fed_endpoint)
# NOTE(review): DIGEST auth is selected, but no credentials are set anywhere in this notebook - confirm it is needed.
sparqlGET.setHTTPAuth(DIGEST)
sparqlGET.setMethod(POST)
sparqlGET.setReturnFormat(JSON)

# Find industrial facilities that are upstream from surface water bodies with concerning PFAS sample results (e.g., > 4 ppt).

In [None]:
#Parameters to choose
# NOTE(review): the trailing `# @param ...` comments look like Colab form-field annotations; keep them verbatim.

# TODO: indicate DSSToxID for each PFAS, query by DSSToxId
substance = "PFOA (DTXSID8031865)" # @param ["PFOS (DTXSID3031864)", "PFOA (DTXSID8031865)", "PFBA (DTXSID4059916)", "PFBS (DTXSID5030030)", "PFHPA (DTXSID1037303)", "PFHXS (DTXSID7040150)", "PFHXA (DTXSID3031862)", "PFHPS (DTXSID8059920)", "PFNA (DTXSID8031863)", "PFDA (DTXSID3031860)"]{"allow-input":true}
#substanceCode = "me_egad_data:parameter." + substance + "_A"
# Take the second whitespace-separated token, e.g. "(DTXSID8031865)", and strip the parentheses.
# This raises IndexError if the value has no second token (possible with free-text input).
dsstoxid = substance.split()[1].strip("()")
# Prefixed name that the queries splice into their `VALUES ?substance {...}` clause.
substanceCode ="dsstox:" + dsstoxid

# TODO: change the list of options to all subclasses of coso:MaterialSample
materialType = "Water Sample" # @param ["Water Sample", "   Surface Water Sample", "   Ground Water Sample", "   Drinking Water Sample", "Animal Material Sample", "Plant Material Sample", "Solid Material Sample" ]
#materialType = "SW (Surface Water)" # @param ["DW (Drinking Water)", "GW (Groundwater)", "WW (Waste Water)", "SW (Surface Water)", "PW (Pore Water)", "L (Leachate)", "SR (Storm Water Runoff)", "SL (Soil)" ]{"allow-input":true}
#matTypeCode = "me_egad_data:sampleMaterialType." + materialType.split()[0]
# Remove all whitespace (including the leading indentation of some options) to form
# the prefixed class name, e.g. "Water Sample" -> "coso:WaterSample".
matTypeCode= "coso:"+ ''.join(materialType.split())

admin_region = "23005 (Cumberland County, Maine)" # @param ["23019 (Penobscot County, Maine)","23011 (Kennebec County, Maine)","23005 (Cumberland County, Maine)","23003 (Aroostook County, Maine)","23025 (Somerset County, Maine)","23 (Maine)"] {"allow-input":true}
# The first token is the numeric region code, appended to `kwgr:administrativeRegion.USA.` in the queries.
regionCode = admin_region.split()[0]

# Exclusive lower bound for result values: the queries use `FILTER (?result_value > minValue)`
# and restrict the unit to NanoGM-PER-L.
minValue = 4 # @param
# maxValue = 1000000 # @param


## Queries

In [None]:
# Query sample points, sample data, and S2 cells within a given administrative region
#
# The query text is assembled by string concatenation from the parameter cell
# (regionCode, substanceCode, matTypeCode, minValue). It selects sample points that
#   * are connected to an AdministrativeRegion_3 that is (transitively) part of the chosen region, and
#   * have an observation of the chosen substance and sample type, in ng/L, with a value above minValue.
# For each sample point it also returns every level-13 S2 cell that is the same as, or touches,
# the sample point's cell and is connected to a water body. Results are grouped by
# (sample point, WKT, S2 cell), so a sample point can appear in several rows.
# ?resultCount counts distinct "value unit" strings; ?max is the largest value.

q1 = '''
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX dsstox: <http://w3id.org/DSSTox/v1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <https://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX qudt: <http://qudt.org/schema/qudt/>

SELECT (COUNT(DISTINCT ?subVal) as ?resultCount) (MAX(?result_value) as ?max) ?sp ?spWKT ?s2cell WHERE {
    ?sp rdf:type coso:SamplePoint ;
        geo:hasGeometry/geo:asWKT ?spWKT ;
        spatial:connectedTo ?ar3 ;
        spatial:connectedTo ?s2 .
    ?ar3 rdf:type kwg-ont:AdministrativeRegion_3 ;
         kwg-ont:administrativePartOf+ kwgr:administrativeRegion.USA.''' + regionCode + ''' .
    ?s2 rdf:type kwg-ont:S2Cell_Level13 .
    ?s2cell rdf:type kwg-ont:S2Cell_Level13 ;
             kwg-ont:sfTouches | owl:sameAs ?s2 ;
             spatial:connectedTo ?waterbody .
    ?waterbody a hyf:HY_WaterBody .
    ?observation rdf:type coso:ContaminantObservation ;
                coso:observedAtSamplePoint ?sp ;
                coso:ofSubstance ?substance ;
                coso:analyzedSample ?sample ;
                coso:hasResult ?result .
    ?sample rdfs:label ?sampleLabel ;
            rdf:type ?matType .
    ?matType rdfs:label ?matTypeLabel .
    ?result coso:measurementValue ?result_value ;
            coso:measurementUnit ?unit .
    ?unit qudt:symbol ?unit_sym .
    VALUES ?unit {<http://qudt.org/vocab/unit/NanoGM-PER-L>}
    VALUES ?substance {''' + substanceCode + '''}
    VALUES ?matType {''' + matTypeCode + '''}
    FILTER (?result_value > '''+ str(minValue) + ''')
    BIND((CONCAT(str(?result_value) , " ", ?unit_sym)) as ?subVal)
} GROUP BY ?sp ?spWKT ?s2cell
'''
# print(q1)

# Run the query on the shared client and flatten the bindings into a DataFrame.
sparqlGET.setQuery(q1)
samplepoint_result = sparqlGET.query()
samplepoints = convertToDataframe(samplepoint_result)

#s2_sp_values_string = convertS2ListToQueryString(samplepoints['s2cell'].tolist())

In [None]:
# Preview the q1 result: one row per (sample point, S2 cell) pair with the result count and maximum value.
samplepoints

Unnamed: 0,resultCount,max,sp,spWKT,s2cell
0,11,11.5,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.207340 43.591776),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
1,11,11.5,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.207340 43.591776),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
2,1,10.8,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.635809 44.149630),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
3,8,21.1,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.638250 44.147668),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
4,3,14.4,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.635293 44.149719),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
...,...,...,...,...,...
198,2,3180.0,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.547365 43.764320),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
199,2,7470.0,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.548101 43.763882),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
200,1,5220.0,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.547075 43.762789),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
201,2,146.0,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.548021 43.761943),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....


In [None]:
# Waterbodies for mapping
#
# Returns the distinct water bodies (URI, optional schema:name, WKT geometry) that are
# connected to any S2 cell produced by the inner sub-select. The sub-select applies the
# same region / substance / sample type / minimum value filter as q1 and keeps the
# level-13 S2 cells that equal or touch a qualifying sample point's cell and are
# connected to a water body.
# ?wb_name is OPTIONAL, so the resulting column can contain missing values.
q_waterbodies = '''
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX dsstox: <http://w3id.org/DSSTox/v1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <https://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX qudt: <http://qudt.org/schema/qudt/>

SELECT DISTINCT ?wb ?wb_name ?wbWKT #?s2wb
WHERE {
    ?wb a hyf:HY_WaterBody ;
        geo:hasGeometry/geo:asWKT ?wbWKT ;
        spatial:connectedTo ?s2cell ;
        #spatial:connectedTo ?s2wb .
    #?s2wb a kwg-ont:S2Cell_Level13 .
    OPTIONAL { ?wb schema:name ?wb_name }
    # get s2cells of samples near water
    {SELECT DISTINCT ?s2cell WHERE {
    ?sp rdf:type coso:SamplePoint ;
        spatial:connectedTo ?ar3 ;
        spatial:connectedTo ?s2 .
    ?ar3 rdf:type kwg-ont:AdministrativeRegion_3 ;
         kwg-ont:administrativePartOf+ kwgr:administrativeRegion.USA.''' + regionCode + ''' .
    ?s2 rdf:type kwg-ont:S2Cell_Level13 .
    ?s2cell rdf:type kwg-ont:S2Cell_Level13 ;
             kwg-ont:sfTouches | owl:sameAs ?s2 ;
             spatial:connectedTo ?waterbody .
    ?waterbody a hyf:HY_WaterBody .
    ?observation rdf:type coso:ContaminantObservation ;
                coso:observedAtSamplePoint ?sp ;
                coso:ofSubstance ?substance ;
                coso:analyzedSample ?sample ;
                coso:hasResult ?result .
    ?sample rdfs:label ?sampleLabel ;
            rdf:type ?matType .
    ?matType rdfs:label ?matTypeLabel .
    ?result coso:measurementValue ?result_value ;
            coso:measurementUnit ?unit .
    ?unit qudt:symbol ?unit_sym .
    VALUES ?unit {<http://qudt.org/vocab/unit/NanoGM-PER-L>}
    VALUES ?substance {''' + substanceCode + '''}
    VALUES ?matType {''' + matTypeCode + '''}
    FILTER (?result_value > '''+ str(minValue) + ''')
  } GROUP BY ?s2cell}
}
'''

# Run the query on the shared client and flatten the bindings into a DataFrame.
sparqlGET.setQuery(q_waterbodies)
waterbodies_result = sparqlGET.query()
waterbodies = convertToDataframe(waterbodies_result)

#s2_wb_values_string = convertS2ListToQueryString(waterbodies['s2wb'].tolist())

In [None]:
# Preview the water bodies returned by q_waterbodies (URI, optional name, WKT geometry).
waterbodies

Unnamed: 0,wb,wb_name,wbWKT
0,https://geoconnex.us/nhdplusv2/comid/166421056,Parker Pond,POLYGON ((-70.51402300098817 43.99489679837779...
1,https://geoconnex.us/nhdplusv2/comid/6713733,,POLYGON ((-70.03519906839807 43.98208913173101...
2,https://geoconnex.us/nhdplusv2/comid/6718823,Island Pond,POLYGON ((-70.64394160078649 44.15504279812921...
3,https://geoconnex.us/nhdplusv2/comid/6718889,Pleasant Lake,POLYGON ((-70.54174986761177 44.05718299828112...
4,https://geoconnex.us/nhdplusv2/comid/6718893,Woods Pond,POLYGON ((-70.72769366732314 44.01934639833985...
5,https://geoconnex.us/nhdplusv2/comid/6719609,Crescent Lake,"POLYGON ((-70.4627742010677 43.99126293171679,..."
6,https://geoconnex.us/nhdplusv2/comid/6719681,Panther Pond,POLYGON ((-70.48485286770011 43.94498693178860...
7,https://geoconnex.us/nhdplusv2/comid/6719717,Sebago Lake Basin,"POLYGON ((-70.5090222676626 43.84804039860575,..."
8,https://geoconnex.us/nhdplusv2/comid/6719845,,POLYGON ((-70.10784326828531 43.83730293195577...
9,https://geoconnex.us/nhdplusv2/comid/6719849,,POLYGON ((-70.10733846828606 43.83410133196077...


In [None]:
# Query the hydrology graph for the upstream S2cells
#
# Steps encoded in the query, innermost first:
#   1. S2 cells near qualifying sample points (same filter as q1).
#   2. All level-13 S2 cells (?s2wb) of the water bodies connected to those cells.
#   3. Flow paths (?downstream_flowline) connected to those water-body cells.
#   4. Every flow path that has one of those as hyf:downstreamFlowPathTC,
#      i.e. the upstream flowlines, returned with FTYPE and WKT geometry.
# The string mixes tabs and spaces; it is sent to the endpoint as-is.

q2 = '''
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX dsstox: <http://w3id.org/DSSTox/v1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <https://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX nhdplusv2: <http://nhdplusv2.spatialai.org/v1/nhdplusv2#>

SELECT DISTINCT ?upstream_flowline ?us_ftype ?upstream_flowlineWKT #?downstream_flowline ?ds_ftype
WHERE {
	{SELECT DISTINCT ?s2wb WHERE {																# get s2 of water bodies that overlap...
    ?wb a hyf:HY_WaterBody ;
        geo:hasGeometry/geo:asWKT ?wbWKT ;
        spatial:connectedTo ?s2cell ;
        spatial:connectedTo ?s2wb .
    ?s2wb a kwg-ont:S2Cell_Level13 .
						{SELECT DISTINCT ?s2cell WHERE { 										# s2cells of samples near water
						?sp rdf:type coso:SamplePoint ;
								spatial:connectedTo ?ar3 ;
								spatial:connectedTo ?s2 .
						?ar3 rdf:type kwg-ont:AdministrativeRegion_3 ;
								kwg-ont:administrativePartOf+ kwgr:administrativeRegion.USA.''' + regionCode + ''' .
						?s2 rdf:type kwg-ont:S2Cell_Level13 .
						?s2cell rdf:type kwg-ont:S2Cell_Level13 ;
										kwg-ont:sfTouches | owl:sameAs ?s2 ;
										spatial:connectedTo ?waterbody .
						?waterbody a hyf:HY_WaterBody .
						?observation rdf:type coso:ContaminantObservation ;
												coso:observedAtSamplePoint ?sp ;
												coso:ofSubstance ?substance ;
												coso:analyzedSample ?sample ;
												coso:hasResult ?result .
						?sample rdfs:label ?sampleLabel ;
										rdf:type ?matType .
						?matType rdfs:label ?matTypeLabel .
						?result coso:measurementValue ?result_value ;
										coso:measurementUnit ?unit .
						?unit qudt:symbol ?unit_sym .
						VALUES ?unit {<http://qudt.org/vocab/unit/NanoGM-PER-L>}
						VALUES ?substance {''' + substanceCode + '''}
						VALUES ?matType {''' + matTypeCode + '''}
						FILTER (?result_value > '''+ str(minValue) + ''')
					} GROUP BY ?s2cell}
		}}
	?downstream_flowline rdf:type hyf:HY_FlowPath ;							# ... and flowlines connected to
						 spatial:connectedTo ?s2wb ;
						 nhdplusv2:hasFTYPE ?ds_ftype .
	?upstream_flowline hyf:downstreamFlowPathTC ?downstream_flowline ;   # and their upstream parts
					   geo:hasGeometry/geo:asWKT ?upstream_flowlineWKT ;
					   nhdplusv2:hasFTYPE ?us_ftype .
	#?s2fl spatial:connectedTo ?upstream_flowline ;
	#		rdf:type kwg-ont:S2Cell_Level13 .
}
'''

# Run the query on the shared client and flatten the bindings into a DataFrame.
sparqlGET.setQuery(q2)
hydrology_result = sparqlGET.query()
hydrology = convertToDataframe(hydrology_result)

#s2_fl_values_string = convertS2ListToQueryString(hydrology['s2cell'].tolist())

In [None]:
# Preview the upstream flowlines returned by q2 (URI, FTYPE, WKT geometry).
hydrology

Unnamed: 0,upstream_flowline,us_ftype,upstream_flowlineWKT
0,https://geoconnex.us/nhdplusv2/comid/166195992,StreamRiver,LINESTRING (-69.98898240180313 44.500690797592...
1,https://geoconnex.us/nhdplusv2/comid/166195993,ArtificialPath,LINESTRING (-69.99359506846264 44.510157797578...
2,https://geoconnex.us/nhdplusv2/comid/166195994,StreamRiver,LINESTRING (-69.99278040179723 44.511933997575...
3,https://geoconnex.us/nhdplusv2/comid/166195995,ArtificialPath,LINESTRING (-69.9924186017978 44.5140668642386...
4,https://geoconnex.us/nhdplusv2/comid/166195996,StreamRiver,LINESTRING (-69.99321840179653 44.514987330903...
...,...,...,...
4665,https://geoconnex.us/nhdplusv2/comid/9320179,ArtificialPath,LINESTRING (-70.90983600037379 44.014045798348...
4666,https://geoconnex.us/nhdplusv2/comid/9320181,ArtificialPath,LINESTRING (-70.90626046704597 44.011690531685...
4667,https://geoconnex.us/nhdplusv2/comid/9320183,ArtificialPath,LINESTRING (-70.9040294003828 44.0097455983547...
4668,https://geoconnex.us/nhdplusv2/comid/9320185,ArtificialPath,LINESTRING (-70.90197086705263 44.007910931690...


In [None]:
# Retrieve facility details
#
# Steps encoded in the query, innermost first:
#   1. S2 cells near qualifying sample points (same filter as q1).
#   2. Level-13 S2 cells of the water bodies connected to those cells.
#   3. Flow paths connected to those cells, then the flow paths upstream of them
#      (via hyf:downstreamFlowPathTC) and the level-13 S2 cells (?s2fl) connected to those.
#   4. Facilities contained in those S2 cells whose NAICS industry code rolls up
#      (code -> group -> subsector) to NAICS-31, NAICS-32 or NAICS-33.
#
# The projection lists ?industryCode, the variable the graph pattern actually binds.
# It previously listed ?industry, which is never bound, so the industry code
# column was silently absent from the result.
q3 = '''
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX dsstox: <http://w3id.org/DSSTox/v1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <https://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX nhdplusv2: <http://nhdplusv2.spatialai.org/v1/nhdplusv2#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>

SELECT DISTINCT ?facility ?facWKT ?facilityName ?industryCode ?industryName ?industryGroupCode ?industryGroupName ?industrySubsectorCode ?industrySubsectorName WHERE {
      {SELECT DISTINCT ?s2fl WHERE {
          {SELECT DISTINCT ?s2wb WHERE {																# get s2 of water bodies that overlap...
            ?wb a hyf:HY_WaterBody ;
                geo:hasGeometry/geo:asWKT ?wbWKT ;
                spatial:connectedTo ?s2cell ;
                spatial:connectedTo ?s2wb .
            ?s2wb a kwg-ont:S2Cell_Level13 .
                    {SELECT DISTINCT ?s2cell WHERE { 										# s2cells of samples near water
                    ?sp rdf:type coso:SamplePoint ;
                        spatial:connectedTo ?ar3 ;
                        spatial:connectedTo ?s2 .
                    ?ar3 rdf:type kwg-ont:AdministrativeRegion_3 ;
                        kwg-ont:administrativePartOf+ kwgr:administrativeRegion.USA.''' + regionCode + ''' .
                    ?s2 rdf:type kwg-ont:S2Cell_Level13 .
                    ?s2cell rdf:type kwg-ont:S2Cell_Level13 ;
                            kwg-ont:sfTouches | owl:sameAs ?s2 ;
                            spatial:connectedTo ?waterbody .
                    ?waterbody a hyf:HY_WaterBody .
                    ?observation rdf:type coso:ContaminantObservation ;
                                coso:observedAtSamplePoint ?sp ;
                                coso:ofSubstance ?substance ;
                                coso:analyzedSample ?sample ;
                                coso:hasResult ?result .
                    ?sample rdfs:label ?sampleLabel ;
                            rdf:type ?matType .
                    ?matType rdfs:label ?matTypeLabel .
                    ?result coso:measurementValue ?result_value ;
                            coso:measurementUnit ?unit .
                    ?unit qudt:symbol ?unit_sym .
                    VALUES ?unit {<http://qudt.org/vocab/unit/NanoGM-PER-L>}
                    VALUES ?substance {''' + substanceCode + '''}
                    VALUES ?matType {''' + matTypeCode + '''}
                    FILTER (?result_value > '''+ str(minValue) + ''')
                  } GROUP BY ?s2cell}
            }}
          ?downstream_flowline rdf:type hyf:HY_FlowPath ;							# ... and flowlines connected to
                    spatial:connectedTo ?s2wb ;
                    nhdplusv2:hasFTYPE ?ds_ftype .
          ?upstream_flowline hyf:downstreamFlowPathTC ?downstream_flowline ;   # and their upstream parts
                    geo:hasGeometry/geo:asWKT ?upstream_flowlineWKT ;
                    nhdplusv2:hasFTYPE ?us_ftype .
          ?s2fl spatial:connectedTo ?upstream_flowline ;
          		rdf:type kwg-ont:S2Cell_Level13 .
        }}

    ?s2fl kwg-ont:sfContains ?facility.
    ?facility fio:ofIndustry ?industryCode, ?industryGroupCode, ?industrySubsectorCode ;
              geo:hasGeometry/geo:asWKT ?facWKT;
              rdfs:label ?facilityName.
    ?industryCode a naics:NAICS-IndustryCode;
                  rdfs:label ?industryName ;
    fio:subcodeOf ?industryGroupCode .
    ?industryGroupCode a naics:NAICS-IndustryGroup;
                       rdfs:label ?industryGroupName ;
                       fio:subcodeOf ?industrySubsectorCode .
    ?industrySubsectorCode a naics:NAICS-IndustrySubsector;
                           rdfs:label ?industrySubsectorName;
                           fio:subcodeOf ?manufacturing.
  VALUES ?manufacturing {naics:NAICS-31 naics:NAICS-32 naics:NAICS-33} .
}
'''

# Run the query on the shared client and flatten the bindings into a DataFrame.
sparqlGET.setQuery(q3)
facility_result = sparqlGET.query()
facilities = convertToDataframe(facility_result)

In [None]:
# Preview the facilities returned by q3 together with their NAICS industry, group and subsector columns.
facilities

Unnamed: 0,facility,facWKT,facilityName,industryName,industryGroupCode,industryGroupName,industrySubsectorCode,industrySubsectorName
0,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-71.197650 44.480810),"ISAACSON STRUCTURAL STEEL, INC.",Fabricated Structural Metal Manufacturing,http://w3id.org/fio/v1/naics#NAICS-3323,Architectural and Structural Metals Manufacturing,http://w3id.org/fio/v1/naics#NAICS-332,Fabricated Metal Product Manufacturing
1,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-71.197650 44.480810),"ISAACSON STRUCTURAL STEEL, INC.",Plate Work and Fabricated Structural Product M...,http://w3id.org/fio/v1/naics#NAICS-3323,Architectural and Structural Metals Manufacturing,http://w3id.org/fio/v1/naics#NAICS-332,Fabricated Metal Product Manufacturing
2,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-71.197650 44.480810),"ISAACSON STRUCTURAL STEEL, INC.",Rolling and Drawing of Purchased Steel,http://w3id.org/fio/v1/naics#NAICS-3312,Steel Product Manufacturing from Purchased Steel,http://w3id.org/fio/v1/naics#NAICS-331,Primary Metal Manufacturing
3,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.340950 43.650630),NICHOLS PORTLAND DIVISION PARKER HANNIFIN CORP...,Fluid Power Valve and Hose Fitting Manufacturing,http://w3id.org/fio/v1/naics#NAICS-3329,Other Fabricated Metal Product Manufacturing,http://w3id.org/fio/v1/naics#NAICS-332,Fabricated Metal Product Manufacturing
4,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.340950 43.650630),NICHOLS PORTLAND DIVISION PARKER HANNIFIN CORP...,Forging and Stamping,http://w3id.org/fio/v1/naics#NAICS-3321,Forging and Stamping,http://w3id.org/fio/v1/naics#NAICS-332,Fabricated Metal Product Manufacturing
...,...,...,...,...,...,...,...,...
514,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.236850 44.508300),"GODFREY FOREST ARIZONA, LLC","Veneer, Plywood, and Engineered Wood Product M...",http://w3id.org/fio/v1/naics#NAICS-3212,"Veneer, Plywood, and Engineered Wood Product M...",http://w3id.org/fio/v1/naics#NAICS-321,Wood Product Manufacturing
515,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.736245 44.410105),"HANCOCK LUMBER COMPANY, INC. - BETHEL",Sawmills,http://w3id.org/fio/v1/naics#NAICS-3211,Sawmills and Wood Preservation,http://w3id.org/fio/v1/naics#NAICS-321,Wood Product Manufacturing
516,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.736245 44.410105),"HANCOCK LUMBER COMPANY, INC. - BETHEL",Sawmills and Wood Preservation,http://w3id.org/fio/v1/naics#NAICS-3211,Sawmills and Wood Preservation,http://w3id.org/fio/v1/naics#NAICS-321,Wood Product Manufacturing
517,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.755589 44.643067),WOOD PRODUCTS COMPLEX ANDOVER LLC,Office Furniture (including Fixtures) Manufact...,http://w3id.org/fio/v1/naics#NAICS-3372,Office Furniture (including Fixtures) Manufact...,http://w3id.org/fio/v1/naics#NAICS-337,Furniture and Related Product Manufacturing


In [None]:
# County boundaries for mapping
#
# Fetches the WKT geometry and label of the single region selected in the parameter
# cell (kwgr:administrativeRegion.USA.<regionCode>).
# NOTE(review): when a state code such as "23" is selected, this presumably returns the
# state outline rather than its counties - confirm that is the intended map boundary.
q_counties = '''
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT * WHERE {
    ?county geo:hasGeometry/geo:asWKT ?countyWKT ;
            rdfs:label ?countyName.
    VALUES ?county {kwgr:administrativeRegion.USA.'''+ regionCode + '''}
}
'''

# Run the query on the shared client and flatten the bindings into a DataFrame.
sparqlGET.setQuery(q_counties)
counties_result = sparqlGET.query()
counties = convertToDataframe(counties_result)

## Prep data for mapping

In [None]:
# Build the sample point map layer from the q1 result. The S2 cell column is left out,
# so rows that differ only by S2 cell collapse into one row per sample point.
samplept_columns = ['resultCount', 'max', 'sp', 'spWKT']
samplept_map = samplepoints.loc[:, samplept_columns].drop_duplicates()
# Parse the WKT text into shapely geometries for the geometry column.
samplept_map = samplept_map.assign(spWKT=[wkt.loads(text) for text in samplept_map['spWKT']])
samplept_map = gpd.GeoDataFrame(samplept_map, geometry='spWKT')
# Tag the coordinates as EPSG:4326 (no reprojection takes place).
samplept_map = samplept_map.set_crs(epsg=4326, allow_override=True)
samplept_map

Unnamed: 0,resultCount,max,sp,spWKT
0,11,11.5,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.20734 43.59178)
2,1,10.8,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.63581 44.14963)
3,8,21.1,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.63825 44.14767)
4,3,14.4,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.63529 44.14972)
5,4,32.9,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.63758 44.14741)
...,...,...,...,...
184,2,3180.0,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.54736 43.76432)
185,2,7470.0,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.5481 43.76388)
186,1,5220.0,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.54708 43.76279)
187,2,146.0,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.54802 43.76194)


In [None]:
# Turn the q_waterbodies result into a GeoDataFrame for mapping.
# The cell is safe to re-run: values that are already geometries are left untouched
# instead of being passed to wkt.loads again, which would raise.
waterbody_columns = ['wb', 'wb_name', 'wbWKT']
waterbodies = waterbodies.drop_duplicates()
waterbodies = waterbodies.assign(
    wbWKT=waterbodies['wbWKT'].apply(lambda geom: wkt.loads(geom) if isinstance(geom, str) else geom)
)
waterbodies = gpd.GeoDataFrame(waterbodies, geometry='wbWKT')
# Tag the coordinates as EPSG:4326 (no reprojection takes place).
waterbodies = waterbodies.set_crs(epsg=4326, allow_override=True)
waterbodies

Unnamed: 0,wb,wb_name,wbWKT
0,https://geoconnex.us/nhdplusv2/comid/166421056,Parker Pond,"POLYGON ((-70.51402 43.9949, -70.51411 43.9948..."
1,https://geoconnex.us/nhdplusv2/comid/6713733,,"POLYGON ((-70.0352 43.98209, -70.03536 43.9817..."
2,https://geoconnex.us/nhdplusv2/comid/6718823,Island Pond,"POLYGON ((-70.64394 44.15504, -70.64528 44.155..."
3,https://geoconnex.us/nhdplusv2/comid/6718889,Pleasant Lake,"POLYGON ((-70.54175 44.05718, -70.5415 44.0574..."
4,https://geoconnex.us/nhdplusv2/comid/6718893,Woods Pond,"POLYGON ((-70.72769 44.01935, -70.72969 44.019..."
5,https://geoconnex.us/nhdplusv2/comid/6719609,Crescent Lake,"POLYGON ((-70.46277 43.99126, -70.46182 43.991..."
6,https://geoconnex.us/nhdplusv2/comid/6719681,Panther Pond,"POLYGON ((-70.48485 43.94499, -70.48438 43.944..."
7,https://geoconnex.us/nhdplusv2/comid/6719717,Sebago Lake Basin,"POLYGON ((-70.50902 43.84804, -70.50871 43.848..."
8,https://geoconnex.us/nhdplusv2/comid/6719845,,"POLYGON ((-70.10784 43.8373, -70.10794 43.8373..."
9,https://geoconnex.us/nhdplusv2/comid/6719849,,"POLYGON ((-70.10734 43.8341, -70.10753 43.8341..."


In [None]:
# Turn the q3 result into a GeoDataFrame for mapping.
# The cell is safe to re-run: values that are already geometries are left untouched
# instead of being passed to wkt.loads again, which would raise.
facilities = facilities.drop_duplicates()
facilities = facilities.assign(
    facWKT=facilities['facWKT'].apply(lambda geom: wkt.loads(geom) if isinstance(geom, str) else geom)
)
facilities = gpd.GeoDataFrame(facilities, geometry='facWKT')
# Tag the coordinates as EPSG:4326 (no reprojection takes place).
facilities = facilities.set_crs(epsg=4326, allow_override=True)
# Keep only the fragment after '#' of the NAICS URIs, e.g. ".../naics#NAICS-332" -> "NAICS-332".
# Values without '#' are returned unchanged, so this step is also re-run safe.
facilities['industryGroupCode'] = facilities['industryGroupCode'].apply(lambda x: x.split('#')[-1])
facilities['industrySubsectorCode'] = facilities['industrySubsectorCode'].apply(lambda x: x.split('#')[-1])

In [None]:
# Turn the q2 result (upstream flowlines) into a GeoDataFrame for mapping.
# The cell is safe to re-run: values that are already geometries are left untouched
# instead of being passed to wkt.loads again, which would raise.
hydrology_columns = ['upstream_flowline', 'us_ftype', 'upstream_flowlineWKT']
hydrology = hydrology.assign(
    upstream_flowlineWKT=hydrology['upstream_flowlineWKT'].apply(
        lambda geom: wkt.loads(geom) if isinstance(geom, str) else geom
    )
)
hydrology = hydrology.drop_duplicates()
hydrology = gpd.GeoDataFrame(hydrology, geometry='upstream_flowlineWKT')
# Tag the coordinates as EPSG:4326 (no reprojection takes place).
hydrology = hydrology.set_crs(epsg=4326, allow_override=True)
hydrology

Unnamed: 0,upstream_flowline,us_ftype,upstream_flowlineWKT
0,https://geoconnex.us/nhdplusv2/comid/166195992,StreamRiver,"LINESTRING (-69.98898 44.50069, -69.98799 44.4..."
1,https://geoconnex.us/nhdplusv2/comid/166195993,ArtificialPath,"LINESTRING (-69.9936 44.51016, -69.99373 44.50..."
2,https://geoconnex.us/nhdplusv2/comid/166195994,StreamRiver,"LINESTRING (-69.99278 44.51193, -69.99304 44.5..."
3,https://geoconnex.us/nhdplusv2/comid/166195995,ArtificialPath,"LINESTRING (-69.99242 44.51407, -69.99232 44.5..."
4,https://geoconnex.us/nhdplusv2/comid/166195996,StreamRiver,"LINESTRING (-69.99322 44.51499, -69.99242 44.5..."
...,...,...,...
4665,https://geoconnex.us/nhdplusv2/comid/9320179,ArtificialPath,"LINESTRING (-70.90984 44.01405, -70.90759 44.0..."
4666,https://geoconnex.us/nhdplusv2/comid/9320181,ArtificialPath,"LINESTRING (-70.90626 44.01169, -70.90622 44.0..."
4667,https://geoconnex.us/nhdplusv2/comid/9320183,ArtificialPath,"LINESTRING (-70.90403 44.00975, -70.90324 44.0..."
4668,https://geoconnex.us/nhdplusv2/comid/9320185,ArtificialPath,"LINESTRING (-70.90197 44.00791, -70.90158 44.0..."


In [None]:
# Turn the q_counties result into a GeoDataFrame for mapping.
# The cell is safe to re-run: values that are already geometries are left untouched
# instead of being passed to wkt.loads again, which would raise.
counties = counties.assign(
    countyWKT=counties['countyWKT'].apply(lambda geom: wkt.loads(geom) if isinstance(geom, str) else geom)
)
counties = counties.drop_duplicates()
counties = gpd.GeoDataFrame(counties, geometry='countyWKT')
# Tag the coordinates as EPSG:4326 (no reprojection takes place).
counties = counties.set_crs(epsg=4326, allow_override=True)
# Simplify the boundary at a tolerance of 0.001 degrees for faster mapping.
# simplify() returns a bare GeoSeries, so the result is written back into the geometry
# column; rebinding `counties` to it would discard the county URI and name columns.
counties['countyWKT'] = counties.geometry.simplify(tolerance=0.001)
counties

Unnamed: 0,0
0,"POLYGON ((-69.8819 43.63647, -70.13661 43.5334..."


In [None]:
%%capture
from branca.element import Figure
import math

map = counties.explore(name='Counties',
                       style_kwds=dict(color='Gray',
                                       fill=False,
                                       weight=7))

if 'wb_name' in waterbodies.columns:
    wbcols = ['wb', 'wb_name']
else:
    wbcols = ['wb']

samplept_map.explore(m=map,
                     name=f'<span style="color:DarkOrange;">Samples</span>',
                     color='DarkOrange',
                     style_kwds=dict(style_function=lambda x: {'radius': math.log(float(x['properties']['max'])) * 1.8976 + 3.36937,  # fits to (4 ppt, 6 radius) and (6400 ppt, 20 radius)
                                                               "opacity":1,
                                                               "color":'DimGray'},
                                     fillOpacity=1),
                     marker_kwds=dict(radius=6,
                                      fill=True),
                     marker_type='circle_marker',
                     tooltip=['sp', 'max', 'resultCount'],
                     show=True)

waterbodies.explore(m=map,
                    name='<span style="color:Blue;">Waterbodies</span>',
                    style_kwds=dict(color='Blue',
                                    fill=False,
                                    weight=4),
                    tooltip=wbcols,
                    show=True)

hydrology.explore(m=map,
                  name='<span style="color:Blue;">Upstream Flowlines</span>',
                  color='Blue',
                  style_kwds=dict(weight=1),
                  tooltip=['upstream_flowline', 'us_ftype'],
                  show=False)

c = 0
colors = ['Purple', 'PaleVioletRed', 'Orchid', 'Fuchsia', 'MediumVioletRed', 'HotPink', 'LightPink', 'red', 'lightred', 'pink', 'orange',
          'MidnightBlue', 'MediumBlue', 'SlateBlue', 'MediumSlateBlue', 'DodgerBlue', 'DeepSkyBlue', 'SkyBlue', 'CadetBlue', 'DarkCyan', 'LightSeaGreen',
          'MediumSageGreen', 'lightblue', 'gray', 'blue', 'darkred', 'lightgreen', 'green', 'darkblue', 'darkpurple', 'cadetblue', 'orange', 'lightgray', 'darkgreen']
for industry in list(facilities.industrySubsectorName.unique()):
    facilities[facilities['industrySubsectorName']==industry].explore(m=map,
                                                                      name=f'<span style="color:{colors[c]};">{industry}</span>',
                                                                      color=colors[c],
                                                                      marker_kwds=dict(radius=6,
                                                                                       fill=True),
                                                                      style_kwds=dict(color='black',
                                                                                      fillOpacity=1),
                                                                      marker_type='circle_marker',
                                                                      show=True)
    c += 1

## Map

In [None]:
# Add an expanded layer-toggle control to the map.
# NOTE(review): re-running this cell presumably adds another control to the same map object.
folium.LayerControl(collapsed=False).add_to(map)
# Wrap the map in a full-width Figure with a height of 700 so the rendered size is controlled.
fig = Figure(width='100%', height=700)
# Last expression of the cell: attaches the map to the figure.
fig.add_child(map)

In [None]:
# Write the figure to an HTML file; the path is relative to the current working directory.
fig.save('SAWGraph_Y3-Kickoff-Demo_TracingUpstream.html')