This notebook generates a map to answer the following question:
* Find industrial facilities that are upstream from surface water bodies with concerning PFAS sample results (e.g., > 4 ppt).

# Setup


In [None]:
%%capture
!pip install mapclassify
!pip install SPARQLWrapper
!pip install rdflib

In [None]:
# from branca.element import Figure                                  # For controlling the size of the final map
import folium                                                      # For map layer control
import geopandas as gpd                                            # For geospatial dataframes
import pandas as pd                                                # For dataframes
from shapely import wkt                                            # For working with WKT coordinates in a GeoDataFrame
from SPARQLWrapper import SPARQLWrapper2, JSON, GET, POST, DIGEST  # For querying SPARQL endpoints
import rdflib                                                      # For working with URIs

def convertToDataframe(results):
    """Turn SPARQL query results into a pandas DataFrame.

    Every entry of ``results.bindings`` becomes one row. Each bound value is
    wrapped in an rdflib ``Literal`` built from the value's ``value`` and
    ``datatype`` attributes and then converted with ``toPython()``.

    Args:
        results: object exposing ``bindings``, an iterable of mappings from
            variable name to an object with ``value`` and ``datatype``.

    Returns:
        A ``pandas.DataFrame`` with one row per binding and one column per
        variable that was bound.
    """
    def _to_python(term):
        # Let rdflib interpret the lexical value according to its datatype.
        return rdflib.term.Literal(term.value, datatype=term.datatype).toPython()  # type: ignore[no-untyped-call]

    records = [
        {name: _to_python(binding[name]) for name in binding}
        for binding in results.bindings
    ]
    return pd.DataFrame(records)

def convertS2ListToQueryString(s2list):
    """Build a space-separated string of prefixed S2 cell names.

    Duplicate entries are removed and the full resource namespace
    ``http://stko-kwg.geog.ucsb.edu/lod/resource/`` is shortened to the
    ``kwgr:`` prefix, so the result can be spliced into a query that declares
    that prefix.

    Unlike a ``set``-based de-duplication, the first-seen order of the input is
    preserved, so the same input always yields the same string.

    Args:
        s2list: iterable of S2 cell URI strings (may contain duplicates).

    Returns:
        The unique, shortened names joined by single spaces; ``""`` for empty
        input.
    """
    namespace = "http://stko-kwg.geog.ucsb.edu/lod/resource/"
    # dict.fromkeys de-duplicates while keeping insertion order (deterministic output).
    unique_cells = dict.fromkeys(s2list)
    return " ".join(cell.replace(namespace, "kwgr:") for cell in unique_cells)


In [None]:
# for interactive widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

In [None]:
# SPARQL endpoint that every query in this notebook is sent to.
frink_fed_endpoint = 'https://frink.apps.renci.org/federation/sparql'

# Shared query client used by all query cells below.
# NOTE(review): despite the "GET" in its name, the client is configured to use POST.
sparqlGET = SPARQLWrapper2(frink_fed_endpoint)
# NOTE(review): digest auth is selected but no credentials are set in the visible cells — confirm it is needed.
sparqlGET.setHTTPAuth(DIGEST)
sparqlGET.setMethod(POST)
sparqlGET.setReturnFormat(JSON)  # ask the endpoint for JSON results

# Find industrial facilities that are upstream from surface water bodies with concerning PFAS sample results (e.g., > 4 ppt).

In [None]:
#Parameters to choose
# The values below are spliced directly into the SPARQL query strings of the query cells.

# TODO: indicate DSSToxID for each PFAS, query by DSSToxId
# Expected form: "<label> (<DSSTox id>)"; the id is taken from the second whitespace-separated token.
substance = "PFOA (DTXSID8031865)" # @param ["PFOS (DTXSID3031864)", "PFOA (DTXSID8031865)", "PFBA (DTXSID4059916)", "PFBS (DTXSID5030030)", "PFHPA (DTXSID1037303)", "PFHXS (DTXSID7040150)", "PFHXA (DTXSID3031862)", "PFHPS (DTXSID8059920)", "PFNA (DTXSID8031863)", "PFDA (DTXSID3031860)"]{"allow-input":true}
#substanceCode = "me_egad_data:parameter." + substance + "_A"
# e.g. "PFOA (DTXSID8031865)" -> "DTXSID8031865"; a value without a second token raises IndexError
dsstoxid = substance.split()[1].strip("()")
substanceCode ="dsstox:" + dsstoxid

# TODO: change the list of options to all subclasses of coso:MaterialSample
materialType = "   Surface Water Sample" # @param ["Water Sample", "   Surface Water Sample", "   Ground Water Sample", "   Drinking Water Sample", "Animal Material Sample", "Plant Material Sample", "Solid Material Sample" ]
#materialType = "SW (Surface Water)" # @param ["DW (Drinking Water)", "GW (Groundwater)", "WW (Waste Water)", "SW (Surface Water)", "PW (Pore Water)", "L (Leachate)", "SR (Storm Water Runoff)", "SL (Soil)" ]{"allow-input":true}
#matTypeCode = "me_egad_data:sampleMaterialType." + materialType.split()[0]
# All whitespace is removed, e.g. "   Surface Water Sample" -> "coso:SurfaceWaterSample"
matTypeCode= "coso:"+ ''.join(materialType.split())

# Expected form: "<code> (<name>)"; only the leading code is used, e.g. "23005"
admin_region = "23005 (Cumberland County, Maine)" # @param ["23019 (Penobscot County, Maine)","23011 (Kennebec County, Maine)","23005 (Cumberland County, Maine)","23003 (Aroostook County, Maine)","23025 (Somerset County, Maine)","23 (Maine)"] {"allow-input":true}
regionCode = admin_region.split()[0]

# Threshold: the queries keep only results strictly greater than this value (results are restricted to ng/L).
minValue = 4 # @param
# maxValue = 1000000 # @param


## Queries

In [None]:
# Query sample points, sample data, and S2 cells within a given administrative region
#
# One result row per (?sp, ?spWKT, ?s2cell) group, with:
#   resultCount - number of distinct "<value> <unit symbol>" strings among the matching results
#   max         - largest matching result value
# Only observations of the chosen substance and material type, reported in ng/L and
# strictly above minValue, are considered.
# ?s2cell is a level-13 S2 cell that touches (kwg-ont:sfTouches) or is owl:sameAs a
# level-13 cell connected to the sample point, and that is itself connected to a water body.

q1 = '''
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX dsstox: <http://w3id.org/DSSTox/v1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <https://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX qudt: <http://qudt.org/schema/qudt/>

SELECT (COUNT(DISTINCT ?subVal) as ?resultCount) (MAX(?result_value) as ?max) ?sp ?spWKT ?s2cell WHERE {
    ?sp rdf:type coso:SamplePoint ;
        geo:hasGeometry/geo:asWKT ?spWKT ;
        spatial:connectedTo ?ar3 ;
        spatial:connectedTo ?s2 .
    ?ar3 rdf:type kwg-ont:AdministrativeRegion_3 ;
         kwg-ont:administrativePartOf+ kwgr:administrativeRegion.USA.''' + regionCode + ''' .
    ?s2 rdf:type kwg-ont:S2Cell_Level13 .
    ?s2cell rdf:type kwg-ont:S2Cell_Level13 ;
             kwg-ont:sfTouches | owl:sameAs ?s2 ;
             spatial:connectedTo ?waterbody .
    ?waterbody a hyf:HY_WaterBody .
    ?observation rdf:type coso:ContaminantObservation ;
                coso:observedAtSamplePoint ?sp ;
                coso:ofSubstance ?substance ;
                coso:analyzedSample ?sample ;
                coso:hasResult ?result .
    ?sample rdfs:label ?sampleLabel ;
            rdf:type ?matType .
    ?matType rdfs:label ?matTypeLabel .
    ?result coso:measurementValue ?result_value ;
            coso:measurementUnit ?unit .
    ?unit qudt:symbol ?unit_sym .
    VALUES ?unit {<http://qudt.org/vocab/unit/NanoGM-PER-L>}
    VALUES ?substance {''' + substanceCode + '''}
    VALUES ?matType {''' + matTypeCode + '''}
    FILTER (?result_value > '''+ str(minValue) + ''')
    BIND((CONCAT(str(?result_value) , " ", ?unit_sym)) as ?subVal)
} GROUP BY ?sp ?spWKT ?s2cell
'''
# print(q1)

# Run the query on the shared client and load the bindings into a DataFrame.
sparqlGET.setQuery(q1)
samplepoint_result = sparqlGET.query()
samplepoints = convertToDataframe(samplepoint_result)

#s2_sp_values_string = convertS2ListToQueryString(samplepoints['s2cell'].tolist())

In [None]:
# Preview the q1 results (one row per sample point / S2 cell combination).
samplepoints

Unnamed: 0,resultCount,max,sp,spWKT,s2cell
0,1,14.2,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.410341 43.644329),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
1,1,18.6,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.408858 43.645348),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
2,1,7.57,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.408847 43.644141),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
3,1,14.2,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.410341 43.644329),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
4,1,18.6,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.408858 43.645348),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
5,1,7.57,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.408847 43.644141),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
6,1,14.2,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.410341 43.644329),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
7,1,18.6,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.408858 43.645348),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
8,1,7.57,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.408847 43.644141),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
9,1,14.2,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-70.410341 43.644329),http://stko-kwg.geog.ucsb.edu/lod/resource/s2....


In [None]:
# Waterbodies for mapping
#
# Returns every water body (with its WKT geometry and, where one exists, its
# schema:name) that is connected to one of the S2 cells found by the inner sub-select.
# The inner sub-select applies the sample filter (region, substance, material type,
# ng/L unit, value > minValue) and yields the level-13 S2 cells near those samples
# that are connected to a water body.
q_waterbodies = '''
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX dsstox: <http://w3id.org/DSSTox/v1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <https://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX qudt: <http://qudt.org/schema/qudt/>

SELECT DISTINCT ?wb ?wb_name ?wbWKT #?s2wb
WHERE {
    ?wb a hyf:HY_WaterBody ;
        geo:hasGeometry/geo:asWKT ?wbWKT ;
        spatial:connectedTo ?s2cell ;
        #spatial:connectedTo ?s2wb .
    #?s2wb a kwg-ont:S2Cell_Level13 .
    OPTIONAL { ?wb schema:name ?wb_name }
    # get s2cells of samples near water
    {SELECT DISTINCT ?s2cell WHERE {
    ?sp rdf:type coso:SamplePoint ;
        spatial:connectedTo ?ar3 ;
        spatial:connectedTo ?s2 .
    ?ar3 rdf:type kwg-ont:AdministrativeRegion_3 ;
         kwg-ont:administrativePartOf+ kwgr:administrativeRegion.USA.''' + regionCode + ''' .
    ?s2 rdf:type kwg-ont:S2Cell_Level13 .
    ?s2cell rdf:type kwg-ont:S2Cell_Level13 ;
             kwg-ont:sfTouches | owl:sameAs ?s2 ;
             spatial:connectedTo ?waterbody .
    ?waterbody a hyf:HY_WaterBody .
    ?observation rdf:type coso:ContaminantObservation ;
                coso:observedAtSamplePoint ?sp ;
                coso:ofSubstance ?substance ;
                coso:analyzedSample ?sample ;
                coso:hasResult ?result .
    ?sample rdfs:label ?sampleLabel ;
            rdf:type ?matType .
    ?matType rdfs:label ?matTypeLabel .
    ?result coso:measurementValue ?result_value ;
            coso:measurementUnit ?unit .
    ?unit qudt:symbol ?unit_sym .
    VALUES ?unit {<http://qudt.org/vocab/unit/NanoGM-PER-L>}
    VALUES ?substance {''' + substanceCode + '''}
    VALUES ?matType {''' + matTypeCode + '''}
    FILTER (?result_value > '''+ str(minValue) + ''')
  } GROUP BY ?s2cell}
}
'''

# Run the query on the shared client and load the bindings into a DataFrame.
# Because ?wb_name is OPTIONAL, the 'wb_name' column is absent when no water body has a name.
sparqlGET.setQuery(q_waterbodies)
waterbodies_result = sparqlGET.query()
waterbodies = convertToDataframe(waterbodies_result)

#s2_wb_values_string = convertS2ListToQueryString(waterbodies['s2wb'].tolist())

In [None]:
# Preview the water bodies returned by q_waterbodies.
waterbodies

Unnamed: 0,wb,wbWKT
0,https://geoconnex.us/nhdplusv2/comid/6720129,POLYGON ((-70.40314880116023 43.64426313225539...
1,https://geoconnex.us/nhdplusv2/comid/6720137,"POLYGON ((-70.3898278678476 43.64035599892816,..."
2,https://geoconnex.us/nhdplusv2/comid/6720139,POLYGON ((-70.39212566784403 43.63827473226468...
3,https://geoconnex.us/nhdplusv2/comid/6720153,POLYGON ((-70.39681520117006 43.63429499893755...
4,https://geoconnex.us/nhdplusv2/comid/6720169,POLYGON ((-70.27834986802065 43.62966493227805...


In [None]:
# Query the hydrology graph for the flowlines upstream of the affected water bodies
#
# Structure of the query, innermost first:
#   1. ?s2cell  - level-13 S2 cells near qualifying samples (same sample filter as q1)
#                 that are connected to a water body.
#   2. ?s2wb    - every level-13 S2 cell connected to a water body that is also
#                 connected to one of those ?s2cell cells.
#   3. outer    - flow paths connected to ?s2wb (?downstream_flowline) and every
#                 flowline linked to them through hyf:downstreamFlowPathTC
#                 (?upstream_flowline), returned with its FTYPE and WKT geometry.

q2 = '''
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX dsstox: <http://w3id.org/DSSTox/v1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <https://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX nhdplusv2: <http://nhdplusv2.spatialai.org/v1/nhdplusv2#>

SELECT DISTINCT ?upstream_flowline ?us_ftype ?upstream_flowlineWKT #?downstream_flowline ?ds_ftype
WHERE {
	{SELECT DISTINCT ?s2wb WHERE {																# get s2 of water bodies that overlap...
    ?wb a hyf:HY_WaterBody ;
        geo:hasGeometry/geo:asWKT ?wbWKT ;
        spatial:connectedTo ?s2cell ;
        spatial:connectedTo ?s2wb .
    ?s2wb a kwg-ont:S2Cell_Level13 .
						{SELECT DISTINCT ?s2cell WHERE { 										# s2cells of samples near water
						?sp rdf:type coso:SamplePoint ;
								spatial:connectedTo ?ar3 ;
								spatial:connectedTo ?s2 .
						?ar3 rdf:type kwg-ont:AdministrativeRegion_3 ;
								kwg-ont:administrativePartOf+ kwgr:administrativeRegion.USA.''' + regionCode + ''' .
						?s2 rdf:type kwg-ont:S2Cell_Level13 .
						?s2cell rdf:type kwg-ont:S2Cell_Level13 ;
										kwg-ont:sfTouches | owl:sameAs ?s2 ;
										spatial:connectedTo ?waterbody .
						?waterbody a hyf:HY_WaterBody .
						?observation rdf:type coso:ContaminantObservation ;
												coso:observedAtSamplePoint ?sp ;
												coso:ofSubstance ?substance ;
												coso:analyzedSample ?sample ;
												coso:hasResult ?result .
						?sample rdfs:label ?sampleLabel ;
										rdf:type ?matType .
						?matType rdfs:label ?matTypeLabel .
						?result coso:measurementValue ?result_value ;
										coso:measurementUnit ?unit .
						?unit qudt:symbol ?unit_sym .
						VALUES ?unit {<http://qudt.org/vocab/unit/NanoGM-PER-L>}
						VALUES ?substance {''' + substanceCode + '''}
						VALUES ?matType {''' + matTypeCode + '''}
						FILTER (?result_value > '''+ str(minValue) + ''')
					} GROUP BY ?s2cell}
		}}
	?downstream_flowline rdf:type hyf:HY_FlowPath ;							# ... and flowlines connected to
						 spatial:connectedTo ?s2wb ;
						 nhdplusv2:hasFTYPE ?ds_ftype .
	?upstream_flowline hyf:downstreamFlowPathTC ?downstream_flowline ;   # and their upstream parts
					   geo:hasGeometry/geo:asWKT ?upstream_flowlineWKT ;
					   nhdplusv2:hasFTYPE ?us_ftype .
	#?s2fl spatial:connectedTo ?upstream_flowline ;
	#		rdf:type kwg-ont:S2Cell_Level13 .
}
'''

# Run the query on the shared client and load the bindings into a DataFrame.
sparqlGET.setQuery(q2)
hydrology_result = sparqlGET.query()
hydrology = convertToDataframe(hydrology_result)

#s2_fl_values_string = convertS2ListToQueryString(hydrology['s2cell'].tolist())

In [None]:
# Preview the upstream flowlines returned by q2.
hydrology

Unnamed: 0,upstream_flowline,us_ftype,upstream_flowlineWKT
0,https://geoconnex.us/nhdplusv2/comid/6721543,StreamRiver,LINESTRING (-70.39310646784253 43.650965532244...
1,https://geoconnex.us/nhdplusv2/comid/6721551,StreamRiver,LINESTRING (-70.4851554676996 43.6505437989123...
2,https://geoconnex.us/nhdplusv2/comid/6721565,StreamRiver,LINESTRING (-70.43874500110502 43.657120932235...
3,https://geoconnex.us/nhdplusv2/comid/6721567,StreamRiver,LINESTRING (-70.4509240010861 43.6734621988767...
4,https://geoconnex.us/nhdplusv2/comid/6721569,StreamRiver,LINESTRING (-70.42496820112638 43.642128132258...
5,https://geoconnex.us/nhdplusv2/comid/6721571,StreamRiver,LINESTRING (-70.42871486778722 43.641920532259...
6,https://geoconnex.us/nhdplusv2/comid/6721573,StreamRiver,LINESTRING (-70.43370626777948 43.657604132234...
7,https://geoconnex.us/nhdplusv2/comid/6721575,StreamRiver,LINESTRING (-70.48067846770658 43.647689998916...
8,https://geoconnex.us/nhdplusv2/comid/6721577,StreamRiver,LINESTRING (-70.44004640110296 43.638644332264...
9,https://geoconnex.us/nhdplusv2/comid/6721579,StreamRiver,LINESTRING (-70.44745320109149 43.644767598921...


In [None]:
# Retrieve facility details
#
# The nested sub-selects repeat the hydrology query and additionally collect the
# level-13 S2 cells (?s2fl) connected to the upstream flowlines. The outer pattern
# then returns the facilities contained in those cells whose NAICS industry code
# rolls up (industry -> industry group -> subsector) to one of the manufacturing
# sectors NAICS-31, NAICS-32 or NAICS-33.
#
# Fix: the SELECT clause used to project ?industry, a variable that is never bound
# in the pattern (the pattern binds ?industryCode), so the industry URI was silently
# missing from the result. It now projects ?industryCode.
q3 = '''
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX dsstox: <http://w3id.org/DSSTox/v1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <https://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX nhdplusv2: <http://nhdplusv2.spatialai.org/v1/nhdplusv2#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>

SELECT DISTINCT ?facility ?facWKT ?facilityName ?industryCode ?industryName ?industryGroupCode ?industryGroupName ?industrySubsectorCode ?industrySubsectorName WHERE {
      {SELECT DISTINCT ?s2fl WHERE {
          {SELECT DISTINCT ?s2wb WHERE {																# get s2 of water bodies that overlap...
            ?wb a hyf:HY_WaterBody ;
                geo:hasGeometry/geo:asWKT ?wbWKT ;
                spatial:connectedTo ?s2cell ;
                spatial:connectedTo ?s2wb .
            ?s2wb a kwg-ont:S2Cell_Level13 .
                    {SELECT DISTINCT ?s2cell WHERE { 										# s2cells of samples near water
                    ?sp rdf:type coso:SamplePoint ;
                        spatial:connectedTo ?ar3 ;
                        spatial:connectedTo ?s2 .
                    ?ar3 rdf:type kwg-ont:AdministrativeRegion_3 ;
                        kwg-ont:administrativePartOf+ kwgr:administrativeRegion.USA.''' + regionCode + ''' .
                    ?s2 rdf:type kwg-ont:S2Cell_Level13 .
                    ?s2cell rdf:type kwg-ont:S2Cell_Level13 ;
                            kwg-ont:sfTouches | owl:sameAs ?s2 ;
                            spatial:connectedTo ?waterbody .
                    ?waterbody a hyf:HY_WaterBody .
                    ?observation rdf:type coso:ContaminantObservation ;
                                coso:observedAtSamplePoint ?sp ;
                                coso:ofSubstance ?substance ;
                                coso:analyzedSample ?sample ;
                                coso:hasResult ?result .
                    ?sample rdfs:label ?sampleLabel ;
                            rdf:type ?matType .
                    ?matType rdfs:label ?matTypeLabel .
                    ?result coso:measurementValue ?result_value ;
                            coso:measurementUnit ?unit .
                    ?unit qudt:symbol ?unit_sym .
                    VALUES ?unit {<http://qudt.org/vocab/unit/NanoGM-PER-L>}
                    VALUES ?substance {''' + substanceCode + '''}
                    VALUES ?matType {''' + matTypeCode + '''}
                    FILTER (?result_value > '''+ str(minValue) + ''')
                  } GROUP BY ?s2cell}
            }}
          ?downstream_flowline rdf:type hyf:HY_FlowPath ;							# ... and flowlines connected to
                    spatial:connectedTo ?s2wb ;
                    nhdplusv2:hasFTYPE ?ds_ftype .
          ?upstream_flowline hyf:downstreamFlowPathTC ?downstream_flowline ;   # and their upstream parts
                    geo:hasGeometry/geo:asWKT ?upstream_flowlineWKT ;
                    nhdplusv2:hasFTYPE ?us_ftype .
          ?s2fl spatial:connectedTo ?upstream_flowline ;
          		rdf:type kwg-ont:S2Cell_Level13 .
        }}

    ?s2fl kwg-ont:sfContains ?facility.
    ?facility fio:ofIndustry ?industryCode, ?industryGroupCode, ?industrySubsectorCode ;
              geo:hasGeometry/geo:asWKT ?facWKT;
              rdfs:label ?facilityName.
    ?industryCode a naics:NAICS-IndustryCode;
                  rdfs:label ?industryName ;
    fio:subcodeOf ?industryGroupCode .
    ?industryGroupCode a naics:NAICS-IndustryGroup;
                       rdfs:label ?industryGroupName ;
                       fio:subcodeOf ?industrySubsectorCode .
    ?industrySubsectorCode a naics:NAICS-IndustrySubsector;
                           rdfs:label ?industrySubsectorName;
                           fio:subcodeOf ?manufacturing.
  VALUES ?manufacturing {naics:NAICS-31 naics:NAICS-32 naics:NAICS-33} .
}
'''

# Run the query on the shared client and load the bindings into a DataFrame.
# A facility appears once per matching industry code, so it can span several rows.
sparqlGET.setQuery(q3)
facility_result = sparqlGET.query()
facilities = convertToDataframe(facility_result)

In [None]:
# Preview the facilities returned by q3.
facilities

Unnamed: 0,facility,facWKT,facilityName,industryName,industryGroupCode,industryGroupName,industrySubsectorCode,industrySubsectorName
0,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.279970 43.628330),MEGQUIRE & JONES CORP,Fabricated Structural Metal Manufacturing,http://w3id.org/fio/v1/naics#NAICS-3323,Architectural and Structural Metals Manufacturing,http://w3id.org/fio/v1/naics#NAICS-332,Fabricated Metal Product Manufacturing
1,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.279970 43.628330),MEGQUIRE & JONES CORP,Plate Work and Fabricated Structural Product M...,http://w3id.org/fio/v1/naics#NAICS-3323,Architectural and Structural Metals Manufacturing,http://w3id.org/fio/v1/naics#NAICS-332,Fabricated Metal Product Manufacturing
2,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.304440 43.613220),"EAST COAST STEEL, BRIDGE DIV",Fabricated Structural Metal Manufacturing,http://w3id.org/fio/v1/naics#NAICS-3323,Architectural and Structural Metals Manufacturing,http://w3id.org/fio/v1/naics#NAICS-332,Fabricated Metal Product Manufacturing
3,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.304440 43.613220),"EAST COAST STEEL, BRIDGE DIV",Plate Work and Fabricated Structural Product M...,http://w3id.org/fio/v1/naics#NAICS-3323,Architectural and Structural Metals Manufacturing,http://w3id.org/fio/v1/naics#NAICS-332,Fabricated Metal Product Manufacturing
4,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.304440 43.613220),DURASTONE PRECAST INCORPORATED-SOUTH PORTLAND,Concrete Block and Brick Manufacturing,http://w3id.org/fio/v1/naics#NAICS-3273,Cement and Concrete Product Manufacturing,http://w3id.org/fio/v1/naics#NAICS-327,Nonmetallic Mineral Product Manufacturing
5,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.304440 43.613220),DURASTONE PRECAST INCORPORATED-SOUTH PORTLAND,"Concrete Pipe, Brick, and Block Manufacturing",http://w3id.org/fio/v1/naics#NAICS-3273,Cement and Concrete Product Manufacturing,http://w3id.org/fio/v1/naics#NAICS-327,Nonmetallic Mineral Product Manufacturing
6,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.304440 43.613220),DURASTONE PRECAST INCORPORATED-SOUTH PORTLAND,Fabricated Structural Metal Manufacturing,http://w3id.org/fio/v1/naics#NAICS-3323,Architectural and Structural Metals Manufacturing,http://w3id.org/fio/v1/naics#NAICS-332,Fabricated Metal Product Manufacturing
7,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.304440 43.613220),DURASTONE PRECAST INCORPORATED-SOUTH PORTLAND,Plate Work and Fabricated Structural Product M...,http://w3id.org/fio/v1/naics#NAICS-3323,Architectural and Structural Metals Manufacturing,http://w3id.org/fio/v1/naics#NAICS-332,Fabricated Metal Product Manufacturing
8,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.399546 43.645162),MAINE RUBBER RIM SHOP,All Other Plastics Product Manufacturing,http://w3id.org/fio/v1/naics#NAICS-3261,Plastics Product Manufacturing,http://w3id.org/fio/v1/naics#NAICS-326,Plastics and Rubber Products Manufacturing
9,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.399546 43.645162),MAINE RUBBER RIM SHOP,Other Plastics Product Manufacturing,http://w3id.org/fio/v1/naics#NAICS-3261,Plastics Product Manufacturing,http://w3id.org/fio/v1/naics#NAICS-326,Plastics and Rubber Products Manufacturing


In [None]:
# County boundaries for mapping
#
# Fetches the WKT geometry and label of the single region selected via regionCode.
# NOTE: the variables are named "county", but regionCode can also be a state-level
# code (the parameter form offers "23 (Maine)").
q_counties = '''
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT * WHERE {
    ?county geo:hasGeometry/geo:asWKT ?countyWKT ;
            rdfs:label ?countyName.
    VALUES ?county {kwgr:administrativeRegion.USA.'''+ regionCode + '''}
}
'''

# Run the query on the shared client and load the bindings into a DataFrame.
sparqlGET.setQuery(q_counties)
counties_result = sparqlGET.query()
counties = convertToDataframe(counties_result)

## Prep data for mapping

In [None]:
# Build the sample-point layer: keep only the columns that are mapped, collapse rows
# that become identical once the S2 cell column is left out, and parse the WKT
# strings into geometries tagged as EPSG:4326.
samplept_columns = ['resultCount', 'max', 'sp', 'spWKT']
samplept_map = gpd.GeoDataFrame(
    samplepoints[samplept_columns]
    .drop_duplicates()
    .assign(spWKT=lambda frame: frame['spWKT'].apply(wkt.loads)),
    geometry='spWKT',
).set_crs(epsg=4326, allow_override=True)
samplept_map

Unnamed: 0,resultCount,max,sp,spWKT
0,1,14.2,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.41034 43.64433)
1,1,18.6,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.40886 43.64535)
2,1,7.57,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.40885 43.64414)
12,1,21.5,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.28384 43.63913)
13,1,173.0,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.2859 43.6383)
14,1,5.87,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT (-70.28378 43.63925)


In [None]:
# Convert the water-body query result into a GeoDataFrame (EPSG:4326) for mapping.
#
# The isinstance guard makes this cell safe to re-run: `waterbodies` is overwritten
# with the converted frame, and parsing already-parsed geometries with wkt.loads
# would raise on a second execution.
waterbody_columns = ['wb', 'wb_name', 'wbWKT']  # 'wb_name' is OPTIONAL in the query and may be absent
if not isinstance(waterbodies, gpd.GeoDataFrame):
    waterbodies = gpd.GeoDataFrame(
        waterbodies
        .drop_duplicates()
        .assign(wbWKT=lambda frame: frame['wbWKT'].apply(wkt.loads)),
        geometry='wbWKT',
    ).set_crs(epsg=4326, allow_override=True)
waterbodies

Unnamed: 0,wb,wbWKT
0,https://geoconnex.us/nhdplusv2/comid/6720129,"POLYGON ((-70.40315 43.64426, -70.40482 43.645..."
1,https://geoconnex.us/nhdplusv2/comid/6720137,"POLYGON ((-70.38983 43.64036, -70.39102 43.641..."
2,https://geoconnex.us/nhdplusv2/comid/6720139,"POLYGON ((-70.39213 43.63827, -70.39263 43.638..."
3,https://geoconnex.us/nhdplusv2/comid/6720153,"POLYGON ((-70.39682 43.63429, -70.3977 43.6351..."
4,https://geoconnex.us/nhdplusv2/comid/6720169,"POLYGON ((-70.27835 43.62966, -70.2786 43.6299..."


In [None]:
# Convert the facility query result into a GeoDataFrame (EPSG:4326) for mapping.
#
# The isinstance guard makes this cell safe to re-run: `facilities` is overwritten
# with the converted frame, and parsing already-parsed geometries with wkt.loads
# would raise on a second execution.
if not isinstance(facilities, gpd.GeoDataFrame):
    facilities = gpd.GeoDataFrame(
        facilities
        .drop_duplicates()
        .assign(facWKT=lambda frame: frame['facWKT'].apply(wkt.loads)),
        geometry='facWKT',
    ).set_crs(epsg=4326, allow_override=True)

# Shorten the NAICS URIs to the fragment after '#'. Values that are already
# shortened contain no '#' and are left unchanged, so repeating this is harmless.
for code_column in ('industryGroupCode', 'industrySubsectorCode'):
    facilities[code_column] = facilities[code_column].apply(lambda uri: uri.split('#')[-1])

In [None]:
# Convert the upstream-flowline query result into a GeoDataFrame (EPSG:4326) for mapping.
#
# The isinstance guard makes this cell safe to re-run: `hydrology` is overwritten
# with the converted frame, and parsing already-parsed geometries with wkt.loads
# would raise on a second execution.
hydrology_columns = ['upstream_flowline', 'us_ftype', 'upstream_flowlineWKT']
if not isinstance(hydrology, gpd.GeoDataFrame):
    hydrology = gpd.GeoDataFrame(
        hydrology
        .assign(upstream_flowlineWKT=lambda frame: frame['upstream_flowlineWKT'].apply(wkt.loads))
        .drop_duplicates(),
        geometry='upstream_flowlineWKT',
    ).set_crs(epsg=4326, allow_override=True)
hydrology

Unnamed: 0,upstream_flowline,us_ftype,upstream_flowlineWKT
0,https://geoconnex.us/nhdplusv2/comid/6721543,StreamRiver,"LINESTRING (-70.39311 43.65097, -70.39355 43.6..."
1,https://geoconnex.us/nhdplusv2/comid/6721551,StreamRiver,"LINESTRING (-70.48516 43.65054, -70.48412 43.6..."
2,https://geoconnex.us/nhdplusv2/comid/6721565,StreamRiver,"LINESTRING (-70.43875 43.65712, -70.43884 43.6..."
3,https://geoconnex.us/nhdplusv2/comid/6721567,StreamRiver,"LINESTRING (-70.45092 43.67346, -70.45061 43.6..."
4,https://geoconnex.us/nhdplusv2/comid/6721569,StreamRiver,"LINESTRING (-70.42497 43.64213, -70.42462 43.6..."
5,https://geoconnex.us/nhdplusv2/comid/6721571,StreamRiver,"LINESTRING (-70.42871 43.64192, -70.42837 43.6..."
6,https://geoconnex.us/nhdplusv2/comid/6721573,StreamRiver,"LINESTRING (-70.43371 43.6576, -70.43279 43.65..."
7,https://geoconnex.us/nhdplusv2/comid/6721575,StreamRiver,"LINESTRING (-70.48068 43.64769, -70.48062 43.6..."
8,https://geoconnex.us/nhdplusv2/comid/6721577,StreamRiver,"LINESTRING (-70.44005 43.63864, -70.43866 43.6..."
9,https://geoconnex.us/nhdplusv2/comid/6721579,StreamRiver,"LINESTRING (-70.44745 43.64477, -70.44742 43.6..."


In [None]:
# Convert the region-boundary query result into a GeoDataFrame (EPSG:4326) for mapping.
#
# Changes from the previous version of this cell:
#   * Only the geometry column is simplified. Calling simplify() on the whole frame
#     returned a bare GeoSeries, which dropped the region URI and name.
#   * The isinstance guard makes the cell safe to re-run (parsing already-parsed
#     geometries with wkt.loads would raise on a second execution).
if not isinstance(counties, gpd.GeoDataFrame):
    counties = gpd.GeoDataFrame(
        counties
        .assign(countyWKT=lambda frame: frame['countyWKT'].apply(wkt.loads))
        .drop_duplicates(),
        geometry='countyWKT',
    ).set_crs(epsg=4326, allow_override=True)
    # Simplify the boundary at a tolerance of 0.001 degrees for faster mapping.
    counties['countyWKT'] = counties['countyWKT'].simplify(tolerance=0.001)
counties

Unnamed: 0,0
0,"POLYGON ((-69.8819 43.63647, -70.13661 43.5334..."


In [None]:
%%capture
from branca.element import Figure
import math

map = counties.explore(name='Counties',
                       style_kwds=dict(color='Gray',
                                       fill=False,
                                       weight=7))

if 'wb_name' in waterbodies.columns:
    wbcols = ['wb', 'wb_name']
else:
    wbcols = ['wb']

samplept_map.explore(m=map,
                     name=f'<span style="color:DarkOrange;">Samples</span>',
                     color='DarkOrange',
                     style_kwds=dict(style_function=lambda x: {'radius': math.log(float(x['properties']['max'])) * 1.8976 + 3.36937,  # fits to (4 ppt, 6 radius) and (6400 ppt, 20 radius)
                                                               "opacity":1,
                                                               "color":'DimGray'},
                                     fillOpacity=1),
                     marker_kwds=dict(radius=6,
                                      fill=True),
                     marker_type='circle_marker',
                     tooltip=['sp', 'max', 'resultCount'],
                     show=True)

waterbodies.explore(m=map,
                    name='<span style="color:Blue;">Waterbodies</span>',
                    style_kwds=dict(color='Blue',
                                    fill=False,
                                    weight=4),
                    tooltip=wbcols,
                    show=True)

hydrology.explore(m=map,
                  name='<span style="color:Blue;">Upstream Flowlines</span>',
                  color='Blue',
                  style_kwds=dict(weight=1),
                  tooltip=['upstream_flowline', 'us_ftype'],
                  show=False)

c = 0
colors = ['Purple', 'PaleVioletRed', 'Orchid', 'Fuchsia', 'MediumVioletRed', 'HotPink', 'LightPink', 'red', 'lightred', 'pink', 'orange',
          'MidnightBlue', 'MediumBlue', 'SlateBlue', 'MediumSlateBlue', 'DodgerBlue', 'DeepSkyBlue', 'SkyBlue', 'CadetBlue', 'DarkCyan', 'LightSeaGreen',
          'MediumSageGreen', 'lightblue', 'gray', 'blue', 'darkred', 'lightgreen', 'green', 'darkblue', 'darkpurple', 'cadetblue', 'orange', 'lightgray', 'darkgreen']
for industry in list(facilities.industrySubsectorName.unique()):
    facilities[facilities['industrySubsectorName']==industry].explore(m=map,
                                                                      name=f'<span style="color:{colors[c]};">{industry}</span>',
                                                                      color=colors[c],
                                                                      marker_kwds=dict(radius=6,
                                                                                       fill=True),
                                                                      style_kwds=dict(color='black',
                                                                                      fillOpacity=1),
                                                                      marker_type='circle_marker',
                                                                      show=True)
    c += 1

## Map

In [None]:
# Add an expanded layer toggle to the map and embed the map in a full-width,
# 700-high figure; the figure is the cell's displayed result.
# NOTE(review): re-running this cell presumably adds another LayerControl to the same map — verify.
folium.LayerControl(collapsed=False).add_to(map)
fig = Figure(width='100%', height=700)
fig.add_child(map)

In [None]:
# Save the figure as an HTML file; the path is relative, so it is written to the current working directory.
fig.save('SAWGraph_Y3-Kickoff-Demo_TracingUpstream.html')