This notebook generates maps for the UI mockup. Of the mockup's 3 questions, it answers the following one:
* Q1: What samples in a particular region (e.g. the state of Maine) are downstream of certain types of facilities (by NAICS Industry or Group Code)?

# Setup


In [1]:
# @title
%%capture
!pip install mapclassify --upgrade --quiet
!pip install SPARQLWrapper --upgrade --quiet
!pip install rdflib

In [2]:
# @title
#from branca.element import Figure                                  # For controlling the size of the final map
import folium                                                      # For map layer control
import geopandas as gpd                                            # For geospatial dataframes
import pandas as pd                                                # For dataframes
from shapely import wkt                                            # For working with WKT coordinates in a GeoDataFrame
from SPARQLWrapper import SPARQLWrapper2, JSON, GET, POST, DIGEST   # For querying SPARQL endpoints
import rdflib                                                      # For working with URIs

def convertToDataframe(results):
  """Convert a SPARQLWrapper2 query result into a pandas DataFrame.

  Each binding (result row) becomes one DataFrame row. Every bound value is
  passed through ``rdflib.term.Literal(...).toPython()`` so that typed
  literals come back as native Python objects.

  Parameters
  ----------
  results
      Object exposing ``bindings``: an iterable of dict-like rows mapping a
      variable name to a value object with ``.value`` and ``.datatype``.
      If it also exposes ``variables`` (the projected variable names, as
      SPARQLWrapper2 results do), those names define the columns.

  Returns
  -------
  pandas.DataFrame
      One column per projected variable. When ``variables`` is available the
      columns exist even if the query returned no rows, or if an OPTIONAL
      variable was never bound (it is then filled with NaN), so downstream
      column access does not raise ``KeyError``. Without ``variables`` the
      columns are inferred from the rows, as before.
  """
  rows = []
  for binding in results.bindings:
    row = {}
    for name in binding:
      term = binding[name]
      row[name] = rdflib.term.Literal(term.value, datatype=term.datatype).toPython()  # type: ignore[no-untyped-call]
    rows.append(row)

  # Fall back to pandas' own column inference when the result object does not
  # carry (or carries an empty) list of projected variables.
  variables = getattr(results, 'variables', None)
  return pd.DataFrame(rows, columns=list(variables) if variables else None)

# not used anymore, previously used to manually pass S2 cells across multiple repositories
def convertS2ListToQueryString(s2list):
  """Build the space-separated body of a SPARQL VALUES clause from S2 cell URIs.

  Every occurrence of the KnowWhereGraph resource namespace in each URI is
  replaced by the ``kwgr:`` prefix; the shortened names are joined with single
  spaces. An empty list yields an empty string.
  """
  resource_namespace = "http://stko-kwg.geog.ucsb.edu/lod/resource/"
  return " ".join(uri.replace(resource_namespace, "kwgr:") for uri in s2list)


In [3]:
# @title
#for interactive widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

# Q1 - What samples are downstream of certain types of facilities (by NAICS Industry or Group Code)?


In [4]:
industry = "5622 (Waste Treatment and Disposal)" # @param ["5622 (Waste Treatment and Disposal)","3222 (Converted Paper Manufacturing)", "221310 (Water Supply and Irrigation)", "221320 (Sewage Treatment)","3261 (Plastics Product Manufacturing)","3133 (Textile and Fabric Finishing and Coating)","3251 (Basic Chemical Manufacturing)","3255 (Paint, Coating, and Adhesive Manufacturing)", "3364 (Aerospace Product and Parts)","812320 (Drycleaning and Laundry Services)", "561740 (Carpet and Upholstery Cleaning Services)"]{"allow-input":true}

# The first whitespace-delimited token of the selection is the NAICS code;
# the remainder is only a human-readable label.
icode = industry.split()[0]

print(icode)

# Codes longer than four characters are bound to ?industryCode, shorter ones
# to ?industryGroup. The resulting VALUES clause is spliced into every query.
industryValueFilter = 'VALUES ?{} {{naics:NAICS-{}}}.'.format(
    'industryCode' if len(icode) > 4 else 'industryGroup',
    icode,
)



5622


In [5]:
admin_region = "18 (Indiana)" # @param ["All", "23 (Maine)","23019 (Penboscot County, Maine)", "24 (New Hampshire)","18 (Indiana)", "17 (Illinois)"] {"allow-input":true}

# The first whitespace-delimited token is either the literal 'All' or a region code.
regionCode = admin_region.split()[0]

# Default: no spatial restriction. This covers 'All' and codes longer than five
# characters, which need region filters based on datacommons geoids (not implemented yet).
regionFilter = ''
if regionCode != 'All' and len(regionCode) <= 5:
  regionURI = 'kwgr:administrativeRegion.USA.' + regionCode
  print(regionURI)
  # NOTE(review): for a 5-digit county code this asks for counties that are
  # administrativePartOf that same county - presumably matches nothing; confirm
  # against the graph before offering county options in the form.
  regionFilter = '''?county rdf:type kwg-ont:AdministrativeRegion_2 ;
                   kwg-ont:administrativePartOf ''' + regionURI + ''' . '''




kwgr:administrativeRegion.USA.18


## Queries

In [6]:
# Retrieve facility details.
# Builds the SPARQL text that selects each facility (URI, WKT geometry, label)
# together with its NAICS industry code and that code's label.
# Two fragments computed in earlier cells are spliced into the WHERE clause:
#   regionFilter        - optional restriction on ?county (empty string = no restriction)
#   industryValueFilter - VALUES clause pinning ?industryCode or ?industryGroup
# Lines starting with '#' inside the string are SPARQL comments (disabled
# SERVICE blocks); they are sent to the endpoint as part of the query text.
q2 = '''
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>

#Where are landfills or dod facilities

select DISTINCT ?facility ?facWKT ?facilityName ?industryCode ?industryName where {
    #SERVICE <repository:Spatial>{
    #    #determine near distance
    #    ?s2 kwg-ont:sfTouches|owl:sameAs ?s2neighbor.
    #    ?s2neighbor rdf:type kwg-ont:S2Cell_Level13.
    #}
    #SERVICE <repository:FIO>{
        #find facilities
        #?s2neighbor kwg-ont:sfContains ?facility.
            ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode ;
            spatial:connectedTo ?county ;
        		geo:hasGeometry/geo:asWKT ?facWKT;
        		rdfs:label ?facilityName.
        ''' + regionFilter + '''
        ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
        ''' + industryValueFilter + '''
    #}

}
'''

# Echo the assembled query so the spliced filters can be inspected.
print(q2)


PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>

#Where are landfills or dod facilities

select DISTINCT ?facility ?facWKT ?facilityName ?industryCode ?industryName where {
    #SERVICE <repository:Spatial>{
    #    #determine near distance
    #    ?s2 kwg-ont:sfTouches|owl:sameAs ?s2neighbor.
    #    ?s2neighbor rdf:type kwg-ont:S2Cell_Level13.
    #}
    #SERVICE <repository:FIO>{
        #find facilities
        #?s2neighbor kwg-ont:sfContains ?facility.
            ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCod

In [7]:
endpointGET = 'https://frink.apps.renci.org/federation/sparql' # @param ['https://frink.apps.renci.org/fiokg/sparql','https://frink.apps.renci.org/qlever-geo/sparql', 'https://frink.apps.renci.org/federation/sparql']


# Shared SPARQL client reused by every query cell below.
# Despite the "GET" in its name, requests are sent with POST (see setMethod).
sparqlGET = SPARQLWrapper2(endpointGET)
sparqlGET.setHTTPAuth(DIGEST)
# No credentials are set here. If an endpoint ever requires them, read them at
# run time (environment variable or getpass) - never write them into the notebook.
sparqlGET.setMethod(POST)
sparqlGET.setReturnFormat(JSON)

# Run the facility query (q2) and keep the rows as a DataFrame.
sparqlGET.setQuery(q2)
facility_result = sparqlGET.query()
facilities = convertToDataframe(facility_result)
#print(facilities.info())
facilities

Unnamed: 0,facility,facWKT,facilityName,industryCode,industryName
0,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-86.299700 39.748000),HERITAGE ENVIRONMENTAL SERVICES LLC,http://w3id.org/fio/v1/naics#NAICS-56221,Waste Treatment and Disposal
1,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-86.299700 39.748000),HERITAGE ENVIRONMENTAL SERVICES LLC,http://w3id.org/fio/v1/naics#NAICS-562211,Hazardous Waste Treatment and Disposal
2,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-87.461568 41.633441),"TRADEBE TREATMENT & RECYCLINGOF TN, LLC",http://w3id.org/fio/v1/naics#NAICS-56221,Waste Treatment and Disposal
3,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-87.461568 41.633441),"TRADEBE TREATMENT & RECYCLINGOF TN, LLC",http://w3id.org/fio/v1/naics#NAICS-562211,Hazardous Waste Treatment and Disposal
4,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-87.461568 41.633441),"TRADEBE TREATMENT & RECYCLINGOF TN, LLC",http://w3id.org/fio/v1/naics#NAICS-562219,Other Nonhazardous Waste Treatment and Disposal
...,...,...,...,...,...
246,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-85.091100 40.375900),JAY COUNTY LANDFILL LLC,http://w3id.org/fio/v1/naics#NAICS-562212,Solid Waste Landfill
247,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-86.340400 40.723900),OAK RIDGE RECYCLING & DISPOSAL FACILITY,http://w3id.org/fio/v1/naics#NAICS-56221,Waste Treatment and Disposal
248,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-86.340400 40.723900),OAK RIDGE RECYCLING & DISPOSAL FACILITY,http://w3id.org/fio/v1/naics#NAICS-562212,Solid Waste Landfill
249,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-86.161135 41.493655),PRAIRIE VIEW HIGH BTU LLC,http://w3id.org/fio/v1/naics#NAICS-56221,Waste Treatment and Disposal


In [8]:
# Find streams.
# Builds the SPARQL text that selects the flowlines downstream of the selected
# facilities: URI, WKT geometry, flowline type and (optional) stream name.
# The inner sub-select collects the S2 cells that contain matching facilities,
# using the regionFilter / industryValueFilter fragments from earlier cells.
# NOTE(review): ?s2 (the touching / same-as cell) is bound but never used again;
# the flowlines are taken from ?s2neighbor, i.e. the facility's own cell. If
# neighbouring cells were meant to be included, the variables look swapped - confirm.
q1a='''
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX nhdplusv2: <http://nhdplusv2.spatialai.org/v1/nhdplusv2#>

PREFIX owl: <http://www.w3.org/2002/07/owl#>
select DISTINCT ?downstream_flowline ?dsflWKT ?fl_type ?streamName
where {
        {select ?s2neighbor where {
          #find facilities
          ?s2neighbor kwg-ont:sfContains ?facility.
          ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode;
            spatial:connectedTo ?county.
        		#geo:hasGeometry/geo:asWKT ?facWKT;
        		#rdfs:label ?facilityName.
          ''' + regionFilter + '''
          ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
          ''' + industryValueFilter + ''' }
          }
    #determine near streams
    ?s2 kwg-ont:sfTouches|owl:sameAs ?s2neighbor.
    ?s2neighbor rdf:type kwg-ont:S2Cell_Level13;
              spatial:connectedTo ?upstream_flowline.

 		?upstream_flowline rdf:type hyf:HY_FlowPath ;
		          # find all flowlines downstream of them
						  hyf:downstreamFlowPathTC ?downstream_flowline .
    ?downstream_flowline geo:hasGeometry/geo:asWKT ?dsflWKT;
              	nhdplusv2:hasFTYPE ?fl_type.
    OPTIONAL {?downstream_flowline rdfs:label ?streamName}


    }
'''

# Echo the assembled query so the spliced filters can be inspected.
print(q1a)


PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX nhdplusv2: <http://nhdplusv2.spatialai.org/v1/nhdplusv2#>

PREFIX owl: <http://www.w3.org/2002/07/owl#>
select DISTINCT ?downstream_flowline ?dsflWKT ?fl_type ?streamName
where {
        {select ?s2neighbor where {
          #find facilities
          ?s2neighbor kwg-ont:sfContains ?facility.
  

In [9]:
# Run the downstream-flowline query (q1a) on the shared client and keep the
# rows as a DataFrame; the WKT column is parsed later in the mapping prep section.
sparqlGET.setQuery(q1a)
stream_result = sparqlGET.query()
streams = convertToDataframe(stream_result)
streams

Unnamed: 0,downstream_flowline,dsflWKT,fl_type,streamName
0,https://geoconnex.us/nhdplusv2/comid/10106979,LINESTRING (-86.80224817570554 38.013268407662...,StreamRiver,Anderson River
1,https://geoconnex.us/nhdplusv2/comid/10109005,LINESTRING (-86.8123283756899 38.0000712743496...,StreamRiver,Anderson River
2,https://geoconnex.us/nhdplusv2/comid/10109029,LINESTRING (-86.84745190896871 37.992951674360...,StreamRiver,
3,https://geoconnex.us/nhdplusv2/comid/10109059,LINESTRING (-86.87149157559807 37.979859407714...,StreamRiver,Big Slough
4,https://geoconnex.us/nhdplusv2/comid/10109093,LINESTRING (-86.90179257555103 37.952955874422...,StreamRiver,Big Slough
...,...,...,...,...
3042,https://geoconnex.us/nhdplusv2/comid/935120060,LINESTRING (-88.00003137384634 38.107890607515...,ArtificialPath,Wabash River Old Channel
3043,https://geoconnex.us/nhdplusv2/comid/937130074,LINESTRING (-90.53698470324184 39.920986404701...,ArtificialPath,Illinois River
3044,https://geoconnex.us/nhdplusv2/comid/937130075,LINESTRING (-90.5737493031848 39.8921432047461...,ArtificialPath,Illinois River
3045,https://geoconnex.us/nhdplusv2/comid/937130094,LINESTRING (-90.4546140964604 40.0105334834936...,ArtificialPath,Illinois River


In [10]:
## Trace downstream
# Builds the SPARQL text that selects sample points located in S2 cells touched
# by flowlines downstream of the selected facilities. Rows are grouped by
# sample point, geometry, sample and unit, and aggregate the sample identifiers,
# the number of distinct results, the maximum result and a "<br>"-separated
# result list (the separator is HTML because the value is shown in map popups).
# NOTE(review): ?s2 is bound in the sub-select but never used again (same
# pattern as in the stream query), and ?type is bound but never projected.
# NOTE(review): no triple links ?observation to ?sample; both are only tied to
# ?samplePoint, so each sample at a point is paired with every observation at
# that point - confirm whether that is intended.
q1='''
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>

PREFIX owl: <http://www.w3.org/2002/07/owl#>
select DISTINCT ?samplePoint ?spWKT ?sample (GROUP_CONCAT(DISTINCT ?sampleId; separator="; ") as ?samples) (COUNT(DISTINCT ?subVal) as ?resultCount) (MAX(?result) as ?Max) ?unit (GROUP_CONCAT(DISTINCT ?subVal; separator=" <br> ") as ?results)
where {
      { SELECT DISTINCT ?s2cell WHERE {
        #find facilities
        ?s2neighbor kwg-ont:sfContains ?facility.
        ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode;
            spatial:connectedTo ?county.
        		#geo:hasGeometry/geo:asWKT ?facWKT;
        		#rdfs:label ?facilityName.
        ''' + regionFilter + '''
        ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
        ''' + industryValueFilter + '''
    #determine near streams
    ?s2 kwg-ont:sfTouches|owl:sameAs ?s2neighbor.
    ?s2neighbor rdf:type kwg-ont:S2Cell_Level13;
              spatial:connectedTo ?upstream_flowline.

 		?upstream_flowline rdf:type hyf:HY_FlowPath ;
		          # find all flowlines downstream of them
						  hyf:downstreamFlowPathTC ?downstream_flowline .
      ?s2cell spatial:connectedTo ?downstream_flowline ;
              rdf:type kwg-ont:S2Cell_Level13 .
    }}

    #find samples
    ?samplePoint kwg-ont:sfWithin ?s2cell;
    	rdf:type coso:SamplePoint;
    	geo:hasGeometry/geo:asWKT ?spWKT.
    ?s2cell rdf:type kwg-ont:S2Cell_Level13.
    ?sample coso:fromSamplePoint ?samplePoint;
    	dcterms:identifier ?sampleId;
    	coso:sampleOfMaterialType/rdfs:label ?type.
    ?observation rdf:type coso:ContaminantObservation;
    	coso:observedAtSamplePoint ?samplePoint;
    	coso:ofDSSToxSubstance/skos:altLabel ?substance;
    	coso:hasResult/coso:measurementValue ?result;
    	coso:hasResult/coso:measurementUnit/qudt:symbol ?unit.
    BIND((CONCAT(?substance, ": ", str(?result) , " ", ?unit) ) as ?subVal)

    } GROUP BY ?samplePoint ?spWKT ?sample ?unit
'''

# Echo the assembled query so the spliced filters can be inspected.
print(q1)


PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>

PREFIX owl: <http://www.w3.org/2002/07/owl#>
select DISTINCT ?samplePoint ?spWKT ?sample (GROUP_CONCAT(DISTINCT ?sampleId; separator="; ") as ?samples) (COUNT(DISTINCT ?subVal) as ?resultCount) (MAX(?result) as ?Max) ?unit (GROUP_CONCAT(DISTINCT ?subVal; separator=" <br> ") as ?results)
where {
      

In [11]:
# Run the downstream-sample query (q1) on the shared client and keep the rows
# as a DataFrame; the WKT column is parsed later in the mapping prep section.
sparqlGET.setQuery(q1)
samplepoint_result = sparqlGET.query()
samplepoints = convertToDataframe(samplepoint_result)
samplepoints

Unnamed: 0,samplePoint,spWKT,sample,samples,resultCount,Max,unit,results
0,https://geoconnex.us/iow/wqp/INSTOR_WQX-11825,POINT(-86.962778 39.111944),http://w3id.org/sawgraph/v1/us-wqp-data#d.wqp....,INSTOR_WQX-AB43910.FISHPREP,2,1.0,Î¼g/kg,PFDS: 1 Î¼g/kg <br> PFDS: 0.19 Î¼g/kg
1,https://geoconnex.us/iow/wqp/INSTOR_WQX-16693,POINT(-86.506207 38.977414),http://w3id.org/sawgraph/v1/us-wqp-data#d.wqp....,INSTOR_WQX-AB50126.FISHPREP,4,0.48,Î¼g/kg,PFDS: 0.24 Î¼g/kg <br> PFDS: 0.32 Î¼g/kg <br> ...
2,https://geoconnex.us/iow/wqp/INSTOR_WQX-18362,POINT(-87.385580 39.981361),http://w3id.org/sawgraph/v1/us-wqp-data#d.wqp....,INSTOR_WQX-AC39472.FISHPREP,1,0.16,Î¼g/kg,PFDS: 0.16 Î¼g/kg
3,https://geoconnex.us/iow/wqp/INSTOR_WQX-18512,POINT(-86.501675 39.380992),http://w3id.org/sawgraph/v1/us-wqp-data#d.wqp....,INSTOR_WQX-AB45684.FISHPREP,2,1.4,Î¼g/kg,PFDS: 0.97 Î¼g/kg <br> PFDS: 1.4 Î¼g/kg
4,https://geoconnex.us/iow/wqp/INSTOR_WQX-18897,POINT(-86.126394 39.748424),http://w3id.org/sawgraph/v1/us-wqp-data#d.wqp....,INSTOR_WQX-AB45449.FISHPREP,1,1.2,Î¼g/kg,PFDS: 1.2 Î¼g/kg
...,...,...,...,...,...,...,...,...
64,https://geoconnex.us/iow/wqp/OST_SHPD-NRS18_IN...,POINT(-87.598000 38.451780),http://w3id.org/sawgraph/v1/us-wqp-data#d.wqp....,OST_SHPD-NRS18_IN_10007-124173,1,0.215,ng/g,PFDS: 0.215 ng/g
65,https://geoconnex.us/iow/wqp/OST_SHPD-NRS18_IN...,POINT(-86.798160 39.261750),http://w3id.org/sawgraph/v1/us-wqp-data#d.wqp....,OST_SHPD-NRS18_IN_10015-124913,1,0.407,ng/g,PFDS: 0.407 ng/g
66,https://geoconnex.us/iow/wqp/OST_SHPD-NRS18_MO...,POINT(-89.461550 37.198930),http://w3id.org/sawgraph/v1/us-wqp-data#d.wqp....,OST_SHPD-NRS18_MO_10024-898753,1,0.46,ng/g,PFDS: 0.46 ng/g
67,https://geoconnex.us/iow/wqp/OST_SHPD-NRS18_MO...,POINT(-89.982390 37.963010),http://w3id.org/sawgraph/v1/us-wqp-data#d.wqp....,OST_SHPD-NRS18_MO_10025-894873,1,0.907,ng/g,PFDS: 0.907 ng/g


### Old Queries (passing s2)

In [12]:
#sparqlGET.setQuery(q3)
#s2_result = sparqlGET.query()
#s2 = convertToDataframe(s2_result)
#print(facilities.info())
#s2

In [13]:
#s2list = s2['s2cell'].tolist()
#s2list = [s2.replace("http://stko-kwg.geog.ucsb.edu/lod/resource/","kwgr:") for s2 in s2list]
#s2_values_string = " ".join(s2list)
#print(s2_values_string)
#s2_values_string = convertS2ListToQueryString(s2['s2cell'].tolist())

#q4 = '''
#PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
#PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
#PREFIX owl: <http://www.w3.org/2002/07/owl#>

#PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
#PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
#PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>

#SELECT ?s2cell WHERE {
#	?s2neighbor spatial:connectedTo kwgr:administrativeRegion.USA.'''+admin_region.split()[0]+''' .
#  VALUES ?s2neighbor {''' + s2_values_string + '''}
#  ?s2neighbor kwg-ont:sfTouches | owl:sameAs ?s2cell.
# }'''


#sparqlGET = SPARQLWrapper2("https://frink.apps.renci.org/spatialkg/sparql")
#sparqlGET.setHTTPAuth(DIGEST)
#sparqlGET.setMethod(POST)
#sparqlGET.setReturnFormat(JSON)

#sparqlGET.setQuery(q4)
#s2_filtered_result = sparqlGET.query()
#s2_filtered = convertToDataframe(s2_filtered_result)

#s2_filtered_values_string = convertS2ListToQueryString(s2_filtered['s2cell'].tolist())

#print(len(s2_filtered['s2cell'].tolist()))
#print(s2_filtered_values_string)

In [14]:
# Query the hydrology graph for the downstream S2cells

#q5='''
#PREFIX geo: <http://www.opengis.net/ont/geosparql#>
#PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
#PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
#PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>

#PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
#PREFIX nhdplusv2: <http://nhdplusv2.spatialai.org/v1/nhdplusv2#>

#PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
#PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

#SELECT DISTINCT ?s2cell WHERE {
# 		?upstream_flowline rdf:type hyf:HY_FlowPath ;
#		          spatial:connectedTo ?s2cellus ;
		# find all flowlines downstream of them
#						  hyf:downstreamFlowPathTC ?downstream_flowline .
#		VALUES ?s2cellus {''' + s2_filtered_values_string + '''}
#      ?s2cell spatial:connectedTo ?downstream_flowline ;
#              rdf:type kwg-ont:S2Cell_Level13 .
#        }
#'''

#sparqlGET2 = SPARQLWrapper2("https://frink.apps.renci.org/hydrologykg/sparql")
#sparqlGET2.setHTTPAuth(DIGEST)
#sparqlGET2.setMethod(POST)
#sparqlGET2.setReturnFormat(JSON)

#sparqlGET2.setQuery(q5)
#hydrology_result = sparqlGET2.query()
#print(hydrology_result.fullResult)
#hydrology = convertToDataframe(hydrology_result)

#s2_filtered_values_string = convertS2ListToQueryString(hydrology['s2cell'].tolist())


#print(len(hydrology['s2cell'].tolist()))
#print(s2_filtered_values_string)

In [15]:
# Query the spatial graph to filter the S2 cells to a specific admin region

#q6='''
#PREFIX geo: <http://www.opengis.net/ont/geosparql#>
#PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
#PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
#PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>

#PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
#PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

#SELECT DISTINCT ?s2cell WHERE {
# 		?s2cell spatial:connectedTo kwgr:administrativeRegion.USA.''' + admin_region.split()[0] + '''.
#    VALUES ?s2cell {'''+ s2_filtered_values_string + '''}
#        }
#'''

#print(q6)

#sparqlGET = SPARQLWrapper2("https://frink.apps.renci.org/spatialkg/sparql")
#sparqlGET.setHTTPAuth(DIGEST)
#sparqlGET.setMethod(POST)
#sparqlGET.setReturnFormat(JSON)
#sparqlGET.setQuery(q6)
#s2_filtered_result = sparqlGET.query()
#s2_filtered = convertToDataframe(s2_filtered_result)

#s2_filtered_values_string = convertS2ListToQueryString(s2_filtered['s2cell'].tolist())

#print(s2_filtered_values_string)

In [16]:
# Query just the PFAS graph for samples that match the S2 cells returned from the FIO graph

#s2list = s2['s2cell'].tolist()
#s2list = [s2.replace("http://stko-kwg.geog.ucsb.edu/lod/resource/","kwgr:") for s2 in s2list]
#s2_values_string = " ".join(s2list)
#print(s2_values_string)

#q6='''
# Get the number of results and all substances that have been detected at over 4.0 ng/L at samplepoints from a specific set of S2 cells (here cells with plastics manufacturers) together with the sample material type where these substances have been detected in
# This does not require access to the Spatial graph

#PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
#PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
#PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

#PREFIX geo: <http://www.opengis.net/ont/geosparql#>
#PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
#PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
#PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
#PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
#PREFIX qudt: <http://qudt.org/schema/qudt/>

#SELECT (COUNT(DISTINCT ?subVal) as ?resultCount) (MAX(?result_value) as ?max) (GROUP_CONCAT(DISTINCT ?substance; separator=";<br> ") as ?substances) (GROUP_CONCAT(DISTINCT ?matTypeLabel; separator="; <br>") as ?materials) ?sp ?spWKT  WHERE {
#      ?sp rdf:type coso:SamplePoint;
#      spatial:connectedTo ?s2cell ;
#  	  geo:hasGeometry/geo:asWKT ?spWKT.
#    VALUES ?s2cell {''' + s2_filtered_values_string + '''}
#  ?observation rdf:type coso:ContaminantObservation;
#    	coso:observedAtSamplePoint ?sp;
#    	coso:ofSubstance ?substance ;
#        coso:analyzedSample ?sample ;
#    	coso:hasResult ?result .
#    ?sample rdfs:label ?sampleLabel;
#  		coso:sampleOfMaterialType ?matType.
#  ?matType rdfs:label ?matTypeLabel.
#   ?result coso:measurementValue ?result_value;
#   		coso:measurementUnit ?unit .
  # FILTER (?result_value > 4).
  #VALUES ?unit {<http://qudt.org/vocab/unit/NanoGM-PER-L>}.
#  ?unit qudt:symbol ?unit_sym.
#    BIND((CONCAT(str(?result_value) , " ", ?unit_sym)) as ?subVal)
#} GROUP BY ?sp ?spWKT
#'''
#print(q6)

#sparqlGET2 = SPARQLWrapper2("https://frink.apps.renci.org/sawgraph/sparql")
#sparqlGET2.setHTTPAuth(DIGEST)
#sparqlGET2.setMethod(POST)
#sparqlGET2.setReturnFormat(JSON)

#sparqlGET2.setQuery(q6)
#samplepoint_result = sparqlGET2.query()
#samplepoints = convertToDataframe(samplepoint_result)
#samplepoints

## Prep data for mapping

In [17]:
# Turn the raw sample-point rows into a GeoDataFrame ready for mapping.
# The isinstance guard makes the cell safe to re-run: once the frame has been
# converted, running it again would otherwise call wkt.loads on already-parsed
# geometries and wrap the sample-point links in a second <a> tag.
if samplepoints.empty:
    print("No sample points found for the selected criteria. Skipping geospatial processing for sample points.")
elif not isinstance(samplepoints, gpd.GeoDataFrame):
    samplepoints['spWKT'] = samplepoints['spWKT'].apply(wkt.loads)
    samplepoints = gpd.GeoDataFrame(samplepoints, geometry='spWKT')
    samplepoints.set_crs(epsg=4326, inplace=True, allow_override=True)
    # Render the sample-point URI as a clickable link in the map popup.
    samplepoints['samplePoint'] = samplepoints['samplePoint'].apply(lambda x:f'<a href="{x}" target="_blank">{x}</a>')
    # clean up unit encoding: the micro sign arrives mis-decoded as 'Î¼'
    samplepoints['unit'] = samplepoints['unit'].str.replace('Î¼', 'μ', regex=False)
    samplepoints['results'] = samplepoints['results'].str.replace('Î¼', 'μ', regex=False)

In [18]:
# Turn the raw facility rows into a GeoDataFrame ready for mapping.
# Mirrors the sample-point prep: skip when the query returned nothing, and skip
# when the frame was already converted so the cell can be re-run without
# re-parsing geometries or double-wrapping the links.
if facilities.empty:
    print("No facilities found for the selected criteria. Skipping geospatial processing for facilities.")
elif not isinstance(facilities, gpd.GeoDataFrame):
    facilities['facWKT'] = facilities['facWKT'].apply(wkt.loads)
    facilities = gpd.GeoDataFrame(facilities, geometry='facWKT')
    # Link each facility to its EPA FRS detail page; the registry id is the
    # text after the last '.' of the facility URI.
    facilities['facility'] = facilities['facility'].apply(
        lambda x: f'<a href="https://frs-public.epa.gov/ords/frs_public2/fii_query_detail.disp_program_facility?p_registry_id={x.split(".")[-1]}" target="_blank">{x}</a>'
    )
    facilities.set_crs(epsg=4326, inplace=True, allow_override=True)

facilities

Unnamed: 0,facility,facWKT,facilityName,industryCode,industryName
0,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-86.2997 39.748),HERITAGE ENVIRONMENTAL SERVICES LLC,http://w3id.org/fio/v1/naics#NAICS-56221,Waste Treatment and Disposal
1,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-86.2997 39.748),HERITAGE ENVIRONMENTAL SERVICES LLC,http://w3id.org/fio/v1/naics#NAICS-562211,Hazardous Waste Treatment and Disposal
2,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-87.46157 41.63344),"TRADEBE TREATMENT & RECYCLINGOF TN, LLC",http://w3id.org/fio/v1/naics#NAICS-56221,Waste Treatment and Disposal
3,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-87.46157 41.63344),"TRADEBE TREATMENT & RECYCLINGOF TN, LLC",http://w3id.org/fio/v1/naics#NAICS-562211,Hazardous Waste Treatment and Disposal
4,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-87.46157 41.63344),"TRADEBE TREATMENT & RECYCLINGOF TN, LLC",http://w3id.org/fio/v1/naics#NAICS-562219,Other Nonhazardous Waste Treatment and Disposal
...,...,...,...,...,...
246,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-85.0911 40.3759),JAY COUNTY LANDFILL LLC,http://w3id.org/fio/v1/naics#NAICS-562212,Solid Waste Landfill
247,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-86.3404 40.7239),OAK RIDGE RECYCLING & DISPOSAL FACILITY,http://w3id.org/fio/v1/naics#NAICS-56221,Waste Treatment and Disposal
248,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-86.3404 40.7239),OAK RIDGE RECYCLING & DISPOSAL FACILITY,http://w3id.org/fio/v1/naics#NAICS-562212,Solid Waste Landfill
249,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-86.16114 41.49366),PRAIRIE VIEW HIGH BTU LLC,http://w3id.org/fio/v1/naics#NAICS-56221,Waste Treatment and Disposal


In [19]:
#facilities['facility'] = facilities['facility'].apply(lambda x:f'<a href="{x}">{x}</a>')

In [20]:
# Turn the raw flowline rows into a GeoDataFrame ready for mapping.
# Mirrors the sample-point prep: skip when the query returned nothing, and skip
# when the frame was already converted so the cell can be re-run without
# re-parsing geometries or double-wrapping the links.
if streams.empty:
    print("No streams found for the selected criteria. Skipping geospatial processing for streams.")
elif not isinstance(streams, gpd.GeoDataFrame):
    streams['dsflWKT'] = streams['dsflWKT'].apply(wkt.loads)
    streams = gpd.GeoDataFrame(streams, geometry='dsflWKT')
    # Render the flowline URI as a clickable link in the map popup.
    streams['downstream_flowline'] = streams['downstream_flowline'].apply(lambda x:f'<a href="{x}" target="_blank">{x}</a>')
    streams.set_crs(epsg=4326, inplace=True, allow_override=True)

streams

Unnamed: 0,downstream_flowline,dsflWKT,fl_type,streamName
0,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-86.80225 38.01327, -86.80231 38.0...",StreamRiver,Anderson River
1,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-86.81233 38.00007, -86.81354 37.9...",StreamRiver,Anderson River
2,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-86.84745 37.99295, -86.8487 37.99...",StreamRiver,
3,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-86.87149 37.97986, -86.87233 37.9...",StreamRiver,Big Slough
4,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-86.90179 37.95296, -86.90341 37.9...",StreamRiver,Big Slough
...,...,...,...,...
3042,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-88.00003 38.10789, -88.002 38.107...",ArtificialPath,Wabash River Old Channel
3043,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-90.53698 39.92099, -90.53701 39.9...",ArtificialPath,Illinois River
3044,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-90.57375 39.89214, -90.57677 39.8...",ArtificialPath,Illinois River
3045,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-90.45461 40.01053, -90.45525 40.0...",ArtificialPath,Illinois River


In [21]:
from branca.element import Figure


def _sample_marker_style(feature):
    """Return the circle-marker style for one sample-point GeoJSON feature.

    The marker radius follows the feature's "Max" property, scaled between
    5 and 25: non-detects and values below 40 get 5, values from 40 up to
    (but excluding) 160 get value / 8, anything larger gets 25. Non-detects
    are outlined in black, everything else in dim gray.
    """
    max_value = feature['properties']["Max"]
    if max_value in ["non-detect", "http://w3id.org/coso/v1/contaminoso#non-detect"]:
        return {"radius": 5, "opacity": 0.3, "color": 'Black'}

    magnitude = float(max_value)
    if magnitude < 40:
        radius = 5
    elif magnitude < 160:
        radius = magnitude / 8
    else:
        radius = 25
    return {"radius": radius, "opacity": 0.3, "color": 'DimGray'}


# Base map: the sample points. The name `map` is kept because the following
# cells add their layers to it.
map = samplepoints.explore(
    name='<span style="color:DarkOrange;">Samples</span>',
    color='DarkOrange',
    style_kwds={"style_function": _sample_marker_style},
    marker_kwds={"radius": 6},
    marker_type='circle_marker',
    popup=True,  # all columns; previously ["substances", "materials", "Max", "resultCount"]
    popup_kwds={'max_height': 500},
)
#map

In [22]:
# Overlay the downstream flowlines on the existing map as their own layer.
streams.explore(
    m=map,
    name='<span style="color:LightSkyBlue;">Streams</span>',
    color='LightSkyBlue',
    popup=['streamName', 'fl_type', 'downstream_flowline'],
    popup_kwds=dict(max_width=350),
)
# List the distinct stream names that were traced (NaN marks unnamed flowlines).
print(streams.streamName.unique())

['Anderson River' nan 'Big Slough' 'Ohio River' 'Bluegrass Creek'
 'Locust Creek' 'Pigeon Creek' 'Big Clifty Creek' 'Turkey Run'
 'Muddy Fork' 'Silver Creek' 'Fall Run' 'Fall Branch' 'Coal Creek'
 'Sugar Creek' 'Turman Creek' 'Big Bayou' 'Wabash River Old Channel'
 'Old Channel Wabash River' 'Big Creek' 'Olive Creek' 'Coffee Bayou'
 'Lower Sandy Slough' 'Yellow River' 'Kankakee River' 'Tower Creek'
 'Rayns Creek' 'Iroquois River' 'Hickory Branch' 'Illinois River'
 'Quiver Creek' 'White Oak Creek' 'Sangamon River' 'Patoka River'
 'Hall Creek' 'South Fork Patoka River' 'North Fork Salt Creek'
 'Salt Creek' 'Boggs Creek' 'Leatherwood Creek' 'East Fork White River'
 'Little Sand Creek' 'Flatrock River' 'Pink Creek' 'Big Blue River'
 'Sixmile Creek' 'Driftwood River' 'White River' 'Fish Creek'
 'South Fork Prairie Creek' 'Prairie Creek' 'Hawkins Creek' 'Payne Branch'
 'Crooked Creek' 'Little Eagle Creek' 'Pleasant Run'
 'West Fork White Lick Creek' 'White Lick Creek' 'Eagle Creek'
 'Dry Bra

In [23]:
#map = folium.Map()
# One map layer per industry name, each drawn in its own colour.
# Every entry is a valid CSS colour name: the markers are drawn with CSS
# colours, so the previous 'MediumSageGreen', 'lightred' and 'darkpurple' were
# not usable, and 'orange' / 'cadetblue' appeared twice.
colors = ['Purple', 'PaleVioletRed', 'Orchid', 'Fuchsia', 'MediumVioletRed', 'HotPink', 'LightPink', 'red', 'LightCoral', 'pink', 'orange',
          'MidnightBlue', 'MediumBlue', 'SlateBlue', 'MediumSlateBlue', 'DodgerBlue', 'DeepSkyBlue', 'SkyBlue', 'CadetBlue', 'DarkCyan', 'LightSeaGreen',
          'MediumSeaGreen', 'lightblue', 'gray', 'blue', 'darkred', 'lightgreen', 'green', 'darkblue', 'Indigo', 'SteelBlue', 'Gold', 'lightgray', 'darkgreen']
# The loop variable is deliberately not called `industry`, so the form value
# chosen in the industry cell is not overwritten.
for layer_index, industry_label in enumerate(facilities.industryName.unique()):
  # Cycle through the palette so more industries than colours cannot raise IndexError.
  layer_color = colors[layer_index % len(colors)]
  facilities[facilities['industryName'] == industry_label].explore(
      m=map,
      name=f'<span style="color:{layer_color};">{industry_label}</span>',
      color=layer_color,
      marker_kwds=dict(radius=3),
      popup=['facility', 'facilityName', 'industryName'])

# Map

In [24]:
# @title
# Re-assert WGS84 (EPSG:4326) on both frames.
# NOTE(review): when the sample query returned no rows the prep cell skips the
# GeoDataFrame conversion, so samplepoints would presumably still be a plain
# DataFrame here - confirm this call is reachable in that case.
samplepoints.set_crs(epsg=4326, inplace=True, allow_override=True)
facilities.set_crs(epsg=4326, inplace=True, allow_override=True)

# Add the always-expanded layer switcher, then embed the map in a
# full-width, 900-high figure; the last expression renders it.
folium.LayerControl(collapsed=False).add_to(map)
fig = Figure(width='100%', height=900)
fig.add_child(map)


In [25]:
# Write the assembled figure to a standalone HTML file in the working directory.
fig.save('SAWGraph-demo_Tracing_Downstream.html')