<a href="https://colab.research.google.com/github/SAWGraph/public/blob/main/SAWGraph_Y3_Demo_TracingDownstream.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook generates maps for the UI mockup answering the following question:
* Q1: What samples in a selected administrative region (e.g., Maine) are downstream of certain types of facilities (by NAICS Industry or Group Code)?

# Setup


In [None]:
%%capture
!pip install mapclassify --upgrade --quiet
!pip install SPARQLWrapper --upgrade --quiet
!pip install rdflib

In [None]:
#from branca.element import Figure                                  # For controlling the size of the final map
import folium                                                      # For map layer control
import geopandas as gpd                                            # For geospatial dataframes
import pandas as pd                                                # For dataframes
from shapely import wkt                                            # For working with WKT coordinates in a GeoDataFrame
from SPARQLWrapper import SPARQLWrapper2, JSON, GET, POST, DIGEST   # For querying SPARQL endpoints
import rdflib                                                      # For working with URIs

def convertToDataframe(results):
  """Turn a SPARQLWrapper2 query result into a pandas DataFrame.

  Every entry of ``results.bindings`` becomes one row. Each variable bound in
  that entry becomes a column, holding the bound term converted to a native
  Python value through ``rdflib.term.Literal(...).toPython()``.
  """
  def to_python(term):
    # Re-wrap the raw value with its datatype so rdflib can pick the Python type.
    return rdflib.term.Literal(term.value, datatype=term.datatype).toPython()  # type: ignore[no-untyped-call]

  records = [
      {variable: to_python(binding[variable]) for variable in binding}
      for binding in results.bindings
  ]
  return pd.DataFrame(records)

def convertS2ListToQueryString(s2list):
  """Join S2 cell IRIs into one space-separated string of ``kwgr:`` names.

  The KnowWhereGraph resource namespace in each IRI is replaced by the
  ``kwgr:`` prefix so the result can be pasted into a SPARQL VALUES block.
  """
  kwg_resource_namespace = "http://stko-kwg.geog.ucsb.edu/lod/resource/"
  return " ".join(
      cell_iri.replace(kwg_resource_namespace, "kwgr:") for cell_iri in s2list
  )


In [None]:
#for interactive widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

# Q1 - What samples are downstream of certain types of facilities (by NAICS Industry or Group Code)?


In [None]:
# Colab form: NAICS industry (6-digit) or industry-group (4-digit) selection.
# Free text is allowed, so the value is validated before it is used.
industry = "5622 (Waste Treatment and Disposal)" # @param ["5622 (Waste Treatment and Disposal)","3222 (Converted Paper Manufacturing)", "221310 (Water Supply and Irrigation)", "221320 (Sewage Treatment)","3261 (Plastics Product Manufacturing)","3133 (Textile and Fabric Finishing and Coating)","3251 (Basic Chemical Manufacturing)","3255 (Paint, Coating, and Adhesive Manufacturing)", "3364 (Aerospace Product and Parts)","812320 (Drycleaning and Laundry Services)", "561740 (Carpet and Upholstery Cleaning Services)"]{"allow-input":true}

# The code is the first whitespace-separated token of the selection.
industry_tokens = industry.split()
icode = industry_tokens[0] if industry_tokens else ""

# icode is concatenated into the SPARQL queries below (naics:NAICS-<icode>),
# so reject anything that is not a plain numeric code up front instead of
# sending a malformed query to the endpoint.
if not (icode.isascii() and icode.isdigit()):
  raise ValueError(
      "industry must start with a numeric NAICS code, e.g. '5622 (Waste Treatment and Disposal)'; got "
      + repr(industry)
  )
print(icode)


5622


In [None]:
# Colab form: administrative region used to restrict the results. Only the
# leading code is used; it is appended to kwgr:administrativeRegion.USA.<code>
# in the queries below. The codes are FIPS codes (New Hampshire is 33; 24 is
# Maryland).
admin_region = "18 (Indiana)" # @param ["23 (Maine)","23019 (Penobscot County, Maine)", "33 (New Hampshire)","18 (Indiana)", "17 (Illinois)"] {"allow-input":true}

## Queries

In [None]:
%%time
def buildIndustryValuesClause(code):
  """Return the SPARQL VALUES clause that pins the selected NAICS code.

  Codes longer than four characters are bound to ?industryCode (a specific
  NAICS industry); shorter codes are bound to ?industryGroup.
  """
  variable = '?industryCode' if len(code) > 4 else '?industryGroup'
  return 'VALUES ' + variable + ' {naics:NAICS-' + str(code) + '}.'


# q1: single federated query (facilities -> neighbouring S2 cells -> samples).
# It is only built here; none of the cells below sends it to an endpoint.
q1='''
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>

PREFIX owl: <http://www.w3.org/2002/07/owl#>
select DISTINCT ?samplePoint ?spWKT ?sample (GROUP_CONCAT(DISTINCT ?sampleId; separator="; ") as ?samples) (COUNT(DISTINCT ?subVal) as ?resultCount) (MAX(?result) as ?Max) ?unit (GROUP_CONCAT(DISTINCT ?subVal; separator=" <br> ") as ?results)
where {
        SERVICE <repository:FIO>{
        #find facilities
        ?s2neighbor kwg-ont:sfContains ?facility.
        ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode.
        		#geo:hasGeometry/geo:asWKT ?facWKT;
        		#rdfs:label ?facilityName.
        ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
        '''
q1 += buildIndustryValuesClause(icode)
q1 += '''
    }
            SERVICE <repository:Spatial>{
        #determine near distance
        ?s2 kwg-ont:sfTouches|owl:sameAs ?s2neighbor.
        ?s2neighbor rdf:type kwg-ont:S2Cell_Level13.
    }
    #find samples
    ?samplePoint kwg-ont:sfWithin ?s2;
    	rdf:type coso:SamplePoint;
    	geo:hasGeometry/geo:asWKT ?spWKT.
    ?s2 rdf:type kwg-ont:S2Cell_Level13.
    ?sample coso:fromSamplePoint ?samplePoint;
    	dcterms:identifier ?sampleId;
    	coso:sampleOfMaterialType/rdfs:label ?type.
    ?observation rdf:type coso:ContaminantObservation;
    	coso:observedAtSamplePoint ?samplePoint;
    	coso:ofSubstance/ skos:altLabel ?substance;
    	coso:hasResult/coso:measurementValue ?result;
    	coso:hasResult/coso:measurementUnit/qudt:symbol ?unit.
    BIND((CONCAT(?substance, ": ", str(?result) , " ", ?unit) ) as ?subVal)

    } GROUP BY ?samplePoint ?spWKT ?sample ?unit
'''

# q2: retrieve facility details (IRI, geometry, name, industry label) for the
# selected industry. The projection used to list ?industry as well, a variable
# that is never bound in the pattern and therefore never produced a column.
q2 = '''
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>

#Where are landfills or dod facilities

select DISTINCT ?facility ?facWKT ?facilityName ?industryName where {
    #SERVICE <repository:Spatial>{
    #    #determine near distance
    #    ?s2 kwg-ont:sfTouches|owl:sameAs ?s2neighbor.
    #    ?s2neighbor rdf:type kwg-ont:S2Cell_Level13.
    #}
    #SERVICE <repository:FIO>{
        #find facilities
        #?s2neighbor kwg-ont:sfContains ?facility.
            ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode ;
        		geo:hasGeometry/geo:asWKT ?facWKT;
        		rdfs:label ?facilityName.
        ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
        '''
q2 += buildIndustryValuesClause(icode)
q2 += '''
    #}

}
'''

# q3: retrieve the level-13 S2 cells that contain a facility of the selected industry.
q3 = '''
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>

#Where are landfills or dod facilities

select DISTINCT ?s2cell where {
        #find facilities
        ?s2cell rdf:type kwg-ont:S2Cell_Level13 ;
                kwg-ont:sfContains ?facility.
            ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode ;
        		geo:hasGeometry/geo:asWKT ?facWKT;
        		rdfs:label ?facilityName.
        ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
        '''
q3 += buildIndustryValuesClause(icode)
q3 += '''
} GROUP BY ?s2cell
'''

# Show the two queries that the following cells execute.
print(q2)
print(q3)


PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>

#Where are landfills or dod facilities

select DISTINCT ?facility ?facWKT ?facilityName ?industry ?industryName where {
    #SERVICE <repository:Spatial>{
    #    #determine near distance
    #    ?s2 kwg-ont:sfTouches|owl:sameAs ?s2neighbor.
    #    ?s2neighbor rdf:type kwg-ont:S2Cell_Level13.
    #}
    #SERVICE <repository:FIO>{
        #find facilities
        #?s2neighbor kwg-ont:sfContains ?facility.
            ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode ;


In [None]:
# Colab form: SPARQL endpoint that the facility (q2), S2-cell (q3) and
# neighbour-cell (q4) queries are sent to.
endpointGET = 'https://frink.apps.renci.org/federation/sparql' # @param ['https://frink.apps.renci.org/fiokg/sparql','https://frink.apps.renci.org/qlever-geo/sparql', 'https://frink.apps.renci.org/federation/sparql']


# Despite the "GET" in the name, this client is configured to send POST requests.
sparqlGET = SPARQLWrapper2(endpointGET)
# DIGEST auth is selected but no credentials are set on the client.
sparqlGET.setHTTPAuth(DIGEST)
# NOTE(review): a commented-out setCredentials(...) call with a literal user name
# and password used to sit here; it was removed so that no credentials are kept
# in the notebook. Read them from the environment if they are ever needed.
sparqlGET.setMethod(POST)
sparqlGET.setReturnFormat(JSON)

# Run the facility-details query and keep the result as a DataFrame.
sparqlGET.setQuery(q2)
facility_result = sparqlGET.query()
facilities = convertToDataframe(facility_result)
# Bare last expression: display the frame.
facilities

Unnamed: 0,facility,facWKT,facilityName,industryName
0,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-71.400660 42.271140),GENERAL CHEMICAL,Hazardous Waste Treatment and Disposal
1,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-71.400660 42.271140),GENERAL CHEMICAL,Waste Treatment and Disposal
2,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.787500 41.802300),SEMASS RESOURCE RECOVERY FACILITY,Solid Waste Combustors and Incinerators
3,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-70.787500 41.802300),SEMASS RESOURCE RECOVERY FACILITY,Waste Treatment and Disposal
4,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-71.413515 41.765944),SAFETY-KLEEN SYSTEMS,Hazardous Waste Treatment and Disposal
...,...,...,...,...
13696,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-106.661917 39.722444),EAGLE COUNTY LANDFILL,Waste Treatment and Disposal
13697,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-77.589306 39.967583),"OAK HILL, CENTRAL & SOUTH SCOTLAND SEWER INTER...",Other Nonhazardous Waste Treatment and Disposal
13698,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-77.589306 39.967583),"OAK HILL, CENTRAL & SOUTH SCOTLAND SEWER INTER...",Waste Treatment and Disposal
13699,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-74.318410 40.151340),JACKSON TWP,Other Nonhazardous Waste Treatment and Disposal


In [None]:
# Run q3 on the same client: the S2 cells that contain a matching facility.
sparqlGET.setQuery(q3)
s2_result = sparqlGET.query()
# One row per cell, in a single 's2cell' column (q3 projects only ?s2cell).
s2 = convertToDataframe(s2_result)
# Bare last expression: display the frame.
s2

Unnamed: 0,s2cell
0,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
1,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
2,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
3,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
4,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
...,...
5266,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
5267,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
5268,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
5269,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....


In [None]:
# Facility cells (from q3) as a space-separated list of kwgr: names.
s2_values_string = convertS2ListToQueryString(s2['s2cell'].tolist())

# q4: keep only the facility cells connected to the selected administrative
# region and expand each of them to itself plus the cells it touches.
# DISTINCT is required because one neighbouring cell can touch several facility
# cells; without it the same cell is repeated in the VALUES list that is sent to
# the hydrology query.
q4 = '''
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>

SELECT DISTINCT ?s2cell WHERE {
	?s2neighbor spatial:connectedTo kwgr:administrativeRegion.USA.'''+admin_region.split()[0]+''' .
  VALUES ?s2neighbor {''' + s2_values_string + '''}
  ?s2neighbor kwg-ont:sfTouches | owl:sameAs ?s2cell.
 }'''

# Sent through the client configured in the endpoint cell above.
sparqlGET.setQuery(q4)
s2_filtered_result = sparqlGET.query()
s2_filtered = convertToDataframe(s2_filtered_result)

# Origin cells for the downstream trace, as kwgr: names.
s2_filtered_values_string = convertS2ListToQueryString(s2_filtered['s2cell'].tolist())

print(len(s2_filtered['s2cell'].tolist()))
print(s2_filtered_values_string)

963
kwgr:s2.level13.9804678354081677312 kwgr:s2.level13.9804678388441415680 kwgr:s2.level13.9804678457160892416 kwgr:s2.level13.9804678491520630784 kwgr:s2.level13.9804678594599845888 kwgr:s2.level13.9804678628959584256 kwgr:s2.level13.9804678800758276096 kwgr:s2.level13.9804678835118014464 kwgr:s2.level13.9804781192778612736 kwgr:s2.level13.9804781227138351104 kwgr:s2.level13.9804781398937042944 kwgr:s2.level13.9804781433296781312 kwgr:s2.level13.9804781536375996416 kwgr:s2.level13.9804781570735734784 kwgr:s2.level13.9804781639455211520 kwgr:s2.level13.9804781673814949888 kwgr:s2.level13.9804628704259735552 kwgr:s2.level13.9804628738619473920 kwgr:s2.level13.9804628841698689024 kwgr:s2.level13.9804801774261895168 kwgr:s2.level13.9804801808621633536 kwgr:s2.level13.9804801842981371904 kwgr:s2.level13.9804801911700848640 kwgr:s2.level13.9804801946060587008 kwgr:s2.level13.9804828506138345472 kwgr:s2.level13.9804828540498083840 kwgr:s2.level13.9804828574857822208 kwgr:s2.level13.98048286

In [None]:
# Query the hydrology graph for the downstream S2cells

# Build the origin-cell list from the s2_filtered frame instead of reading
# s2_filtered_values_string: this cell overwrites that string with its own
# output at the end, so reading it made an immediate re-run of this cell trace
# downstream of its own previous result.
# NOTE: s2_filtered is rebound by the region-filter cell further down; re-run the
# previous cell first if this cell is re-run after that one.
origin_values_string = convertS2ListToQueryString(s2_filtered['s2cell'].tolist())

q5='''
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>

PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX nhdplusv2: <http://nhdplusv2.spatialai.org/v1/nhdplusv2#>

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?s2cell WHERE {
 		?upstream_flowline rdf:type hyf:HY_FlowPath ;
		          spatial:connectedTo ?s2cellus ;
		# find all flowlines downstream of them
						  hyf:downstreamFlowPathTC ?downstream_flowline .
		VALUES ?s2cellus {''' + origin_values_string + '''}
      ?s2cell spatial:connectedTo ?downstream_flowline ;
              rdf:type kwg-ont:S2Cell_Level13 .
        }
'''

# Dedicated client for the hydrology endpoint.
sparqlGET2 = SPARQLWrapper2("https://frink.apps.renci.org/hydrologykg/sparql")
sparqlGET2.setHTTPAuth(DIGEST)
sparqlGET2.setMethod(POST)
sparqlGET2.setReturnFormat(JSON)

sparqlGET2.setQuery(q5)
hydrology_result = sparqlGET2.query()
hydrology = convertToDataframe(hydrology_result)

# The following cells read the downstream cells from this variable.
s2_filtered_values_string = convertS2ListToQueryString(hydrology['s2cell'].tolist())


print(len(hydrology['s2cell'].tolist()))
print(s2_filtered_values_string)

8532
kwgr:s2.level13.9804677529447956480 kwgr:s2.level13.9804677598167433216 kwgr:s2.level13.9804677632527171584 kwgr:s2.level13.9804678216642723840 kwgr:s2.level13.9804678319721938944 kwgr:s2.level13.9804678354081677312 kwgr:s2.level13.9804679075636183040 kwgr:s2.level13.9804678628959584256 kwgr:s2.level13.9804678732038799360 kwgr:s2.level13.9804678766398537728 kwgr:s2.level13.9804681480817868800 kwgr:s2.level13.9804681377738653696 kwgr:s2.level13.9804681343378915328 kwgr:s2.level13.9804680518745194496 kwgr:s2.level13.9804680553104932864 kwgr:s2.level13.9804681274659438592 kwgr:s2.level13.9804681309019176960 kwgr:s2.level13.9804680278227025920 kwgr:s2.level13.9804680415665979392 kwgr:s2.level13.9804680450025717760 kwgr:s2.level13.9804687493772083200 kwgr:s2.level13.9804687528131821568 kwgr:s2.level13.9804688868161617920 kwgr:s2.level13.9804688902521356288 kwgr:s2.level13.9804688936881094656 kwgr:s2.level13.9804689246118739968 kwgr:s2.level13.9804689280478478336 kwgr:s2.level13.9804689

In [None]:
# Query the spatial graph to filter the S2 cells to a specific admin region

# At this point s2_filtered_values_string holds the downstream cells produced by
# the hydrology cell; q6 keeps only those connected to the selected region.
# NOTE(review): the name q6 is reused for a different query in the next cell.
q6='''
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?s2cell WHERE {
 		?s2cell spatial:connectedTo kwgr:administrativeRegion.USA.''' + admin_region.split()[0] + '''.
    VALUES ?s2cell {'''+ s2_filtered_values_string + '''}
        }
'''

# Prints the full query text, including the whole VALUES list.
print(q6)

# NOTE(review): this rebinds sparqlGET to the spatialkg endpoint. The earlier
# cells send q2, q3 and q4 through sparqlGET, so re-running them after this cell
# targets spatialkg unless the endpoint cell is re-run first.
sparqlGET = SPARQLWrapper2("https://frink.apps.renci.org/spatialkg/sparql")
sparqlGET.setHTTPAuth(DIGEST)
sparqlGET.setMethod(POST)
sparqlGET.setReturnFormat(JSON)
sparqlGET.setQuery(q6)
# Overwrites s2_filtered (previously the origin cells from q4) with the
# region-filtered downstream cells.
s2_filtered_result = sparqlGET.query()
s2_filtered = convertToDataframe(s2_filtered_result)

# The sample-point query in the next cell reads its cell list from this variable.
s2_filtered_values_string = convertS2ListToQueryString(s2_filtered['s2cell'].tolist())

print(s2_filtered_values_string)


PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?s2cell WHERE {
 		?s2cell spatial:connectedTo kwgr:administrativeRegion.USA.18.
    VALUES ?s2cell {kwgr:s2.level13.9804677529447956480 kwgr:s2.level13.9804677598167433216 kwgr:s2.level13.9804677632527171584 kwgr:s2.level13.9804678216642723840 kwgr:s2.level13.9804678319721938944 kwgr:s2.level13.9804678354081677312 kwgr:s2.level13.9804679075636183040 kwgr:s2.level13.9804678628959584256 kwgr:s2.level13.9804678732038799360 kwgr:s2.level13.9804678766398537728 kwgr:s2.level13.9804681480817868800 kwgr:s2.level13.9804681377738653696 kwgr:s2.level13.9804681343378915328 kwgr:s2.level13.9804680518745194496 kwgr:s2.level13.9804680553

In [None]:
# Query the SAWGraph endpoint for sample points located in the S2 cells held in
# s2_filtered_values_string (the region-filtered downstream cells from the
# previous cell), with aggregated results per sample point.
#
# The description inside the query used to claim a 4.0 ng/L threshold and
# "cells with plastics manufacturers"; the FILTER is commented out and the cells
# come from the selected industry, so the printed text now matches the query.
#
# NOTE(review): ?subVal is built from value and unit only, so ?resultCount counts
# distinct "value unit" strings per sample point, not distinct results; equal
# values for different substances collapse into one. Confirm whether
# COUNT(DISTINCT ?result) was intended.
# NOTE(review): ?max is taken across all units returned for a sample point.
q6='''
# For every sample point in the given S2 cells: the number of distinct result values, the maximum measured value, the observed substances and the sample material types they were observed in
# This does not require access to the Spatial graph

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX qudt: <http://qudt.org/schema/qudt/>

SELECT (COUNT(DISTINCT ?subVal) as ?resultCount) (MAX(?result_value) as ?max) (GROUP_CONCAT(DISTINCT ?substance; separator=";<br> ") as ?substances) (GROUP_CONCAT(DISTINCT ?matTypeLabel; separator="; <br>") as ?materials) ?sp ?spWKT  WHERE {
      ?sp rdf:type coso:SamplePoint;
      spatial:connectedTo ?s2cell ;
  	  geo:hasGeometry/geo:asWKT ?spWKT.
    VALUES ?s2cell {''' + s2_filtered_values_string + '''}
  ?observation rdf:type coso:ContaminantObservation;
    	coso:observedAtSamplePoint ?sp;
    	coso:ofSubstance ?substance ;
        coso:analyzedSample ?sample ;
    	coso:hasResult ?result .
    ?sample rdfs:label ?sampleLabel;
  		coso:sampleOfMaterialType ?matType.
  ?matType rdfs:label ?matTypeLabel.
   ?result coso:measurementValue ?result_value;
   		coso:measurementUnit ?unit .
  # FILTER (?result_value > 4).
  #VALUES ?unit {<http://qudt.org/vocab/unit/NanoGM-PER-L>}.
  ?unit qudt:symbol ?unit_sym.
    BIND((CONCAT(str(?result_value) , " ", ?unit_sym)) as ?subVal)
} GROUP BY ?sp ?spWKT
'''
print(q6)

# Client for the SAWGraph endpoint (rebinds sparqlGET2, which the hydrology cell
# creates afresh each time it runs).
sparqlGET2 = SPARQLWrapper2("https://frink.apps.renci.org/sawgraph/sparql")
sparqlGET2.setHTTPAuth(DIGEST)
sparqlGET2.setMethod(POST)
sparqlGET2.setReturnFormat(JSON)

sparqlGET2.setQuery(q6)
samplepoint_result = sparqlGET2.query()
samplepoints = convertToDataframe(samplepoint_result)
samplepoints


# Get the number of results and all substances that have been detected at over 4.0 ng/L at samplepoints from a specific set of S2 cells (here cells with plastics manufacturers) together with the sample material type where these substances have been detected in
# This does not require access to the Spatial graph

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX qudt: <http://qudt.org/schema/qudt/>

SELECT (COUNT(DISTINCT ?subVal) as ?resultCount) (MAX(?result_value) as ?max) (GROUP_CONCAT(DISTINCT ?substance; separator=";<br> ") as ?substances) (GROUP_CONCAT(DISTINCT ?matTypeLabel

Unnamed: 0,resultCount,max,substances,materials,sp,spWKT
0,41,16.00,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Micropterus salmoides; <br>Catostomus commerso...,https://geoconnex.us/iow/wqp/INSTOR_WQX-8144,POINT (-87.5 41.6188888888)
1,11,0.81,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Lepomis macrochirus; <br>Micropterus salmoides...,https://geoconnex.us/iow/wqp/INSTOR_WQX-7834,POINT (-87.0708333333 39.7225)
2,2,15.00,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Micropterus salmoides; <br>Pomoxis annularis; ...,https://geoconnex.us/iow/wqp/INSTOR_WQX-2219,POINT (-85.6608333333 40.5844444444)
3,25,9.67,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Ictiobus bubalus; <br>Micropterus punctulatus;...,https://geoconnex.us/iow/wqp/INSTOR_WQX-2350,POINT (-87.9422222222 38.13125)
4,14,8.00,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Ictiobus bubalus; <br>Aplodinotus grunniens; <...,https://geoconnex.us/iow/wqp/INSTOR_WQX-18905,POINT (-86.4416 39.458238)
...,...,...,...,...,...,...
113,2,8.87,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Pomoxis annularis; <br>Sander vitreus; <br>Tissue,https://geoconnex.us/iow/wqp/INSTOR_WQX-2272,POINT (-85.5430555555 40.8797222222)
114,8,15.00,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Lepomis megalotis; <br>Tissue,https://geoconnex.us/iow/wqp/INSTOR_WQX-12896,POINT (-86.5554722222 39.2434444444)
115,16,6.90,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Lepomis megalotis; <br>Micropterus dolomieu; <...,https://geoconnex.us/iow/wqp/INSTOR_WQX-2371,POINT (-86.2506669632 39.7782506594)
116,22,11.00,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Lepomis macrochirus; <br>Micropterus dolomieu;...,https://geoconnex.us/iow/wqp/INSTOR_WQX-2573,POINT (-85.8652777777 39.4963888888)


## Prep data for mapping

In [None]:
# Parse the WKT strings into geometries and promote the frame to a GeoDataFrame.
# Guarded so the cell can be re-run: wkt.loads only accepts strings and fails on
# the geometry objects that are already in the column after the first run.
if not isinstance(samplepoints, gpd.GeoDataFrame):
  samplepoints['spWKT'] = samplepoints['spWKT'].apply(wkt.loads)
  samplepoints = gpd.GeoDataFrame(samplepoints, geometry='spWKT')

# Coordinates are longitude/latitude (EPSG:4326); the returned frame is displayed.
samplepoints.set_crs(epsg=4326, inplace=True, allow_override=True)


Unnamed: 0,resultCount,max,substances,materials,sp,spWKT
0,41,16.00,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Micropterus salmoides; <br>Catostomus commerso...,https://geoconnex.us/iow/wqp/INSTOR_WQX-8144,POINT (-87.5 41.61889)
1,11,0.81,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Lepomis macrochirus; <br>Micropterus salmoides...,https://geoconnex.us/iow/wqp/INSTOR_WQX-7834,POINT (-87.07083 39.7225)
2,2,15.00,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Micropterus salmoides; <br>Pomoxis annularis; ...,https://geoconnex.us/iow/wqp/INSTOR_WQX-2219,POINT (-85.66083 40.58444)
3,25,9.67,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Ictiobus bubalus; <br>Micropterus punctulatus;...,https://geoconnex.us/iow/wqp/INSTOR_WQX-2350,POINT (-87.94222 38.13125)
4,14,8.00,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Ictiobus bubalus; <br>Aplodinotus grunniens; <...,https://geoconnex.us/iow/wqp/INSTOR_WQX-18905,POINT (-86.4416 39.45824)
...,...,...,...,...,...,...
113,2,8.87,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Pomoxis annularis; <br>Sander vitreus; <br>Tissue,https://geoconnex.us/iow/wqp/INSTOR_WQX-2272,POINT (-85.54306 40.87972)
114,8,15.00,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Lepomis megalotis; <br>Tissue,https://geoconnex.us/iow/wqp/INSTOR_WQX-12896,POINT (-86.55547 39.24344)
115,16,6.90,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Lepomis megalotis; <br>Micropterus dolomieu; <...,https://geoconnex.us/iow/wqp/INSTOR_WQX-2371,POINT (-86.25067 39.77825)
116,22,11.00,http://w3id.org/sawgraph/v1/us-wqp-data#charac...,Lepomis macrochirus; <br>Micropterus dolomieu;...,https://geoconnex.us/iow/wqp/INSTOR_WQX-2573,POINT (-85.86528 39.49639)


In [None]:
# Parse the WKT strings into geometries and promote the frame to a GeoDataFrame.
# Guarded so the cell can be re-run: wkt.loads only accepts strings and fails on
# the geometry objects that are already in the column after the first run.
if not isinstance(facilities, gpd.GeoDataFrame):
  facilities['facWKT'] = facilities['facWKT'].apply(wkt.loads)
  facilities = gpd.GeoDataFrame(facilities, geometry='facWKT')

# Coordinates are longitude/latitude (EPSG:4326); the returned frame is displayed.
facilities.set_crs(epsg=4326, inplace=True, allow_override=True)

Unnamed: 0,facility,facWKT,facilityName,industryName
0,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-71.40066 42.27114),GENERAL CHEMICAL,Hazardous Waste Treatment and Disposal
1,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-71.40066 42.27114),GENERAL CHEMICAL,Waste Treatment and Disposal
2,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-70.7875 41.8023),SEMASS RESOURCE RECOVERY FACILITY,Solid Waste Combustors and Incinerators
3,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-70.7875 41.8023),SEMASS RESOURCE RECOVERY FACILITY,Waste Treatment and Disposal
4,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-71.41352 41.76594),SAFETY-KLEEN SYSTEMS,Hazardous Waste Treatment and Disposal
...,...,...,...,...
13696,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-106.66192 39.72244),EAGLE COUNTY LANDFILL,Waste Treatment and Disposal
13697,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-77.58931 39.96758),"OAK HILL, CENTRAL & SOUTH SCOTLAND SEWER INTER...",Other Nonhazardous Waste Treatment and Disposal
13698,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-77.58931 39.96758),"OAK HILL, CENTRAL & SOUTH SCOTLAND SEWER INTER...",Waste Treatment and Disposal
13699,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-74.31841 40.15134),JACKSON TWP,Other Nonhazardous Waste Treatment and Disposal


In [None]:
from branca.element import Figure


def sampleMarkerStyle(feature):
  """Style one sample-point marker from its GeoJSON feature.

  The radius is the feature's "max" property divided by 8, or a fixed 25 once
  that property reaches 160.
  """
  peak = float(feature['properties']["max"])
  return {
      "radius": peak / 8 if peak < 160 else 25,
      "opacity": 0.3,
      "color": 'DimGray',
  }


# Base map: one circle marker per sample point; later cells add layers to `map`.
map = samplepoints.explore(
    name='<span style="color:DarkOrange;">Samples</span>',
    color='DarkOrange',
    style_kwds={"style_function": sampleMarkerStyle},
    marker_kwds={"radius": 6},
    marker_type='circle_marker',
    popup=["substances", "materials", "max", "resultCount"],
)
#map

In [None]:
# Add one toggleable layer per industry name to the existing map.
# 'MediumSeaGreen' replaces 'MediumSageGreen', which is not a CSS colour name.
colors = ['MidnightBlue','Blue','DodgerBlue','DeepSkyBlue','CadetBlue','DarkCyan','LightSeaGreen','MediumSeaGreen','SpringGreen','PaleVioletRed','Purple','Fuchsia','HotPink','LightPink']
# The loop variable is deliberately not called `industry`, so the industry form
# parameter set near the top of the notebook is not overwritten.
for c, industry_name in enumerate(facilities.industryName.unique()):
  # Cycle through the palette so more industries than colours cannot raise IndexError.
  layer_color = colors[c % len(colors)]
  facilities[facilities['industryName'] == industry_name].explore(m=map,
                                                                  name=f'<span style="color:{layer_color};">{industry_name}</span>',
                                                                  color=layer_color,
                                                                  marker_kwds=dict(radius=3),
                                                                  popup=True)

## Map

In [None]:
# The CRS of both frames is already set in the "Prep data for mapping" cells,
# before the map layers are built, so it is not set again here.

# Add the layer toggle, then wrap the map in a full-width figure for display.
folium.LayerControl(collapsed=False).add_to(map)
fig = Figure(width='100%', height=900)
fig.add_child(map)


In [None]:
# Write the finished figure to a standalone HTML file in the working directory.
fig.save('SAWGraph-demo_Tracing_Downstream.html')