This notebook generates maps for the UI mockup answering the following question:
* Q1: What samples in a particular region (e.g. the state of Maine) are downstream of certain types of facilities (by NAICS Industry or Group Code)?

# Setup


In [None]:
# @title
%%capture
!pip install mapclassify --upgrade --quiet
!pip install SPARQLWrapper --upgrade --quiet
!pip install rdflib

In [None]:
# @title
#from branca.element import Figure                                  # For controlling the size of the final map
import folium                                                      # For map layer control
import geopandas as gpd                                            # For geospatial dataframes
import pandas as pd                                                # For dataframes
from shapely import wkt                                            # For working with WKT coordinates in a GeoDataFrame
from SPARQLWrapper import SPARQLWrapper2, JSON, GET, POST, DIGEST   # For querying SPARQL endpoints
import rdflib                                                      # For working with URIs

def convertToDataframe(results):
  """Convert a SPARQLWrapper2 result set into a pandas DataFrame.

  Each binding (one result row) becomes one DataFrame row; every bound value
  is converted to a native Python object through rdflib, using the datatype
  reported for that value.

  Args:
    results: Object exposing ``bindings`` (a list of dicts mapping variable
      name to a value object with ``.value`` and ``.datatype``) and,
      optionally, ``variables`` (the list of SELECT variable names).

  Returns:
    A DataFrame with one row per binding. When ``results.variables`` is
    available, the frame always carries one column per SELECT variable, in
    that order, even if the result set is empty or an OPTIONAL variable was
    never bound; unbound cells are NaN.
  """
  rows = [
      {
          name: rdflib.term.Literal(value.value, datatype=value.datatype).toPython()  # type: ignore[no-untyped-call]
          for name, value in binding.items()
      }
      for binding in results.bindings
  ]
  df = pd.DataFrame(rows)

  # Without this, an empty result (or an OPTIONAL variable that is never bound)
  # yields a frame that lacks the expected columns, and later cells fail with
  # KeyError when they index those columns.
  variables = getattr(results, "variables", None)
  if variables:
    df = df.reindex(columns=list(variables))
  return df

# not used anymore, previously used to manually pass S2 cells across multiple repositories
def convertS2ListToQueryString(s2list):
  """Join S2 cell URIs into one space-separated string of `kwgr:` prefixed names.

  The full KnowWhereGraph resource namespace in each URI is replaced by the
  `kwgr:` prefix so the result can be spliced into a SPARQL VALUES clause.
  """
  resource_namespace = "http://stko-kwg.geog.ucsb.edu/lod/resource/"
  return " ".join(uri.replace(resource_namespace, "kwgr:") for uri in s2list)


In [None]:
# @title
#for interactive widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

# Q1 - What samples are downstream of certain types of facilities (by NAICS Industry or Group Code)?


In [None]:
industry = "2213" # @param ["5622 (Waste Treatment and Disposal)","3222 (Converted Paper Manufacturing)", "221310 (Water Supply and Irrigation)", "221320 (Sewage Treatment)","3261 (Plastics Product Manufacturing)","3133 (Textile and Fabric Finishing and Coating)","3251 (Basic Chemical Manufacturing)","3255 (Paint, Coating, and Adhesive Manufacturing)", "3364 (Aerospace Product and Parts)","812320 (Drycleaning and Laundry Services)", "561740 (Carpet and Upholstery Cleaning Services)"]{"allow-input":true}

# The NAICS code is the first whitespace-delimited token of the selection;
# any trailing "(label)" text is ignored.
icode = industry.split()[0]

print(icode)

# Codes longer than four characters are bound to ?industryCode, shorter ones
# to ?industryGroup; the resulting VALUES clause is spliced into the queries.
industryValueFilter = (
    f'VALUES ?industryCode {{naics:NAICS-{icode}}}.'
    if len(icode) > 4
    else f'VALUES ?industryGroup {{naics:NAICS-{icode}}}.'
)



2213


In [None]:
admin_region = "23011 (Kennebec County, Maine)" # @param ["All", "23 (Maine)","23011 (Kennebec County, Maine)","23019 (Penobscot County, Maine)", "24 (New Hampshire)","18 (Indiana)", "17 (Illinois)"] {"allow-input":true}

# The region code is the first whitespace-delimited token of the selection;
# it is also reused later in the name of the saved HTML file.
regionCode = admin_region.split()[0]
if regionCode == 'All':
  # No spatial restriction: leave the filter fragment empty.
  regionFilter = ''
elif len(regionCode) <= 5:
  # Codes of up to five characters are mapped onto a KnowWhereGraph
  # administrative region URI and used to restrict ?county.
  regionURI = 'kwgr:administrativeRegion.USA.' + regionCode
  print(regionURI)
  regionFilter = '''?county rdf:type kwg-ont:AdministrativeRegion_3 ;
                   kwg-ont:administrativePartOf ''' + regionURI + ''' . '''
else:
  # need to implement region filters with datacommons geoids.
  # Say so explicitly: otherwise the queries silently run without any region
  # restriction while the output file is still labelled with this code.
  print('Region filter is not implemented for "' + regionCode + '"; the queries will not be restricted to a region.')
  regionFilter = ''




kwgr:administrativeRegion.USA.23011


## Queries

In [None]:
# Retrieve facility details.
# q2 selects each matching facility with its WKT geometry, its label, and the
# NAICS industry code and industry name. The `regionFilter` and
# `industryValueFilter` fragments built in the parameter cells are spliced
# into the WHERE clause by string concatenation.
q2 = '''
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>

#Where are landfills or dod facilities

select DISTINCT ?facility ?facWKT ?facilityName ?industryCode ?industryName where {
    #SERVICE <repository:Spatial>{
    #    #determine near distance
    #    ?s2 kwg-ont:sfTouches|owl:sameAs ?s2neighbor.
    #    ?s2neighbor rdf:type kwg-ont:S2Cell_Level13.
    #}
    #SERVICE <repository:FIO>{
        #find facilities
        #?s2neighbor kwg-ont:sfContains ?facility.
            ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode ;
            spatial:connectedTo ?county ;
        		geo:hasGeometry/geo:asWKT ?facWKT;
        		rdfs:label ?facilityName.
        ''' + regionFilter + '''
        ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
        ''' + industryValueFilter + '''
    #}

}
'''

# Print the assembled query so the spliced-in filters can be inspected.
print(q2)


PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>

#Where are landfills or dod facilities

select DISTINCT ?facility ?facWKT ?facilityName ?industryCode ?industryName where {
    #SERVICE <repository:Spatial>{
    #    #determine near distance
    #    ?s2 kwg-ont:sfTouches|owl:sameAs ?s2neighbor.
    #    ?s2neighbor rdf:type kwg-ont:S2Cell_Level13.
    #}
    #SERVICE <repository:FIO>{
        #find facilities
        #?s2neighbor kwg-ont:sfContains ?facility.
            ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCod

In [None]:
endpointGET = 'https://frink.apps.renci.org/federation/sparql' # @param ['https://frink.apps.renci.org/fiokg/sparql','https://frink.apps.renci.org/qlever-geo/sparql', 'https://frink.apps.renci.org/federation/sparql']


# One wrapper instance is created here and reused for every query in the
# notebook. Despite the "GET" in its name, requests are sent with POST.
sparqlGET = SPARQLWrapper2(endpointGET)
# NOTE(review): DIGEST auth is selected but no credentials are set anywhere in
# this notebook, so this presumably has no effect -- confirm before removing.
sparqlGET.setHTTPAuth(DIGEST)
sparqlGET.setMethod(POST)
sparqlGET.setReturnFormat(JSON)

# Run the facility query (q2) and convert the bindings to a DataFrame.
sparqlGET.setQuery(q2)
facility_result = sparqlGET.query()
facilities = convertToDataframe(facility_result)
#print(facilities.info())
facilities

Unnamed: 0,facility,facWKT,facilityName,industryCode,industryName
0,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-69.653771 44.527040),KENNEBEC WASTEWATER TREATMENT FACILITY,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
1,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-69.779380 44.302050),AUGUSTA WASTEWATER TREATMENT FACILITY,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
2,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-69.758280 44.191380),GARDINER WWTF,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
3,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-69.787090 44.291700),HALLOWELL WATER DISTRICT CSO,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
4,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-69.793800 44.204780),GARDINER PUBLIC WORKS FACILITY,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
5,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-69.958217 44.318637),WINTHROP UTILITIES DISTRICT,http://w3id.org/fio/v1/naics#NAICS-221310,Water Supply and Irrigation Systems
6,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-69.759580 44.173360),CITY OF GARDINER,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
7,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-69.758280 44.191380),CITY OF GARDINER,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
8,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-69.779500 44.302120),GREATER AUGUSTA UTILITY DISTRICT,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities


In [None]:
# Find streams.
# q1a selects the flowlines downstream of the selected facilities, with their
# WKT geometry, flowline type and (optional) label. The inner sub-select finds
# the ?s2 cells connected to matching facilities; the `regionFilter` and
# `industryValueFilter` fragments are spliced into it by string concatenation.
q1a='''
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX nhdplusv2: <http://nhdplusv2.spatialai.org/v1/nhdplusv2#>

PREFIX owl: <http://www.w3.org/2002/07/owl#>
select DISTINCT ?downstream_flowline ?dsflWKT ?fl_type ?streamName
where {
        {SELECT ?s2 WHERE {
           #find facilities
           ?s2 spatial:connectedTo ?facility.

#        {select ?s2neighbor where {
#          #find facilities
#          ?s2neighbor kwg-ont:sfContains ?facility.

          ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode;
            spatial:connectedTo ?county.
        		#geo:hasGeometry/geo:asWKT ?facWKT;
        		#rdfs:label ?facilityName.
          ''' + regionFilter + '''
          ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
          ''' + industryValueFilter + ''' }
          }
    #determine near streams
    ?s2 kwg-ont:sfTouches|owl:sameAs ?s2neighbor.
    ?s2neighbor rdf:type kwg-ont:S2Cell_Level13;
              spatial:connectedTo ?upstream_flowline.

 		?upstream_flowline rdf:type hyf:HY_FlowPath ;
		          # find all flowlines downstream of them
						  hyf:downstreamFlowPathTC ?downstream_flowline .
    ?downstream_flowline geo:hasGeometry/geo:asWKT ?dsflWKT;
              	nhdplusv2:hasFTYPE ?fl_type.
    OPTIONAL {?downstream_flowline rdfs:label ?streamName}


    }
'''

# Print the assembled query so the spliced-in filters can be inspected.
print(q1a)


PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
PREFIX nhdplusv2: <http://nhdplusv2.spatialai.org/v1/nhdplusv2#>

PREFIX owl: <http://www.w3.org/2002/07/owl#>
select DISTINCT ?downstream_flowline ?dsflWKT ?fl_type ?streamName
where {
        {SELECT ?s2 WHERE {
           #find facilities
           ?s2 spatial:connectedTo ?facility.

#        {sele

In [None]:
# Run the downstream-flowline query (q1a) on the shared wrapper and convert
# the bindings to a DataFrame; the bare name on the last line displays it.
sparqlGET.setQuery(q1a)
stream_result = sparqlGET.query()
streams = convertToDataframe(stream_result)
streams

Unnamed: 0,downstream_flowline,dsflWKT,fl_type,streamName
0,https://geoconnex.us/nhdplusv2/comid/3321300,LINESTRING (-69.65169180232664 44.533667797541...,StreamRiver,
1,https://geoconnex.us/nhdplusv2/comid/3321314,LINESTRING (-69.66410166897407 44.542558930861...,StreamRiver,
2,https://geoconnex.us/nhdplusv2/comid/3321342,LINESTRING (-69.675090868957 44.53233339754359...,StreamRiver,
3,https://geoconnex.us/nhdplusv2/comid/3321344,LINESTRING (-69.679179002284 44.52842153088301...,StreamRiver,
4,https://geoconnex.us/nhdplusv2/comid/3321346,LINESTRING (-69.66555420230515 44.517680330899...,StreamRiver,
...,...,...,...,...
136,https://geoconnex.us/nhdplusv2/comid/5205302,LINESTRING (-69.82019240206512 43.953051931776...,ArtificialPath,
137,https://geoconnex.us/nhdplusv2/comid/5205304,LINESTRING (-69.82522246872395 43.978239131736...,ArtificialPath,
138,https://geoconnex.us/nhdplusv2/comid/5205308,LINESTRING (-69.81243166874384 43.947831931784...,ArtificialPath,
139,https://geoconnex.us/nhdplusv2/comid/5205318,LINESTRING (-69.78827760211465 43.800855332012...,ArtificialPath,


In [None]:
## Trace downstream
# q1 returns, per sample point and sample, the contaminant results observed
# on that sample. The inner sub-select finds the level-13 S2 cells connected
# to flowlines downstream of the selected facilities; the outer pattern finds
# the sample points inside those cells. The `regionFilter` and
# `industryValueFilter` fragments are spliced in by string concatenation.
#
# Each observation is tied to its own sample through coso:analyzedSample.
# Without that join, observations were matched to the sample point only, so
# every sample taken at a point was reported with the results of all samples
# taken there (identical resultCount / Max / results across different samples).
q1='''
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>

PREFIX owl: <http://www.w3.org/2002/07/owl#>
select DISTINCT ?samplePoint ?spWKT ?sample (GROUP_CONCAT(DISTINCT ?sampleId; separator="; ") as ?samples) (COUNT(DISTINCT ?subVal) as ?resultCount) (MAX(?result) as ?Max) ?unit (GROUP_CONCAT(DISTINCT ?subVal; separator=" <br> ") as ?results)
where {
      { SELECT DISTINCT ?s2cell WHERE {
        #find facilities
        ?s2origin spatial:connectedTo ?facility.
        ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode;
            spatial:connectedTo ?county.
            #geo:hasGeometry/geo:asWKT ?facWKT;
            #rdfs:label ?facilityName.
        ''' + regionFilter + '''
        ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
        ''' + industryValueFilter + '''
    #determine near streams
    ?s2origin kwg-ont:sfTouches|owl:sameAs ?s2neighbor.
    ?s2neighbor rdf:type kwg-ont:S2Cell_Level13;
              spatial:connectedTo ?upstream_flowline.

    ?upstream_flowline rdf:type hyf:HY_FlowPath ;
              # find all flowlines downstream of them
              hyf:downstreamFlowPathTC ?downstream_flowline .
      ?s2cell spatial:connectedTo ?downstream_flowline ;
              rdf:type kwg-ont:S2Cell_Level13 .
    }}

    #find samples
    ?samplePoint kwg-ont:sfWithin ?s2cell;
        rdf:type coso:SamplePoint;
        geo:hasGeometry/geo:asWKT ?spWKT.
    ?s2cell rdf:type kwg-ont:S2Cell_Level13.
    ?sample coso:fromSamplePoint ?samplePoint;
        dcterms:identifier ?sampleId;
        coso:sampleOfMaterialType/rdfs:label ?type.
    ?observation rdf:type coso:ContaminantObservation;
        coso:observedAtSamplePoint ?samplePoint;
        coso:analyzedSample ?sample;  # only the observations made on this sample
        coso:ofDSSToxSubstance/skos:altLabel ?substance;
        coso:hasResult/coso:measurementValue ?result;
        coso:hasResult/coso:measurementUnit/qudt:symbol ?unit.
    BIND((CONCAT(?substance, ": ", str(?result) , " ", ?unit) ) as ?subVal)

    } GROUP BY ?samplePoint ?spWKT ?sample ?unit
'''

# Print the assembled query so the spliced-in filters can be inspected.
print(q1)


PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>

PREFIX owl: <http://www.w3.org/2002/07/owl#>
select DISTINCT ?samplePoint ?spWKT ?sample (GROUP_CONCAT(DISTINCT ?sampleId; separator="; ") as ?samples) (COUNT(DISTINCT ?subVal) as ?resultCount) (MAX(?result) as ?Max) ?unit (GROUP_CONCAT(DISTINCT ?subVal; separator=" <br> ") as ?results)
where {
      

In [None]:
# Run the downstream-sample query (q1) on the shared wrapper and convert the
# bindings to a DataFrame; the bare name on the last line displays it.
sparqlGET.setQuery(q1)
samplepoint_result = sparqlGET.query()
samplepoints = convertToDataframe(samplepoint_result)
samplepoints

Unnamed: 0,samplePoint,spWKT,sample,samples,resultCount,Max,unit,results
0,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-69.804092 43.930117),http://w3id.org/sawgraph/v1/me-egad-data#sampl...,MARRISON WELL,6,2.4,ng/L,PFPeA: 0.846 ng/L <br> PFHpA: 0.316 ng/L <br> ...
1,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-69.987594 44.260541),http://w3id.org/sawgraph/v1/me-egad-data#sampl...,LK9961-SW-2023,7,1.49,ng/L,PFBS: 0.369 ng/L <br> PFHpA: 0.783 ng/L <br> P...
2,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-69.987594 44.260541),http://w3id.org/sawgraph/v1/me-egad-data#sampl...,"LK9961-BLC-C1(1,2,3,4,5)-2023",7,1.49,ng/L,PFPeA: 0.685 ng/L <br> PFOA: 1.49 ng/L <br> PF...
3,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-69.987594 44.260541),http://w3id.org/sawgraph/v1/me-egad-data#sampl...,"LK9961-BLC-C2(6,7,8,9,10)-2023",7,1.49,ng/L,PFHxA: 0.874 ng/L <br> PFBS: 0.369 ng/L <br> P...
4,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-69.987594 44.260541),http://w3id.org/sawgraph/v1/me-egad-data#sampl...,"LK9961-LMB-C1(1,2,3,4,5)-2023",7,1.49,ng/L,PFHpA: 0.783 ng/L <br> PFBS: 0.369 ng/L <br> P...
...,...,...,...,...,...,...,...,...
270,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-69.763201 44.124432),http://w3id.org/sawgraph/v1/me-egad-data#sampl...,"KRD-SMB-C2(6,7,8,9,10)-2023",9,3.86,ng/L,PFBA: 2.61 ng/L <br> PFOA: 3.86 ng/L <br> PFNA...
271,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-69.698163 44.435633),http://w3id.org/sawgraph/v1/me-egad-data#sampl...,"KSD-SMB-C2(2,3,5,6,9)",1,5.175,ng/g,6:2 FTSA: 5.175 ng/g
272,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-69.698163 44.435633),http://w3id.org/sawgraph/v1/me-egad-data#sampl...,"KSD-WCF-C1(1,2,5,6,10)",1,5.175,ng/g,6:2 FTSA: 5.175 ng/g
273,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,POINT(-69.698163 44.435633),http://w3id.org/sawgraph/v1/me-egad-data#sampl...,"KSD-WCF-C2(3,4,7,8,9)",1,5.175,ng/g,6:2 FTSA: 5.175 ng/g


### Old Queries (passing s2)

In [None]:
#sparqlGET.setQuery(q3)
#s2_result = sparqlGET.query()
#s2 = convertToDataframe(s2_result)
#print(facilities.info())
#s2

In [None]:
#s2list = s2['s2cell'].tolist()
#s2list = [s2.replace("http://stko-kwg.geog.ucsb.edu/lod/resource/","kwgr:") for s2 in s2list]
#s2_values_string = " ".join(s2list)
#print(s2_values_string)
#s2_values_string = convertS2ListToQueryString(s2['s2cell'].tolist())

#q4 = '''
#PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
#PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
#PREFIX owl: <http://www.w3.org/2002/07/owl#>

#PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
#PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
#PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>

#SELECT ?s2cell WHERE {
#	?s2neighbor spatial:connectedTo kwgr:administrativeRegion.USA.'''+admin_region.split()[0]+''' .
#  VALUES ?s2neighbor {''' + s2_values_string + '''}
#  ?s2neighbor kwg-ont:sfTouches | owl:sameAs ?s2cell.
# }'''


#sparqlGET = SPARQLWrapper2("https://frink.apps.renci.org/spatialkg/sparql")
#sparqlGET.setHTTPAuth(DIGEST)
#sparqlGET.setMethod(POST)
#sparqlGET.setReturnFormat(JSON)

#sparqlGET.setQuery(q4)
#s2_filtered_result = sparqlGET.query()
#s2_filtered = convertToDataframe(s2_filtered_result)

#s2_filtered_values_string = convertS2ListToQueryString(s2_filtered['s2cell'].tolist())

#print(len(s2_filtered['s2cell'].tolist()))
#print(s2_filtered_values_string)

In [None]:
# Query the hydrology graph for the downstream S2cells

#q5='''
#PREFIX geo: <http://www.opengis.net/ont/geosparql#>
#PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
#PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
#PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>

#PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
#PREFIX nhdplusv2: <http://nhdplusv2.spatialai.org/v1/nhdplusv2#>

#PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
#PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

#SELECT DISTINCT ?s2cell WHERE {
# 		?upstream_flowline rdf:type hyf:HY_FlowPath ;
#		          spatial:connectedTo ?s2cellus ;
		# find all flowlines downstream of them
#						  hyf:downstreamFlowPathTC ?downstream_flowline .
#		VALUES ?s2cellus {''' + s2_filtered_values_string + '''}
#      ?s2cell spatial:connectedTo ?downstream_flowline ;
#              rdf:type kwg-ont:S2Cell_Level13 .
#        }
#'''

#sparqlGET2 = SPARQLWrapper2("https://frink.apps.renci.org/hydrologykg/sparql")
#sparqlGET2.setHTTPAuth(DIGEST)
#sparqlGET2.setMethod(POST)
#sparqlGET2.setReturnFormat(JSON)

#sparqlGET2.setQuery(q5)
#hydrology_result = sparqlGET2.query()
#print(hydrology_result.fullResult)
#hydrology = convertToDataframe(hydrology_result)

#s2_filtered_values_string = convertS2ListToQueryString(hydrology['s2cell'].tolist())


#print(len(hydrology['s2cell'].tolist()))
#print(s2_filtered_values_string)

In [None]:
# Query the spatial graph to filter the S2 cells to a specific admin region

#q6='''
#PREFIX geo: <http://www.opengis.net/ont/geosparql#>
#PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
#PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
#PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>

#PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
#PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

#SELECT DISTINCT ?s2cell WHERE {
# 		?s2cell spatial:connectedTo kwgr:administrativeRegion.USA.''' + admin_region.split()[0] + '''.
#    VALUES ?s2cell {'''+ s2_filtered_values_string + '''}
#        }
#'''

#print(q6)

#sparqlGET = SPARQLWrapper2("https://frink.apps.renci.org/spatialkg/sparql")
#sparqlGET.setHTTPAuth(DIGEST)
#sparqlGET.setMethod(POST)
#sparqlGET.setReturnFormat(JSON)
#sparqlGET.setQuery(q6)
#s2_filtered_result = sparqlGET.query()
#s2_filtered = convertToDataframe(s2_filtered_result)

#s2_filtered_values_string = convertS2ListToQueryString(s2_filtered['s2cell'].tolist())

#print(s2_filtered_values_string)

In [None]:
# Query just the PFAS graph for samples that match the S2 cells returned from the FIO graph

#s2list = s2['s2cell'].tolist()
#s2list = [s2.replace("http://stko-kwg.geog.ucsb.edu/lod/resource/","kwgr:") for s2 in s2list]
#s2_values_string = " ".join(s2list)
#print(s2_values_string)

#q6='''
# Get the number of results and all substances that have been detected at over 4.0 ng/L at samplepoints from a specific set of S2 cells (here cells with plastics manufacturers) together with the sample material type where these substances have been detected in
# This does not require access to the Spatial graph

#PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
#PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
#PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

#PREFIX geo: <http://www.opengis.net/ont/geosparql#>
#PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
#PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
#PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
#PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
#PREFIX qudt: <http://qudt.org/schema/qudt/>

#SELECT (COUNT(DISTINCT ?subVal) as ?resultCount) (MAX(?result_value) as ?max) (GROUP_CONCAT(DISTINCT ?substance; separator=";<br> ") as ?substances) (GROUP_CONCAT(DISTINCT ?matTypeLabel; separator="; <br>") as ?materials) ?sp ?spWKT  WHERE {
#      ?sp rdf:type coso:SamplePoint;
#      spatial:connectedTo ?s2cell ;
#  	  geo:hasGeometry/geo:asWKT ?spWKT.
#    VALUES ?s2cell {''' + s2_filtered_values_string + '''}
#  ?observation rdf:type coso:ContaminantObservation;
#    	coso:observedAtSamplePoint ?sp;
#    	coso:ofSubstance ?substance ;
#        coso:analyzedSample ?sample ;
#    	coso:hasResult ?result .
#    ?sample rdfs:label ?sampleLabel;
#  		coso:sampleOfMaterialType ?matType.
#  ?matType rdfs:label ?matTypeLabel.
#   ?result coso:measurementValue ?result_value;
#   		coso:measurementUnit ?unit .
  # FILTER (?result_value > 4).
  #VALUES ?unit {<http://qudt.org/vocab/unit/NanoGM-PER-L>}.
#  ?unit qudt:symbol ?unit_sym.
#    BIND((CONCAT(str(?result_value) , " ", ?unit_sym)) as ?subVal)
#} GROUP BY ?sp ?spWKT
#'''
#print(q6)

#sparqlGET2 = SPARQLWrapper2("https://frink.apps.renci.org/sawgraph/sparql")
#sparqlGET2.setHTTPAuth(DIGEST)
#sparqlGET2.setMethod(POST)
#sparqlGET2.setReturnFormat(JSON)

#sparqlGET2.setQuery(q6)
#samplepoint_result = sparqlGET2.query()
#samplepoints = convertToDataframe(samplepoint_result)
#samplepoints

## Prep data for mapping

In [None]:
if samplepoints.empty:
    print("No sample points found for the selected criteria. Skipping geospatial processing for sample points.")
else:
    # Parse the WKT text into geometries and promote the frame to a
    # GeoDataFrame in WGS84 (EPSG:4326).
    samplepoints['spWKT'] = samplepoints['spWKT'].map(wkt.loads)
    samplepoints = gpd.GeoDataFrame(samplepoints, geometry='spWKT')
    samplepoints.set_crs(epsg=4326, inplace=True, allow_override=True)
    # Show each sample point URI as a link that opens in a new tab.
    samplepoints['samplePoint'] = samplepoints['samplePoint'].map(
        lambda uri: f'<a href="{uri}" target="_blank">{uri}</a>'
    )
    # clean up unit encoding
    samplepoints['unit'] = samplepoints['unit'].str.replace('Î¼','μ')
    samplepoints['results'] = samplepoints['results'].str.replace('Î¼','μ')

In [None]:
# Parse the WKT text into geometries and promote the frame to a GeoDataFrame.
facilities = gpd.GeoDataFrame(
    facilities.assign(facWKT=facilities['facWKT'].map(wkt.loads)),
    geometry='facWKT',
)
# Link each facility to its EPA FRS detail page; the registry id is taken as
# the text after the last "." in the facility URI.
facilities['facility'] = facilities['facility'].map(
    lambda uri: f'<a href="https://frs-public.epa.gov/ords/frs_public2/fii_query_detail.disp_program_facility?p_registry_id={uri.split(".")[-1]}" target="_blank">{uri}</a>'
)

# Declare the coordinates as WGS84 (EPSG:4326); as the cell's last expression
# this also displays the frame.
facilities.set_crs(epsg=4326, inplace=True, allow_override=True)

Unnamed: 0,facility,facWKT,facilityName,industryCode,industryName
0,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-69.65377 44.52704),KENNEBEC WASTEWATER TREATMENT FACILITY,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
1,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-69.77938 44.30205),AUGUSTA WASTEWATER TREATMENT FACILITY,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
2,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-69.75828 44.19138),GARDINER WWTF,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
3,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-69.78709 44.2917),HALLOWELL WATER DISTRICT CSO,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
4,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-69.7938 44.20478),GARDINER PUBLIC WORKS FACILITY,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
5,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-69.95822 44.31864),WINTHROP UTILITIES DISTRICT,http://w3id.org/fio/v1/naics#NAICS-221310,Water Supply and Irrigation Systems
6,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-69.75958 44.17336),CITY OF GARDINER,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
7,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-69.75828 44.19138),CITY OF GARDINER,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities
8,"<a href=""https://frs-public.epa.gov/ords/frs_p...",POINT (-69.7795 44.30212),GREATER AUGUSTA UTILITY DISTRICT,http://w3id.org/fio/v1/naics#NAICS-221320,Sewage Treatment Facilities


In [None]:
#facilities['facility'] = facilities['facility'].apply(lambda x:f'<a href="{x}">{x}</a>')

In [None]:
# Parse the WKT text into geometries and promote the frame to a GeoDataFrame.
streams = gpd.GeoDataFrame(
    streams.assign(dsflWKT=streams['dsflWKT'].map(wkt.loads)),
    geometry='dsflWKT',
)
# Show each flowline URI as a link that opens in a new tab.
streams['downstream_flowline'] = streams['downstream_flowline'].map(
    lambda uri: f'<a href="{uri}" target="_blank">{uri}</a>'
)
# Declare the coordinates as WGS84 (EPSG:4326).
streams.set_crs(epsg=4326, inplace=True, allow_override=True)
streams

Unnamed: 0,downstream_flowline,dsflWKT,fl_type,streamName
0,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-69.65169 44.53367, -69.65169 44.5...",StreamRiver,
1,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-69.6641 44.54256, -69.66436 44.54...",StreamRiver,
2,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-69.67509 44.53233, -69.67551 44.5...",StreamRiver,
3,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-69.67918 44.52842, -69.67876 44.5...",StreamRiver,
4,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-69.66555 44.51768, -69.66539 44.5...",StreamRiver,
...,...,...,...,...
136,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-69.82019 43.95305, -69.81873 43.9...",ArtificialPath,
137,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-69.82522 43.97824, -69.8282 43.97...",ArtificialPath,
138,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-69.81243 43.94783, -69.81011 43.9...",ArtificialPath,
139,"<a href=""https://geoconnex.us/nhdplusv2/comid/...","LINESTRING (-69.78828 43.80086, -69.78807 43.7...",ArtificialPath,


In [None]:
from branca.element import Figure

# Values of "Max" that mark a non-detect rather than a numeric result.
_NON_DETECT_VALUES = ("non-detect", "http://w3id.org/coso/v1/contaminoso#non-detect")


def _sample_marker_style(feature):
    """Return the circle-marker style for one sample feature.

    Non-detects get radius 5 and a black outline. Numeric results get a
    DimGray outline and a radius scaled from the "Max" property: 5 below 40,
    Max/8 from 40 up to (not including) 160, and 25 from 160 upwards.
    """
    max_value = feature['properties']["Max"]
    if max_value in _NON_DETECT_VALUES:
        return {"radius": 5, "opacity": 0.3, "color": 'Black'}
    numeric_max = float(max_value)
    if numeric_max < 40:
        radius = 5
    elif numeric_max < 160:
        radius = numeric_max / 8
    else:
        radius = 25
    return {"radius": radius, "opacity": 0.3, "color": 'DimGray'}


if samplepoints.empty:
    # No samples were found, so `samplepoints` is still a plain DataFrame with
    # no .explore(). Start from an empty base map instead so the stream and
    # facility layers added by the following cells can still be drawn.
    map = folium.Map()
    if not facilities.empty:
        min_lon, min_lat, max_lon, max_lat = (float(v) for v in facilities.total_bounds)
        map.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]])
else:
    map = samplepoints.explore(name=f'<span style="color:DarkOrange;">Samples</span>', color='DarkOrange',
                               style_kwds=dict(style_function=_sample_marker_style),
                               marker_kwds=dict(radius=6),
                               marker_type='circle_marker',
                               popup = True, #["substances", "materials", "Max", "resultCount"],
                               popup_kwds={'max_height':500}
                               )
#map

In [None]:
# Add the stream layer to the existing map (passed via m=), then list the
# distinct stream names that were found.
streams.explore(
    m=map,
    name='<span style="color:LightSkyBlue;">Streams</span>',
    color='LightSkyBlue',
    popup=['streamName', 'fl_type', 'downstream_flowline'],
    popup_kwds=dict(max_width=350),
)
print(streams['streamName'].unique())

[nan 'Chaffee Brook' 'Kennebec River' 'Whitney Brook' 'Vaughn Brook'
 'Jug Stream' 'Rolling Dam Brook' 'Morton Brook']


In [None]:
#map = folium.Map()
# Palette for the facility layers, one colour per industry name.
# NOTE(review): 'lightred' and 'darkpurple' look like folium icon colour names
# rather than CSS colour names -- confirm they render as intended.
colors = ['Purple', 'PaleVioletRed', 'Orchid', 'Fuchsia', 'MediumVioletRed', 'HotPink', 'LightPink', 'red', 'lightred', 'pink', 'orange',
          'MidnightBlue', 'MediumBlue', 'SlateBlue', 'MediumSlateBlue', 'DodgerBlue', 'DeepSkyBlue', 'SkyBlue', 'CadetBlue', 'DarkCyan', 'LightSeaGreen',
          'MediumSeaGreen', 'lightblue', 'gray', 'blue', 'darkred', 'lightgreen', 'green', 'darkblue', 'darkpurple', 'cadetblue', 'orange', 'lightgray', 'darkgreen']

# The loop variable must not be called `industry`: that name holds the form
# selection and is read again when the output file name is built.
for layer_index, industry_name in enumerate(facilities.industryName.unique()):
  # Wrap around the palette so more industries than colours cannot raise IndexError.
  layer_color = colors[layer_index % len(colors)]
  facilities[facilities['industryName'] == industry_name].explore(m=map,
                                                                  name=f'<span style="color:{layer_color};">{industry_name}</span>',
                                                                  color=layer_color,
                                                                  marker_kwds=dict(radius=3),
                                                                  popup=['facility', 'facilityName', 'industryName'])

# Map

In [None]:
# @title
# Declare WGS84 (EPSG:4326) on the layers. `samplepoints` stays a plain
# DataFrame (no set_crs) when the sample query returned nothing, so only touch
# it once it has been promoted to a GeoDataFrame.
if isinstance(samplepoints, gpd.GeoDataFrame):
    samplepoints.set_crs(epsg=4326, inplace=True, allow_override=True)
facilities.set_crs(epsg=4326, inplace=True, allow_override=True)

# Add the layer toggle and wrap the map in a fixed-height figure; as the
# cell's last expression, add_child(...) also displays the figure.
folium.LayerControl(collapsed=False).add_to(map)
fig = Figure(width='100%', height=900)
fig.add_child(map)


In [None]:
from datetime import date

# The file name combines today's date, the selected region code and the
# industry selection with all whitespace removed.
today = date.today()
fig.save('SAWGraph-demo_{}_DownstreamTracing_{}_{}.html'.format(today, regionCode, "".join(industry.split())))

#fig.save(f'SAWGraph-demo_Tracing_Downstream.html')