This notebook generates maps for the UI mockup answering the following 3 questions:
* Q1: What samples are near certain types of facilities (e.g. water supply systems, waste disposal sites, etc.)?

# Setup


In [None]:
%%capture
!pip install mapclassify --upgrade --quiet
!pip install SPARQLWrapper --upgrade --quiet
!pip install rdflib

In [None]:
#from branca.element import Figure                                  # For controlling the size of the final map
import folium                                                      # For map layer control
import geopandas as gpd                                            # For geospatial dataframes
import pandas as pd                                                # For dataframes
from shapely import wkt                                            # For working with WKT coordinates in a GeoDataFrame
from SPARQLWrapper import SPARQLWrapper2, JSON, GET, POST, DIGEST   # For querying SPARQL endpoints
import rdflib                                                      # For working with URIs

def convertToDataframe(results):
  """Turn a SPARQL result object into a pandas DataFrame.

  `results.bindings` is iterated as a sequence of mappings from variable name
  to a value object exposing `.value` and `.datatype`. Each value is wrapped in
  an rdflib Literal with that datatype and converted with `toPython()`.
  Every solution becomes one row; a variable that is absent from a solution
  is simply missing from that row's record.
  """
  def _as_python(term):
    # Let rdflib map the lexical value + datatype to a Python object.
    return rdflib.term.Literal(term.value, datatype=term.datatype).toPython()  # type: ignore[no-untyped-call]

  records = [
      {name: _as_python(solution[name]) for name in solution}
      for solution in results.bindings
  ]
  return pd.DataFrame(records)

#def convertS2ListToQueryString(s2):
#  s2list = s2['s2cell'].tolist()
#  s2list = [s2.replace("http://stko-kwg.geog.ucsb.edu/lod/resource/","kwgr:") for s2 in s2list]
#  s2_values_string = " ".join(s2list)
#  return s2_values_string


In [None]:
#for interactive widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display
import numpy as np

In [None]:
# Base URL of the SPARQL server; it is used below to build the query endpoint URL.
server = 'https://frink.apps.renci.org/' # @param ['https://frink.apps.renci.org/', 'https://gdb.acg.maine.edu:7201/repositories/']

In [None]:
# @title
# `server` may end with "/", so strip it before appending the path; otherwise the
# endpoint URL would contain a double slash ("...org//federation/sparql").
endpointGET = f'{server.rstrip("/")}/federation/sparql'


# Shared query client used by every query cell. Note that, despite the "GET" in
# its name, requests are sent with POST (see setMethod below).
sparqlGET = SPARQLWrapper2(endpointGET)
sparqlGET.setHTTPAuth(DIGEST)
# If the endpoint needs credentials, read them from the environment instead of
# writing them into the notebook, e.g.:
#sparqlGET.setCredentials(os.environ['SPARQL_USER'], os.environ['SPARQL_PASSWORD'])
sparqlGET.setMethod(POST)
sparqlGET.setReturnFormat(JSON)

# Q1 - What samples in Maine are near certain types of facilities (by NAICS Industry or Group Code)?


## Industry Selection

In [None]:
#Get list of PFAS industries
# The query returns one row per NAICS industry group (identifier, group label,
# sector label) that has a member code on a PFAS-concern industry list.
# Only the prefixes the query actually uses are declared.
q1 = '''PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX fio-pfas:  <http://w3id.org/fio/v1/pfas#>

SELECT DISTINCT ?NAICS ?industryGroup ?industrySector
WHERE {
  ?pfasList fio:hasMember ?industryCode;
                      rdfs:subClassOf fio-pfas:IndustryCollectionByPFASContaminationConcern.
  ?industryCode rdfs:label ?industry;
                fio:subcodeOf ?industryG.
  ?industryG rdf:type naics:NAICS-IndustryGroup;
                    rdfs:label ?industryGroup;
                    dcterms:identifier ?NAICS.
  ?industryG fio:subcodeOf ?industryS.
  ?industryS rdf:type naics:NAICS-IndustrySector;
          rdfs:label ?industrySector.
} ORDER BY ?industrySector
'''
sparqlGET.setQuery(q1)
industry_result = sparqlGET.query()
PFASindustries = convertToDataframe(industry_result)
#print(PFASindustries.info())
# @title

print("Please select an industry that is on some list of PFAS-handling industries.")

# One label per query row, formatted "<NAICS> (<sector>: <group>) ". A later cell
# takes the text before the first whitespace as the NAICS code.
industry_labels = (
    PFASindustries['NAICS'] + " (" + PFASindustries['industrySector'] + ": " + PFASindustries['industryGroup'] + ") "
).tolist()

Dropdown_ = widgets.Dropdown(
    options=industry_labels,
    description='Industry:',
)
output = widgets.Output()

def on_change(change):
  """Keep the notebook-level `industry` in sync with the dropdown selection.

  `change` is the ipywidgets change dict; its 'new' entry is the newly selected
  label. Without the `global` declaration the assignment would only create a
  local variable and the callback would have no effect.
  """
  global industry
  industry = change['new']


Dropdown_.observe(on_change, names='value')
display(Dropdown_)

Please select an industry that is on some list of PFAS-handling industries.


Dropdown(description='Industry:', options=('5617 (Administrative and Support and Waste Management and Remediat…

In [None]:
# @title
#industry = "5622 (Waste Treatment and Disposal)" # @param ["5622 (Waste Treatment and Disposal)","3222 (Converted Paper Manufacturing)", "221310 (Water Supply and Irrigation)", "221320 (Sewage Treatment)","3261 (Plastics Product Manufacturing)","3133 (Textile and Fabric Finishing and Coating)","3251 (Basic Chemical Manufacturing)","3255 (Paint, Coating, and Adhesive Manufacturing)", "3364 (Aerospace Product and Parts)","812320 (Drycleaning and Laundry Services)", "561740 (Carpet and Upholstery Cleaning Services)"]{"allow-input":true}
# Full label currently selected in the dropdown.
industry = Dropdown_.value

# The NAICS code is the first whitespace-delimited token of the label.
icode = industry.split(maxsplit=1)[0]
print(icode)

3255


## Queries

### Facilities and their location

In [None]:
# @title
def _naics_values_clause(code):
  """Return the SPARQL VALUES clause restricting a facility query to `code`.

  A code longer than four characters is bound to ?industryCode (a specific
  NAICS industry code); any other code is bound to ?industryGroup.
  """
  target = '?industryCode' if len(code) > 4 else '?industryGroup'
  return 'VALUES ' + target + ' {naics:NAICS-' + str(code) + '}.'


#Retrieve facility details
# Only variables bound in the pattern are projected (the previously selected
# ?industry was never bound, so it never produced a column).
q2 = '''
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>

#Where are the facilities of the selected industry

select DISTINCT ?facility ?facWKT ?facilityName ?industryName where {
        ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode ;
            geo:hasGeometry/geo:asWKT ?facWKT;
            rdfs:label ?facilityName.
        ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
        ''' + _naics_values_clause(icode) + '''
}
'''

#Retrieve S2 cells
q3 = '''
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>

#Which level-13 S2 cells contain facilities of the selected industry

select DISTINCT ?s2cell where {
        #find facilities
        ?s2cell rdf:type kwg-ont:S2Cell_Level13 ;
                kwg-ont:sfContains ?facility.
            ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode.
        ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup.
        ''' + _naics_values_clause(icode) + '''
} GROUP BY ?s2cell
'''

#print(q1)
print(q2)
print(q3)


PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX fio: <http://w3id.org/fio/v1/fio#>

#Where are landfills or dod facilities

select DISTINCT ?facility ?facWKT ?facilityName ?industry ?industryName where {
        ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode ;
        		geo:hasGeometry/geo:asWKT ?facWKT;
        		rdfs:label ?facilityName.
        ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
        VALUES ?industryGroup {naics:NAICS-325

In [None]:
# Run the facility query (q2) on the shared client and convert the returned
# bindings into a DataFrame; the last line displays it as the cell output.
sparqlGET.setQuery(q2)
facility_result = sparqlGET.query()
facilities = convertToDataframe(facility_result)
#print(facilities.info())
facilities

Unnamed: 0,facility,facWKT,facilityName,industryName
0,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-72.555437 42.161908),RANDOLPH PRODUCTS CO,Paint and Coating Manufacturing
1,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-72.725830 42.125550),RPM WOOD FINISHES GROUP INC,Paint and Coating Manufacturing
2,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-71.869697 42.552405),ADVANCE COATINGS COMPANY,Paint and Coating Manufacturing
3,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-71.068120 42.768560),CF JAMESON,Paint and Coating Manufacturing
4,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-71.178550 42.687510),KEY POLYMER LLC,Adhesive Manufacturing
...,...,...,...,...
2864,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-74.536727 40.544014),AKZO NOBEL COATINGS INC,Paint and Coating Manufacturing
2865,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-85.138995 39.637537),"C P, INC",Paint and Coating Manufacturing
2866,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-74.132490 40.739640),BENJAMIN MOORE & CO.,Paint and Coating Manufacturing
2867,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT(-86.284573 39.907879),IDEM NOT FOUND,Adhesive Manufacturing


In [None]:
# Run the S2-cell query (q3) on the shared client and convert the returned
# bindings into a DataFrame with a single `s2cell` column.
sparqlGET.setQuery(q3)
s2_result = sparqlGET.query()
s2 = convertToDataframe(s2_result)
#print(s2.info())
s2

Unnamed: 0,s2cell
0,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
1,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
2,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
3,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
4,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
...,...
2385,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
2386,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
2387,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....
2388,http://stko-kwg.geog.ucsb.edu/lod/resource/s2....


### Neighborhood around facilities

In [None]:
# @title
#s2list = s2['s2cell'].tolist()
#s2list = [s2.replace("http://stko-kwg.geog.ucsb.edu/lod/resource/","kwgr:") for s2 in s2list]
#s2_values_string = " ".join(s2list)
#print(s2_values_string)

# Bind the selected code to ?industryCode when it is a specific industry code
# (more than four characters) and to ?industryGroup otherwise, the same rule
# used when q2 and q3 are built.
q4_values_target = '?industryCode' if len(icode) > 4 else '?industryGroup'

# Neighbours of the level-13 S2 cells that contain matching facilities.
q4 = '''
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>

SELECT ?s2neighbor WHERE {
  {select DISTINCT ?s2cell where {
        #find facilities
        ?s2cell rdf:type kwg-ont:S2Cell_Level13 ;
                kwg-ont:sfContains ?facility.
            ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode ;
            rdfs:label ?facilityName.
        ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
        VALUES ''' + q4_values_target + ''' {naics:NAICS-''' + str(icode) + '''}.
        } GROUP BY ?s2cell}
  ?s2neighbor kwg-ont:sfTouches ?s2cell.
 }'''

# don't need to run this unless S2 cell geometries are desired.

#sparqlGET.setQuery(q4)
#s2_filtered_result = sparqlGET.query()
#s2_filtered = convertToDataframe(s2_filtered_result)
#s2_filtered


### Samples taken in the neighborhood

In [None]:
# Query the sample points connected to S2 cells that contain a facility of the
# selected industry, or that touch such a cell, and aggregate their observations
# into one row per sample point.
#
# Bind the selected code to ?industryCode when it is a specific industry code
# (more than four characters) and to ?industryGroup otherwise, the same rule
# used when q2 and q3 are built.
#
# NOTE(review): in the recorded output most rows have no datedresults/dates;
# presumably SUBSTR(?time, 1, 7) fails when ?time is not a plain string literal
# - confirm against the data before changing the BIND.
# NOTE(review): ORDER BY uses ?time, which is not a GROUP BY key - verify that
# the ordering is meaningful on the target endpoint.
q5_values_target = '?industryCode' if len(icode) > 4 else '?industryGroup'

q5='''
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX qudt: <http://qudt.org/schema/qudt/>

SELECT  DISTINCT (COUNT(DISTINCT ?observation) as ?resultCount) (MAX(?numericResult) as ?max) (GROUP_CONCAT(DISTINCT ?subVal; separator="</br>") as ?results) (GROUP_CONCAT(DISTINCT ?datedSubVal; separator="</br>") as ?datedresults) (GROUP_CONCAT(?year; separator=" </br> ") as ?dates) (GROUP_CONCAT(DISTINCT ?Typelabels; separator=";") as ?Type) (GROUP_CONCAT(DISTINCT ?material) as ?Materials) ?sp ?spName ?spWKT
WHERE {

      {SELECT DISTINCT ?s2neighbor WHERE {
        #find facilities
        ?s2cell rdf:type kwg-ont:S2Cell_Level13 ;
                kwg-ont:sfContains ?facility.
            ?facility fio:ofIndustry ?industryGroup;
            fio:ofIndustry ?industryCode.
        ?industryCode a naics:NAICS-IndustryCode;  # only NAICS specific industry codes
            fio:subcodeOf ?industryGroup ;
            rdfs:label ?industryName.
        VALUES ''' + q5_values_target + ''' {naics:NAICS-'''+str(icode)+'''}.
        ?s2neighbor kwg-ont:sfTouches|owl:sameAs ?s2cell.
        } }

     ?sp rdf:type coso:SamplePoint;
        spatial:connectedTo ?s2neighbor ;
        rdfs:label ?spName;
        geo:hasGeometry/geo:asWKT ?spWKT.
    ?observation rdf:type coso:ContaminantObservation;
        coso:observedAtSamplePoint ?sp;
        coso:ofSubstance ?substance1 ;
        coso:observedTime ?time ;
          coso:analyzedSample ?sample ;
        coso:hasResult ?result .
    ?sample rdfs:label ?sampleLabel;
      coso:sampleOfMaterialType/rdfs:label ?material.
  {SELECT ?sample (GROUP_CONCAT(DISTINCT ?sampleClassLabel; separator=";") as ?Typelabels) WHERE{
      ?sample a ?sampleClass.
    ?sampleClass rdfs:label ?sampleClassLabel.
    VALUES ?sampleClass {coso:WaterSample coso:AnimalMaterialSample coso:PlantMaterialSample coso:SolidMaterialSample}
    } GROUP BY ?sample }
    ?result coso:measurementValue ?result_value;
        coso:measurementUnit ?unit .
  OPTIONAL {?result qudt:quantityValue/qudt:numericValue ?numericResult}
    ?substance1 rdfs:label ?substance.
    ?unit qudt:symbol ?unit_sym.
  BIND(SUBSTR(?time, 1, 7) as ?year) #just year and month
  BIND(CONCAT('<b>',str(?result_value), '</b>', " ", ?unit_sym, " ", ?substance) as ?subVal)  # results with units
  BIND(CONCAT(?year, ' <b> ',str(?result_value), '</b>', " ", ?unit_sym, " ", ?substance) as ?datedSubVal) # result with simple date and units
  }  GROUP BY ?sp ?spName ?spWKT
  order by DESC(?time)

  '''

sparqlGET.setQuery(q5)
samplepoint_result = sparqlGET.query()
samplepoints = convertToDataframe(samplepoint_result)


print(q5)



PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX spatial: <http://purl.org/spatialai/spatial/spatial-full#>
PREFIX kwg-ont: <http://stko-kwg.geog.ucsb.edu/lod/ontology/>
PREFIX kwgr: <http://stko-kwg.geog.ucsb.edu/lod/resource/>
PREFIX fio: <http://w3id.org/fio/v1/fio#>
PREFIX naics: <http://w3id.org/fio/v1/naics#>
PREFIX coso: <http://w3id.org/coso/v1/contaminoso#>
PREFIX qudt: <http://qudt.org/schema/qudt/>

SELECT  DISTINCT (COUNT(DISTINCT ?observation) as ?resultCount) (MAX(?numericResult) as ?max) (GROUP_CONCAT(DISTINCT ?subVal; separator="</br>") as ?results) (GROUP_CONCAT(DISTINCT ?datedSubVal; separator="</br>") as ?datedresults) (GROUP_CONCAT(?year; separator=" </br> ") as ?dates) (GROUP_CONCAT(DISTINCT ?Typelabel

In [None]:
# Summarise the sample point columns and their non-null counts.
samplepoints.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 133 entries, 0 to 132
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   resultCount   133 non-null    int64 
 1   max           133 non-null    object
 2   results       133 non-null    object
 3   Type          133 non-null    object
 4   Materials     133 non-null    object
 5   sp            133 non-null    object
 6   spName        133 non-null    object
 7   spWKT         133 non-null    object
 8   datedresults  12 non-null     object
 9   dates         12 non-null     object
dtypes: int64(1), object(9)
memory usage: 10.5+ KB


## Prep data for mapping

In [None]:
# Parse the WKT strings into shapely geometries and use that column as the
# GeoDataFrame's geometry.
samplepoints = gpd.GeoDataFrame(
    samplepoints.assign(spWKT=samplepoints['spWKT'].map(wkt.loads)),
    geometry='spWKT',
)

# Tag the geometries with EPSG:4326 (no reprojection; any existing CRS is overridden).
samplepoints.set_crs(epsg=4326, inplace=True, allow_override=True)


Unnamed: 0,resultCount,max,results,Type,Materials,sp,spName,spWKT,datedresults,dates
0,1,0.18,<b>0.18</b> ng/L 2H-Perfluorohexanoic acid,Water Sample,Water,https://geoconnex.us/iow/wqp/AZDEQ_WPD-55-5855...,WCP-82 (AZDEQ_WPD-55-585575_61817) site data i...,POINT (-112.15047 33.50223),,
1,1,0.332,<b>0.332</b> ng/L 5H-Perfluorohexanesulfonic acid,Water Sample,Water,https://geoconnex.us/iow/wqp/AZDEQ_WPD-55-9224...,24MW-21M (AZDEQ_WPD-55-922406_81736) site data...,POINT (-112.052 33.47002),,
2,1,0.623,<b>0.623</b> ng/L 2H-Perfluorohexanoic acid,Water Sample,Water,https://geoconnex.us/iow/wqp/AZDEQ_WPD-55-9220...,MVH-16d (AZDEQ_WPD-55-922050_81642) site data ...,POINT (-112.48003 34.55376),,
3,1,0.773,<b>0.773</b> ng/L 5H-Perfluorohexanesulfonic acid,Water Sample,Water,https://geoconnex.us/iow/wqp/AZDEQ_WPD-55-2295...,LHH-05 (AZDEQ_WPD-55-229557_81743) site data i...,POINT (-114.35992 34.50295),,
4,1,2.5,<b>2.5</b> ng/L 2H-Perfluorohexanoic acid,Water Sample,Water,https://geoconnex.us/iow/wqp/AZDEQ_WPD-55-2294...,MW-11R (AZDEQ_WPD-55-229483_81752) site data i...,POINT (-112.04795 33.50381),,
...,...,...,...,...,...,...,...,...,...,...
128,137,330.0,<b>18</b> ng/L PERFLUOROPENTANOIC ACID</br><b>...,Water Sample,WASTE WATER,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,EGAD sample point 150513,POINT (-70.33891 43.68824),2023-06 <b> 18</b> ng/L PERFLUOROPENTANOIC ACI...,2023-06 </br> 2023-02 </br> 2022-10 </br> 2023...
129,147,39.6,<b>7.96</b> ng/L PERFLUOROOCTANOIC ACID-LINEAR...,Water Sample,WASTE WATER,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,EGAD sample point 150036,POINT (-70.20676 44.07472),2022-11 <b> 7.96</b> ng/L PERFLUOROOCTANOIC AC...,2022-11 </br> 2022-11 </br> 2023-07 </br> 2023...
130,154,103.0,<b>28.8</b> ng/L PERFLUOROHEXANOIC ACID</br><b...,Water Sample,WASTE WATER,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,EGAD sample point 149584,POINT (-70.34116 43.68818),2023-04 <b> 28.8</b> ng/L PERFLUOROHEXANOIC AC...,2023-04 </br> 2023-03 </br> 2023-05 </br> 2023...
131,210,6.1,<b>2.24</b> Î¼g/L Perfluorooctanesulfonate</br...,Water Sample,Water,https://geoconnex.us/iow/wqp/MNPCA-MN0001449-S...,Non-Contact Cooling Water (MNPCA-MN0001449-SD0...,POINT (-92.90393 44.78816),,


In [None]:
# Parse the WKT strings into shapely geometries and use that column as the
# GeoDataFrame's geometry.
facilities = gpd.GeoDataFrame(
    facilities.assign(facWKT=facilities['facWKT'].map(wkt.loads)),
    geometry='facWKT',
)

# Tag the geometries with EPSG:4326 (no reprojection; any existing CRS is overridden).
facilities.set_crs(epsg=4326, inplace=True, allow_override=True)

Unnamed: 0,facility,facWKT,facilityName,industryName
0,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-72.55544 42.16191),RANDOLPH PRODUCTS CO,Paint and Coating Manufacturing
1,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-72.72583 42.12555),RPM WOOD FINISHES GROUP INC,Paint and Coating Manufacturing
2,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-71.8697 42.5524),ADVANCE COATINGS COMPANY,Paint and Coating Manufacturing
3,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-71.06812 42.76856),CF JAMESON,Paint and Coating Manufacturing
4,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-71.17855 42.68751),KEY POLYMER LLC,Adhesive Manufacturing
...,...,...,...,...
2864,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-74.53673 40.54401),AKZO NOBEL COATINGS INC,Paint and Coating Manufacturing
2865,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-85.139 39.63754),"C P, INC",Paint and Coating Manufacturing
2866,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-74.13249 40.73964),BENJAMIN MOORE & CO.,Paint and Coating Manufacturing
2867,http://w3id.org/fio/v1/epa-frs-data#d.FRS-Faci...,POINT (-86.28457 39.90788),IDEM NOT FOUND,Adhesive Manufacturing


In [None]:
# Build an HTML link from each facility URI, tidy the industry labels, then
# remove the raw URI column in place.
facility_uri = facilities['facility']
facilities['url'] = '<a href="' + facility_uri + '">' + facility_uri + "</a>"
facilities['industryName'] = facilities['industryName'].str.strip()
del facilities['facility']
facilities.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 2869 entries, 0 to 2868
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   facWKT        2869 non-null   geometry
 1   facilityName  2869 non-null   object  
 2   industryName  2869 non-null   object  
 3   url           2869 non-null   object  
dtypes: geometry(1), object(3)
memory usage: 89.8+ KB


In [None]:
# Cast the aggregated maximum to float and add an HTML link built from the
# sample point URI.
sample_uri = samplepoints['sp']
samplepoints['max'] = samplepoints['max'].astype(float)
samplepoints['url'] = '<a href="' + sample_uri + '">' + sample_uri + '</a>'
samplepoints.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 133 entries, 0 to 132
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   resultCount   133 non-null    int64   
 1   max           133 non-null    float64 
 2   results       133 non-null    object  
 3   Type          133 non-null    object  
 4   Materials     133 non-null    object  
 5   sp            133 non-null    object  
 6   spName        133 non-null    object  
 7   spWKT         133 non-null    geometry
 8   datedresults  12 non-null     object  
 9   dates         12 non-null     object  
 10  url           133 non-null    object  
dtypes: float64(1), geometry(1), int64(1), object(8)
memory usage: 11.6+ KB


In [None]:
# Prefer the dated result text and fall back to the undated one. The DataFrame
# only has a `datedresults` column when at least one row bound that variable,
# so guard against it being absent altogether.
if 'datedresults' in samplepoints.columns:
    samplepoints['Result'] = samplepoints['datedresults'].fillna(samplepoints['results'])
else:
    samplepoints['Result'] = samplepoints['results']

In [None]:
# Display the prepared sample points (including the new `url` and `Result` columns).
samplepoints

Unnamed: 0,resultCount,max,results,Type,Materials,sp,spName,spWKT,datedresults,dates,url,Result
0,1,0.180,<b>0.18</b> ng/L 2H-Perfluorohexanoic acid,Water Sample,Water,https://geoconnex.us/iow/wqp/AZDEQ_WPD-55-5855...,WCP-82 (AZDEQ_WPD-55-585575_61817) site data i...,POINT (-112.15047 33.50223),,,"<a href=""https://geoconnex.us/iow/wqp/AZDEQ_WP...",<b>0.18</b> ng/L 2H-Perfluorohexanoic acid
1,1,0.332,<b>0.332</b> ng/L 5H-Perfluorohexanesulfonic acid,Water Sample,Water,https://geoconnex.us/iow/wqp/AZDEQ_WPD-55-9224...,24MW-21M (AZDEQ_WPD-55-922406_81736) site data...,POINT (-112.052 33.47002),,,"<a href=""https://geoconnex.us/iow/wqp/AZDEQ_WP...",<b>0.332</b> ng/L 5H-Perfluorohexanesulfonic acid
2,1,0.623,<b>0.623</b> ng/L 2H-Perfluorohexanoic acid,Water Sample,Water,https://geoconnex.us/iow/wqp/AZDEQ_WPD-55-9220...,MVH-16d (AZDEQ_WPD-55-922050_81642) site data ...,POINT (-112.48003 34.55376),,,"<a href=""https://geoconnex.us/iow/wqp/AZDEQ_WP...",<b>0.623</b> ng/L 2H-Perfluorohexanoic acid
3,1,0.773,<b>0.773</b> ng/L 5H-Perfluorohexanesulfonic acid,Water Sample,Water,https://geoconnex.us/iow/wqp/AZDEQ_WPD-55-2295...,LHH-05 (AZDEQ_WPD-55-229557_81743) site data i...,POINT (-114.35992 34.50295),,,"<a href=""https://geoconnex.us/iow/wqp/AZDEQ_WP...",<b>0.773</b> ng/L 5H-Perfluorohexanesulfonic acid
4,1,2.500,<b>2.5</b> ng/L 2H-Perfluorohexanoic acid,Water Sample,Water,https://geoconnex.us/iow/wqp/AZDEQ_WPD-55-2294...,MW-11R (AZDEQ_WPD-55-229483_81752) site data i...,POINT (-112.04795 33.50381),,,"<a href=""https://geoconnex.us/iow/wqp/AZDEQ_WP...",<b>2.5</b> ng/L 2H-Perfluorohexanoic acid
...,...,...,...,...,...,...,...,...,...,...,...,...
128,137,330.000,<b>18</b> ng/L PERFLUOROPENTANOIC ACID</br><b>...,Water Sample,WASTE WATER,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,EGAD sample point 150513,POINT (-70.33891 43.68824),2023-06 <b> 18</b> ng/L PERFLUOROPENTANOIC ACI...,2023-06 </br> 2023-02 </br> 2022-10 </br> 2023...,"<a href=""http://w3id.org/sawgraph/v1/me-egad-d...",2023-06 <b> 18</b> ng/L PERFLUOROPENTANOIC ACI...
129,147,39.600,<b>7.96</b> ng/L PERFLUOROOCTANOIC ACID-LINEAR...,Water Sample,WASTE WATER,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,EGAD sample point 150036,POINT (-70.20676 44.07472),2022-11 <b> 7.96</b> ng/L PERFLUOROOCTANOIC AC...,2022-11 </br> 2022-11 </br> 2023-07 </br> 2023...,"<a href=""http://w3id.org/sawgraph/v1/me-egad-d...",2022-11 <b> 7.96</b> ng/L PERFLUOROOCTANOIC AC...
130,154,103.000,<b>28.8</b> ng/L PERFLUOROHEXANOIC ACID</br><b...,Water Sample,WASTE WATER,http://w3id.org/sawgraph/v1/me-egad-data#sampl...,EGAD sample point 149584,POINT (-70.34116 43.68818),2023-04 <b> 28.8</b> ng/L PERFLUOROHEXANOIC AC...,2023-04 </br> 2023-03 </br> 2023-05 </br> 2023...,"<a href=""http://w3id.org/sawgraph/v1/me-egad-d...",2023-04 <b> 28.8</b> ng/L PERFLUOROHEXANOIC AC...
131,210,6.100,<b>2.24</b> Î¼g/L Perfluorooctanesulfonate</br...,Water Sample,Water,https://geoconnex.us/iow/wqp/MNPCA-MN0001449-S...,Non-Contact Cooling Water (MNPCA-MN0001449-SD0...,POINT (-92.90393 44.78816),,,"<a href=""https://geoconnex.us/iow/wqp/MNPCA-MN...",<b>2.24</b> Î¼g/L Perfluorooctanesulfonate</br...


In [None]:
from branca.element import Figure
import math


def _sample_point_style(feature):
  """Style one sample point feature; the marker radius depends on its `max` value.

  Radius is 1 when `max` is missing or NaN, `max / 8` when `max` is below 160,
  and 25 otherwise. Opacity and stroke colour are constant.
  """
  peak = feature['properties']["max"]
  if peak is None or math.isnan(peak):
    radius = 1
  elif peak < 160:
    radius = peak / 8
  else:
    radius = 25
  return {"radius": radius, "opacity": 0.3, "color": 'DimGray'}


# Base map; the facility layers are added to this same object in a later cell.
map = folium.Map(location=[35, -80.95], zoom_start=5, control_scale=True)

sample_layer = folium.features.GeoJson(
    samplepoints,
    name='<span style="color:DarkOrange;">Sample Points</span>',
    marker=folium.CircleMarker(fill_color='DarkOrange', weight=0.8),
    style_function=_sample_point_style,
    tooltip=folium.GeoJsonTooltip(fields=['spName', 'resultCount', 'max', 'Type', 'Materials']),
    popup=folium.GeoJsonPopup(
        fields=['spName', 'url', 'resultCount', 'max', 'Result', 'Type', 'Materials'],
        max_height=500,
        min_width=400,
        max_width=800,
    ),
)
sample_layer.add_to(map)

<folium.features.GeoJson at 0x79cf54b78f20>

In [None]:
# List the distinct industry names; each one becomes its own map layer below.
print(facilities.industryName.unique())

['Paint and Coating Manufacturing' 'Adhesive Manufacturing']


In [None]:
c = 0
# CSS colour names, one per facility layer ('MediumSageGreen' is not a CSS
# colour name; 'MediumSeaGreen' is).
colors = ['MidnightBlue','Blue','DodgerBlue','DeepSkyBlue','CadetBlue','DarkCyan','LightSeaGreen','MediumSeaGreen','SpringGreen','PaleVioletRed','Purple','Fuchsia','HotPink','LightPink']
# The loop variable is deliberately not named `industry`: that name holds the
# dropdown selection and is read again when the map file name is built.
for c, industry_name in enumerate(facilities.industryName.unique()):
  # Wrap around the palette rather than raising IndexError when there are more
  # industries than colours.
  layer_color = colors[c % len(colors)]
  facilities[facilities['industryName'] == industry_name].explore(
      m=map,
      name=f'<span style="color:{layer_color};">{industry_name}</span>',
      color=layer_color,
      marker_kwds=dict(radius=3),
      popup=True,
  )

## Map

In [None]:
# Add an expanded layer switcher to the map, then embed the map in a
# full-width, 900-high Figure. The last expression displays the figure.
# Note: each run of this cell adds another LayerControl to the same map object.
folium.LayerControl(collapsed=False).add_to(map)
fig = Figure(width='100%', height=900)
fig.add_child(map)


In [None]:
from datetime import date

# Save the figure as an HTML file named after today's date and the current
# value of `industry` with all whitespace removed.
today = date.today()
industry_slug = "".join(industry.split())
fig.save(f'SAWGraph-demo_{today}_SamplesNearFacilities_{industry_slug}.html')