# Charlotte Data Explorer
Reads and plots Charlotte water quality data pulled using the `CharlotteScraper.ipynb` notebook.

In [2]:
#Imports
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt

## Bokeh map components
from bokeh.models import ColumnDataSource,WMTSTileSource
from bokeh.plotting import figure, show, output_file
from bokeh.tile_providers import *
from bokeh.io import output_notebook
output_notebook()

In [3]:
#Function to convert WGS84 points to Web Mercator
def wgs84_to_web_mercator(df, lon="lon", lat="lat"):
    """Converts decimal longitude/latitude to Web Mercator format"""
    k = 6378137
    df["x"] = df[lon] * (k * np.pi/180.0)
    df["y"] = np.log(np.tan((90 + df[lat]) * np.pi/360.0)) * k
    return df

### Examine Organics data
* Identify and merge all the yearly *result* and *location* data (skip 2015 for now as it has different format)
* Append coordinates information to result records
* Plot, for a given year, violations vs ok

In [4]:
#Get files
locationFiles = glob.glob('../../data/Charlotte/CLTW_WaterQualityUpdates_2018/*Organic Chemicals.csv')
resultFiles = glob.glob('../../data/Charlotte/CLTW_WaterQualityUpdates_2018/Organics Results *.csv')

In [5]:
#Merge result files
dfList = []
for file in resultFiles[1:]:
    df = pd.read_csv(file,index_col="OBJECTID")#,parse_dates=['CollectionDate'])
    #print(str(df.columns.values.tolist()))
    dfList.append(df)
dfResults = pd.concat(dfList,axis='rows')
#Drop the GlobalID column
dfResults.drop("GlobalID",axis=1,inplace=True)
#Add a year column
dfResults['CollectionDate'] = pd.to_datetime(dfResults['CollectionDate'],format='%Y-%m-%d')
dfResults.insert(0,'Year',dfResults['CollectionDate'].dt.year)

In [6]:
#Merge locations files
dfList = []
for file in locationFiles[1:]:
    df = pd.read_csv(file,index_col="OBJECTID")
    df.insert(0,'Year',file.split("\\")[1][:4])
    #print(str(df.columns.values.tolist()))
    dfList.append(df)
dfLocations = pd.concat(dfList)

In [7]:
#Check that RawLocation codes are unique
datesDF = dfLocations.groupby(['RawLocationCode','x','y','Year'])['NPA'].count().unstack('Year')
datesDF.max() #None should be > 1

Year
2016    1.0
2017    1.0
2018    1.0
dtype: float64

In [8]:
#Convert results to numbers, setting trace values to 0
dfResults['Value'] = dfResults['Result'].apply(lambda x: x.split()[0] if (x[0] != '<') else 0)
dfResults['Value'] = dfResults['Value'].astype(np.float)

In [9]:
#Group by analyte and year
dfX = dfResults.groupby(['Year','AnalyteName'])['Value']
dfYear = dfX.mean().unstack('Year')
dfYear.sample(5)

Year,2017,2018
AnalyteName,Unnamed: 1_level_1,Unnamed: 2_level_1
"Cis-1,3-Dichloropropene",0.0,0.0
Tert-Butylbenzene,0.0,0.0
Trichloroethylene,0.0,0.0
Naphthalene,0.0,0.0
"Trans-1,3-Dichloropropene",0.0,0.0


In [10]:
#Examine the row identifier columns
dfLocations[['Year','RawLocationCode','x','y']].head()

Unnamed: 0_level_0,Year,RawLocationCode,x,y
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2016,WD-E45,-80.878375,35.070923
2,2016,WD-SS-008,-80.785923,35.086359
3,2016,WD-SS-407,-80.7032,35.127483
4,2016,WD-SS-012,-80.840593,35.132097
5,2016,WD-SS-004,-81.018405,35.14578


In [11]:
#Add coordinates to the results by joining the location data
dfSite = pd.merge(dfResults,dfLocations[['RawLocationCode','x','y']],how='left',on=['RawLocationCode'])
#Convert coordinates to web mercator
dfSite.rename({'x':'lon','y':'lat'},axis='columns',inplace=True)
dfSite = wgs84_to_web_mercator(dfSite)
dfSite.head()

Unnamed: 0,Year,Address,AnalyteName,CollectionDate,FullResult,LowerAllowableLimit,ParameterGroup,RawLocationCode,Result,UpperAllowableLimit,Violation,Value,lon,lat,x,y
0,2017,"7980 Babe Stillwell Rd Huntersville, NC 28078","1,2,3-Trichloropropane",2017-02-01,Less than ppb,,Organic Chemicals,WT-N05,< 0.0005 mg/L,0 mg/L,NO,0.0,-80.89332,35.429176,-9005003.0,4222358.0
1,2017,"820 Beatties Ford Rd Charlotte, NC 28216",m-Dichlorobenzene,2017-02-01,Less than ppb,,Organic Chemicals,WT-V03,< 0.0005 mg/L,0 mg/L,NO,0.0,-80.85589,35.250582,-9000836.0,4197987.0
2,2017,"820 Beatties Ford Rd Charlotte, NC 28216",m-Dichlorobenzene,2017-02-01,Less than ppb,,Organic Chemicals,WT-V03,< 0.0005 mg/L,0 mg/L,NO,0.0,-80.85589,35.250582,-9000836.0,4197987.0
3,2017,"820 Beatties Ford Rd Charlotte, NC 28216",m-Dichlorobenzene,2017-02-01,Less than ppb,,Organic Chemicals,WT-V03,< 0.0005 mg/L,0 mg/L,NO,0.0,-80.85589,35.250582,-9000836.0,4197987.0
4,2017,"820 Beatties Ford Rd Charlotte, NC 28216",p-Isopropyltoluene,2017-02-01,Less than ppb,,Organic Chemicals,WT-V03,< 0.0005 mg/L,0 mg/L,NO,0.0,-80.85589,35.250582,-9000836.0,4197987.0


In [12]:
#Create a bokeh CDS from the dataframe
source = ColumnDataSource(dfSite)

In [13]:
#Simple data source
p = figure(title="Map")
p.circle(x='x',y='y',source=source)
show(p)

In [14]:
xMin = dfSite.x.min();xMax = dfSite.x.max()
yMin = dfSite.y.min();yMax = dfSite.y.max()
p = figure(x_range=(xMin, xMax), y_range=(yMin, yMax))#,
           #x_axis_type="mercator", y_axis_type="mercator")
p.add_tile(CARTODBPOSITRON)
p.circle(x='x',y='y',source=source)
show(p)