In [1]:
# Import libraries
import pandas as pd
import numpy as np
import datetime as dt

In [2]:
# Load Kateri's processed water quality export into a DataFrame
df = pd.read_csv(
    filepath_or_buffer="../../data/raw/ul_data_wqp_processed_2020-03-05.csv"
)

In [3]:
# Keep only the columns needed downstream: station id/name, sample timestamp,
# the measured characteristic with its value, and the station coordinates.
df = df[[
    "MonitoringLocationIdentifier",
    'MonitoringLocationName',
    "datetime",
    "CharacteristicName",
    "ResultMeasureValue",
    "LatitudeMeasure",
    "LongitudeMeasure",
]].copy()

In [4]:
# Reshape from long to wide: one row per (station id, station name, timestamp, lat, lon)
# and one column per CharacteristicName holding its ResultMeasureValue.
# Multiple values for the same row/column cell are combined by pivot_table's default
# aggregation (the mean).
wq = df.pivot_table(
    values="ResultMeasureValue",
    index=[
        'MonitoringLocationIdentifier',
        'MonitoringLocationName',
        'datetime',
        "LatitudeMeasure",
        "LongitudeMeasure",
    ],
    columns='CharacteristicName',
)
# Blank out the columns-axis label left behind by the pivot.
wq.columns.name = ''
# The row keys stay in the index here; a later cell resets the index and parses the timestamps.

In [5]:
# Disabled: average to daily values to remove hourly variation at some monitoring locations
# wq = wq.groupby(['MonitoringLocationIdentifier',
#                  pd.Grouper(key='datetime', freq='D')])[wq.columns[5:]].mean()

In [6]:
# Move the pivot's index levels back into ordinary columns, then convert the
# datetime column to pandas datetime dtype so it can be filtered by date.
wq = wq.reset_index().assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime'])
)

In [7]:
# Select rows from 2016 onward.
# The cutoff is inclusive of 2016-01-01 00:00:00. A bare date string is interpreted as
# midnight, so the previous `> '2015-12-31'` test also let through samples taken later
# in the day on 2015-12-31.
wq_16 = wq.loc[wq['datetime'] >= '2016-01-01']

In [8]:
# Preview the first five rows of the filtered wide table
wq_16.head(n=5)

Unnamed: 0,MonitoringLocationIdentifier,MonitoringLocationName,datetime,LatitudeMeasure,LongitudeMeasure,"Alkalinity, total",Aluminum,Ammonia-nitrogen,Arsenic,Barium,...,Total Kjeldahl nitrogen,Total dissolved solids,Total fixed solids,Total suspended solids,Total volatile solids,Turbidity,Volatile suspended solids,Weather condition (WMO code 4501) (choice list),Zinc,pH
2,USGS-401327111462601,UTAH LAKE HAB STUDY SITE 3,2016-08-10 13:05:00,40.224119,-111.773939,163.0,,,,,...,,1037.386667,,,,110.0,,,,
3,USGS-401432111454301,UTAH LAKE HAB STUDY SITE 4,2016-08-10 15:15:00,40.242311,-111.761811,165.0,,,,,...,,1034.04,,,,74.0,,,,
4,USGS-401613111463301,UTAH LAKE HAB STUDY SITE 1,2016-08-10 10:15:00,40.270319,-111.775881,164.0,,,,,...,,980.683333,,,,100.0,,,,
5,USGS-401658111491601,UTAH LAKE HAB STUDY SITE 2,2016-08-10 11:30:00,40.2827,-111.8212,166.0,,,,,...,,1050.723333,,,,59.0,,,,
13,UTAHDWQ_WQX-4917305,Utah Lake at American Fork Marina near boat ramp,2016-09-26 12:15:00,40.34238,-111.800839,,,,,,...,,,,,,,,,,


In [9]:
# Narrow the wide table to the station/time/location keys plus the variables of interest:
# Secchi depth, turbidity, total suspended solids, and the three chlorophyll-a columns.
pred = wq_16[[
    "MonitoringLocationIdentifier",
    "MonitoringLocationName",
    "datetime",
    "LatitudeMeasure",
    "LongitudeMeasure",
    "Depth, Secchi disk depth",
    "Turbidity",
    "Total suspended solids",
    "Chlorophyll a, uncorrected for pheophytin",
    "Chlorophyll a, corrected for pheophytin",
    "Chlorophyll a, free of pheophytin",
]].copy()

# Secchi disk depth

In [10]:
# Extract Secchi disk depth values together with the station/time/location keys.
# .copy() matches the other column selections in this notebook and gives an independent
# frame, so later column assignments cannot trigger SettingWithCopyWarning.
depth = pred.loc[:,["MonitoringLocationIdentifier", "MonitoringLocationName","datetime",
                    "LatitudeMeasure", "LongitudeMeasure","Depth, Secchi disk depth"]].copy()

In [11]:
# Preview the first 50 rows of the Secchi depth table
depth.head(n=50)

Unnamed: 0,MonitoringLocationIdentifier,MonitoringLocationName,datetime,LatitudeMeasure,LongitudeMeasure,"Depth, Secchi disk depth"
2,USGS-401327111462601,UTAH LAKE HAB STUDY SITE 3,2016-08-10 13:05:00,40.224119,-111.773939,
3,USGS-401432111454301,UTAH LAKE HAB STUDY SITE 4,2016-08-10 15:15:00,40.242311,-111.761811,
4,USGS-401613111463301,UTAH LAKE HAB STUDY SITE 1,2016-08-10 10:15:00,40.270319,-111.775881,
5,USGS-401658111491601,UTAH LAKE HAB STUDY SITE 2,2016-08-10 11:30:00,40.2827,-111.8212,
13,UTAHDWQ_WQX-4917305,Utah Lake at American Fork Marina near boat ramp,2016-09-26 12:15:00,40.34238,-111.800839,
14,UTAHDWQ_WQX-4917305,Utah Lake at American Fork Marina near boat ramp,2016-10-03 10:15:00,40.34238,-111.800839,
15,UTAHDWQ_WQX-4917305,Utah Lake at American Fork Marina near boat ramp,2017-05-16 12:47:00,40.34238,-111.800839,
16,UTAHDWQ_WQX-4917305,Utah Lake at American Fork Marina near boat ramp,2017-06-12 19:50:00,40.34238,-111.800839,
17,UTAHDWQ_WQX-4917305,Utah Lake at American Fork Marina near boat ramp,2017-07-04 14:15:00,40.34238,-111.800839,
18,UTAHDWQ_WQX-4917305,Utah Lake at American Fork Marina near boat ramp,2017-07-10 17:00:00,40.34238,-111.800839,


In [12]:
# Rename columns to short snake_case names for the exported file
depth = depth.rename(
    {
        'MonitoringLocationIdentifier': 'mlid',
        'MonitoringLocationName': 'location',
        'LatitudeMeasure': 'lat',
        'LongitudeMeasure': 'long',
        'Depth, Secchi disk depth': 'secchi_depth_meters',
    },
    axis="columns",
)

In [13]:
# Check column dtypes and non-null counts (shows how many rows actually have a Secchi reading)
depth.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 611 entries, 2 to 1242
Data columns (total 6 columns):
mlid                   611 non-null object
location               611 non-null object
datetime               611 non-null datetime64[ns]
lat                    611 non-null float64
long                   611 non-null float64
secchi_depth_meters    230 non-null float64
dtypes: datetime64[ns](1), float64(3), object(2)
memory usage: 33.4+ KB


In [14]:
# Change date format to a compact YYYYMMDD string (the time of day is dropped).
# Passing the column through pd.to_datetime first keeps this cell safe to re-run:
# after the first run the column already holds strings, and calling .dt on it
# directly would raise AttributeError.
depth['datetime'] = pd.to_datetime(depth['datetime']).dt.strftime('%Y%m%d')

In [15]:
# Write the Secchi depth table to the processed-data folder, without the row index
depth.to_csv(
    path_or_buf="../../data/processed/secchi_depth.csv",
    index=False,
)

In [None]:
# TODO: decide whether to take the mean of measurements from the same location on the same day