In [1]:
# Import libraries
import pandas as pd
import numpy as np
import datetime as dt

In [2]:
# Load the two processed inputs:
#   pre  -> Secchi depth observations before spatial interpolation (CSV)
#   post -> per-date summary table produced after interpolation (Excel)
pre = pd.read_csv(filepath_or_buffer="../../data/processed/secchi_depth.csv")
post = pd.read_excel(io="../../data/processed/Secchi_merge.xlsx")

In [3]:
# assign explanatory variables in a list for easy use later

# Data Wrangling

## Secchi depth

In [4]:
# Preview the first rows of the pre-interpolation observations.
pre.head()

Unnamed: 0,mlid,location,datetime,lat,long,secchi_depth_meters
0,USGS-401327111462601,UTAH LAKE HAB STUDY SITE 3,20160810,40.224119,-111.773939,
1,USGS-401432111454301,UTAH LAKE HAB STUDY SITE 4,20160810,40.242311,-111.761811,
2,USGS-401613111463301,UTAH LAKE HAB STUDY SITE 1,20160810,40.270319,-111.775881,
3,USGS-401658111491601,UTAH LAKE HAB STUDY SITE 2,20160810,40.2827,-111.8212,
4,UTAHDWQ_WQX-4917305,Utah Lake at American Fork Marina near boat ramp,20160926,40.34238,-111.800839,


In [5]:
# Preview the first rows of the post-interpolation summary table.
post.head()

Unnamed: 0,OID,OBJECTID_1,COUNT,AREA,MEAN,STD,SUM,X25.,X50.,X75.
0,0,20160420,27254,0.036135,0.326584,0.065224,8900.717586,0.292608,0.326837,0.379791
1,1,20160525,27254,0.036135,0.2,0.0,5450.800081,0.2,0.2,0.2
2,2,20160630,27254,0.036135,0.2,0.0,5450.800081,0.2,0.2,0.2
3,3,20170510,27254,0.036135,0.266793,0.010521,7271.177156,0.256585,0.267754,0.272464
4,4,20170511,27254,0.036135,0.271572,0.011862,7401.413938,0.265434,0.269956,0.278423


In [6]:
# Keep only the observations that actually carry a Secchi depth reading;
# rows where secchi_depth_meters is missing are discarded.
pre = pre[pre['secchi_depth_meters'].notna()]

In [7]:
# (rows, columns) remaining after dropping rows without a Secchi depth.
pre.shape

(230, 6)

In [8]:
# Number of distinct values in the datetime column of the observations.
pre.datetime.nunique()

38

In [9]:
# Number of distinct OBJECTID_1 values in the post-interpolation table
# (OBJECTID_1 is matched against pre.datetime in the next step).
post.OBJECTID_1.nunique()

33

In [10]:
# Observations whose date is absent from the interpolated table. Per the
# original note, these dates have only one value, so no spatial
# interpolation was needed for them.
# "NOT IN"-style filtering, see:
# https://stackoverflow.com/questions/19960077/how-to-filter-pandas-dataframe-using-in-and-not-in-like-in-sql
single = pre.loc[~pre['datetime'].isin(post['OBJECTID_1'])]
single.head()

Unnamed: 0,mlid,location,datetime,lat,long,secchi_depth_meters
257,UTAHDWQ_WQX-4917450,UTAH LAKE AT MIDDLE OF PROVO BAY,20170918,40.189139,-111.699931,0.2
258,UTAHDWQ_WQX-4917450,UTAH LAKE AT MIDDLE OF PROVO BAY,20171010,40.189139,-111.699931,0.38
260,UTAHDWQ_WQX-4917450,UTAH LAKE AT MIDDLE OF PROVO BAY,20180517,40.189139,-111.699931,0.25
261,UTAHDWQ_WQX-4917450,UTAH LAKE AT MIDDLE OF PROVO BAY,20180611,40.189139,-111.699931,0.1
324,UTAHDWQ_WQX-4917600,UTAH LAKE GOSHEN BAY SOUTHWEST END,20171017,40.060235,-111.874384,0.2


In [11]:
# Append rows that were not used in interpolation.
# Each such observation stands in for its date's statistics: its Secchi depth
# is copied into MEAN, SUM and the three X25./X50./X75. columns, OID is tagged
# "n", and COUNT, AREA and STD keep the empty-string placeholders.
#
# All rows are built in one frame and concatenated once. DataFrame.append was
# removed in pandas 2.0, and growing a frame inside a loop recopies it on every
# pass. The depth is read by column name instead of the positional row[-1].
if not single.empty:  # nothing to add; also avoids concatenating an empty frame
    depth = single['secchi_depth_meters'].to_numpy()
    extra_rows = pd.DataFrame({
        'OID': "n",
        'OBJECTID_1': single['datetime'].to_numpy(),
        'COUNT': "",
        'AREA': "",
        'MEAN': depth,
        'STD': "",
        'SUM': depth,
        'X25.': depth,
        'X50.': depth,
        'X75.': depth,
    })
    post = pd.concat([post, extra_rows], ignore_index=True)

In [12]:
# Inspect the last 10 rows to check the appended single-observation rows.
post.tail(10)

Unnamed: 0,OID,OBJECTID_1,COUNT,AREA,MEAN,STD,SUM,X25.,X50.,X75.
28,28,20190617,27254.0,0.0361352,0.650252,0.152027,17721.959504,0.549797,0.630033,0.737957
29,29,20190618,27254.0,0.0361352,0.345823,0.043238,9425.064244,0.305284,0.348735,0.389007
30,30,20190708,27254.0,0.0361352,0.294211,0.0362082,8018.421409,0.270048,0.298638,0.304595
31,31,20190812,27254.0,0.0361352,0.291811,0.0537108,7953.004422,0.273472,0.278827,0.290563
32,32,20190923,27254.0,0.0361352,0.25621,0.0470815,6982.756469,0.220447,0.251533,0.270792
33,n,20170918,,,0.2,,0.2,0.2,0.2,0.2
34,n,20171010,,,0.38,,0.38,0.38,0.38,0.38
35,n,20180517,,,0.25,,0.25,0.25,0.25,0.25
36,n,20180611,,,0.1,,0.1,0.1,0.1,0.1
37,n,20171017,,,0.2,,0.2,0.2,0.2,0.2


# Data Visualization

In [13]:
# Show the installed plotly version.
import plotly
plotly.__version__

'4.9.0'

In [65]:
import chart_studio.plotly as py
import plotly.graph_objs as go
import plotly.io as pio # to display low-level dictionary graphs

In [15]:
# Parse the YYYYMMDD values in OBJECTID_1 into pandas datetimes.
post['OBJECTID_1'] = pd.to_datetime(post['OBJECTID_1'], format='%Y%m%d')

In [16]:
# Confirm that OBJECTID_1 now displays as dates.
post.head()

Unnamed: 0,OID,OBJECTID_1,COUNT,AREA,MEAN,STD,SUM,X25.,X50.,X75.
0,0,2016-04-20,27254,0.0361352,0.326584,0.065224,8900.717586,0.292608,0.326837,0.379791
1,1,2016-05-25,27254,0.0361352,0.2,0.0,5450.800081,0.2,0.2,0.2
2,2,2016-06-30,27254,0.0361352,0.2,0.0,5450.800081,0.2,0.2,0.2
3,3,2017-05-10,27254,0.0361352,0.266793,0.0105212,7271.177156,0.256585,0.267754,0.272464
4,4,2017-05-11,27254,0.0361352,0.271572,0.0118625,7401.413938,0.265434,0.269956,0.278423


In [83]:
def _marker_trace(dates, values, name, color, visible=True):
    """Build a marker-only scatter trace of `values` plotted against `dates`.

    Parameters
    ----------
    dates, values : iterable
        x and y data; each is converted to a plain list for plotly.
    name : str
        Trace name (also reused as the label of its toggle button).
    color : str
        Marker fill colour.
    visible : bool, default True
        Whether the trace is drawn when the figure is first rendered.
    """
    return go.Scatter(
        x=list(dates),
        y=list(values),
        name=name,
        mode='markers',
        marker=dict(color=color, size=10, line=dict(width=2)),
        visible=visible,
    )


# NOTE(review): both traces read the 'X75.' column, exactly as in the original
# cell -- confirm whether the second trace was meant to show another column.
sd1 = _marker_trace(post.OBJECTID_1, post['X75.'], 'Secchi Disk Depth 1', 'cyan')
# Hidden at start so the first render matches the pre-selected (active=0)
# button; otherwise both traces are drawn and the second covers the first.
sd2 = _marker_trace(post.OBJECTID_1, post['X75.'], 'Secchi Disk Depth 2', 'pink',
                    visible=False)

data = [sd1, sd2]

# One button per trace: clicking it shows that trace and hides the other.
updatemenus = [
    dict(type="buttons",
         active=0,
         buttons=[
             dict(label=trace.name,
                  method='update',
                  args=[{'visible': [other is trace for other in data]}])
             for trace in data
         ])
]

layout = dict(title="Water Quality Parameters", showlegend=False,
              updatemenus=updatemenus, plot_bgcolor="white")

fig = go.Figure(data=data, layout=layout)
# Grey axis lines stand in for the frame removed by the white background;
# axis titles let the figure be read on its own.
fig.update_xaxes(title_text='Date', showline=True, linewidth=2, linecolor='grey')
fig.update_yaxes(title_text='Secchi depth (m)', showline=True, linewidth=2,
                 linecolor='grey')
pio.show(fig)




In [84]:
# Tweets
# Import daily and weekly tweet count