In [1]:
import pandas as pd
import scipy.stats as sps
import seaborn as sns
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import


def corrfunc(x, y, **kws):
    """Annotate the current axes with the Pearson correlation of ``x`` and ``y``.

    The ``(x, y, **kws)`` signature matches what seaborn grid ``map*`` methods
    pass to a callback -- presumably the intended use; confirm against callers.

    Observations where either value is missing are removed pairwise before the
    statistic is computed, so a few NaNs no longer turn the whole result into
    NaN (or an error, depending on the SciPy version).

    Parameters
    ----------
    x, y : array-like of numbers, equal length
        The two samples to correlate.
    **kws
        Accepted for callback compatibility and ignored.

    Returns
    -------
    None
        The function only draws two annotations ("r = ..." and "p = ...") on
        the axes returned by ``plt.gca()``, positioned in axes coordinates.
    """
    # .values discards any index so the two samples are paired by position,
    # exactly as pearsonr itself would pair them.
    pairs = pd.DataFrame({'x': pd.Series(x).values,
                          'y': pd.Series(y).values}).dropna()

    if len(pairs) < 2:
        # pearsonr needs at least two observations; report "nan" rather than
        # raising from inside a plotting callback.
        r = p = float('nan')
    else:
        r, p = sps.pearsonr(pairs['x'].values, pairs['y'].values)

    ax = plt.gca()
    ax.annotate("r = {:.2f}".format(r),
                xy=(.1, .9), xycoords=ax.transAxes)
    ax.annotate("p = {:.3f}".format(p),
                xy=(.4, .9), xycoords=ax.transAxes)
    


In [2]:
# Read the 'Raw' sheet of the workbook, skipping its first row, and index the
# records by their 'ID' column.
# `sheet_name` is the supported keyword: the old `sheetname` spelling was
# deprecated in pandas 0.21 and later removed, so it fails on current pandas.
df = pd.read_excel(r'./data/HK/HK_WQ.xlsx', sheet_name='Raw', skiprows=1)
# Reassign instead of mutating in place so the step reads as a plain pipeline.
df = df.set_index('ID')

# Column names of interest; 'Season' is included because it is used as the
# hue variable when the pair plot is drawn.
subs = [
    'Dissolved Oxygen (mg/L)',
    '5-day Biochemical Oxygen Demand (mg/L)',
    'E. coli (cfu/100mL)',
    'Suspended Solids (mg/L)',
    'Chlorophyll-a (μg/L)',
    'pH',
    'Salinity (psu)',
    'Silica (mg/L)',
    'Temperature (°C)',
    'Turbidity (NTU)',
    'Total Phosphorus (mg/L)',
    'Total Nitrogen (mg/L)',
    'Season',
]

# Selection filters. A value of None means "no explicit choice"; the cell that
# consumes this dict then falls back to whatever is present in the data.
interest = dict(zones=None, stations=None, depths=None, params=subs)

In [None]:
# Resolve each selection: an explicit (non-None) entry in `interest` wins;
# otherwise fall back to what the data frame itself contains. The fallbacks
# are written as conditional expressions so they are evaluated only when
# actually needed.
zones = (interest['zones'] if interest['zones'] is not None
         else df['Water Control Zone'].unique())

stations = (interest['stations'] if interest['stations'] is not None
            else df['Station'].unique())

depths = (interest['depths'] if interest['depths'] is not None
          else df['Depth'].unique())

# Default parameter set: every column from position 5 onwards.
params = (interest['params'] if interest['params'] is not None
          else df.columns[5:])

In [None]:
# Rows for station 'WM1' at depth 'Surface Water', restricted to the columns
# listed in `params`.
# Select first, then drop 'Unnamed: 30' only if it is still present. Dropping
# it *before* the selection raises KeyError whenever `params` itself contains
# that column (which can happen with the `df.columns[5:]` fallback) or when
# the loaded sheet has no such column at all.
wm1_surface = (df['Station'] == 'WM1') & (df['Depth'] == 'Surface Water')
dfl = df.loc[wm1_surface, params].drop('Unnamed: 30', axis=1, errors='ignore')

In [None]:
# Pairwise grid of every column in `dfl`, with points coloured by 'Season'.
# The returned grid object is kept in `pr` for further customisation.
pr = sns.pairplot(data=dfl, hue='Season')

In [None]:
# Chlorophyll-a against pH for every row of `df` (no station/depth filter).
# The axes are labelled from the column names so the figure is readable on
# its own; the pyplot state machine is kept because a later cell calls
# plt.show().
x_col, y_col = 'Chlorophyll-a (μg/L)', 'pH'
plt.scatter(df[x_col], df[y_col])
plt.xlabel(x_col)
plt.ylabel(y_col)

In [None]:
# Display all figures that are currently open on the pyplot state machine.
plt.show()

In [None]:
# Count the missing values in each column of `dfl`; the bare name on the last
# line makes the resulting Series the cell's displayed output.
missing_per_column = dfl.isnull().sum(axis=0)
missing_per_column

In [None]:
# Show the selected parameter columns for all rows of the full data frame.
df.loc[:, params]