This script extracts the King County moored data for the PointWilliams mooring. It loads the King County data into a pandas DataFrame from a spreadsheet export saved as a CSV file. These files are not laid out as clean tables, so a significant amount of cleaning is necessary before they can be used as pandas DataFrames. Several additional variables are calculated from the data to assist in matching it to the SalishSeaCast model output.

In [38]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import netCDF4 as nc
import datetime as dt
from salishsea_tools import evaltools as et, viz_tools, places
import gsw 
import matplotlib.gridspec as gridspec
import matplotlib as mpl
import matplotlib.dates as mdates
import cmocean as cmo
import scipy.interpolate as sinterp
import pickle
import cmocean
import json
import f90nml
from collections import OrderedDict
from matplotlib.colors import LogNorm

# Use one font size for every text element so all figures look consistent.
fs = 16
for group in ('xtick', 'ytick'):
    mpl.rc(group, labelsize=fs)
mpl.rc('legend', fontsize=fs)
mpl.rc('axes', titlesize=fs, labelsize=fs)
mpl.rc('figure', titlesize=fs)
mpl.rc('font', size=fs, family='sans-serif', weight='normal', style='normal')

import warnings
#warnings.filterwarnings('ignore')
from IPython.display import Markdown, display

%matplotlib inline

In [39]:
# Run configuration for this notebook.
# saveloc: directory holding the input CSV and receiving the output pickle.
saveloc = '/ocean/kflanaga/MEOPAR/mooredData'
# Mooring: used in the file names and as the key into places.PLACES.
Mooring = 'PointWilliams'
# year: used in the file names and as the base year for rebuilt timestamps.
year = 2018

In [40]:
# Load the full-year export for the selected mooring. The file is read as
# cp1252 text; the padding rows and the embedded header row are handled in
# the cells that follow.
csv_path = f'{saveloc}/{Mooring}_1_1_{year}_to_12_31_{year}.csv'
df = pd.read_csv(csv_path, encoding='cp1252')

In [41]:
# Look up the mooring's position in salishsea_tools.places; the 'lon lat'
# entry unpacks as longitude first, then latitude.
mooring_place = places.PLACES[Mooring]
Lon, Lat = mooring_place['lon lat']

In [42]:
# Drop the first 50 rows, which contain no data whatsoever.
# The labels are selected with a plain slice: indexing with a list that wraps
# a tuple (index[[tuple(...)]]) is deprecated NumPy multidimensional indexing
# and emits a FutureWarning (or fails on newer NumPy versions).
df.drop(df.index[:50], inplace=True)

  result = getitem(key)


In [43]:
# After the padding rows are removed, the first remaining row holds the real
# column names, so promote it to be the DataFrame's column labels.
header_row = df.iloc[0]
df.columns = header_row

In [44]:
# Remove the first row (the header row that was just copied into the column
# labels) and then the last row of the DataFrame.
df.drop(index=df.index[0], inplace=True)
df.drop(index=df.index[-1], inplace=True)

In [45]:
# Renumber the rows 0..n-1 now that the padding and header rows are gone.
# drop=True discards the old labels directly instead of inserting them as an
# 'index' column and deleting that column afterwards, so a genuine data
# column that happened to be called 'index' can never be removed by mistake.
df = df.reset_index(drop=True)

In [46]:
# Convert every column except the first and the last from strings to numbers.
# errors='coerce' turns any value that cannot be parsed into NaN.
numeric_cols = df.columns[1:-1]
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

In [47]:
# Calculate or rename the variables used when matching to model output.

# Timestamps are parsed from the 12-hour-clock 'Date' strings.
# NOTE(review): no time-zone conversion happens here even though the column
# is named dtUTC — confirm the source timestamps are already in UTC.
df['dtUTC'] = pd.to_datetime(df['Date'], format='%m/%d/%Y %I:%M:%S %p')

# Station position, repeated on every row.
df['Lat'] = Lat
df['Lon'] = Lon

# Depth, and the pressure derived from it. The depth is negated before the
# call — presumably because gsw.p_from_z expects height (negative below the
# surface); see the GSW documentation.
depth_m = df['Depth_m']
df['Z'] = depth_m
press = gsw.p_from_z(-1 * depth_m, df['Lat'])

# Salinity and temperature converted with the gsw (TEOS-10) helpers, using
# the pressure computed above.
df['SA'] = gsw.SA_from_SP(df['Salinity_PSU'], press, df['Lon'], df['Lat'])
df['CT'] = gsw.CT_from_t(df['SA'], df['Water_Temperature_degC'], press)

# Chlorophyll is copied under a shorter name.
df['Chl'] = df['Chlorophyll_Fluorescence_ug/L']

# Year-day computed from the timestamps by the evaltools helper.
df['YD'] = et.datetimeToYD(df['dtUTC'])

# Nitrite+nitrate scaled by 1000/14 — presumably mg N/L converted to umol/L
# using ~14 g/mol for nitrogen; confirm the intended units.
df['NO23'] = df['SUNA_Nitrite+Nitrate_mgN/L_raw_raw'] * (1000 / 14)

In [48]:
# 'year_hours' is the zero-based day of the year plus the hour expressed as a
# fraction of a day. Minutes and seconds are ignored, so every sample taken
# within the same clock hour gets the same value (used later as a group key).
obs_times = pd.DatetimeIndex(df['dtUTC'])
df['year_hours'] = (obs_times.dayofyear - 1) + obs_times.hour / 24
df = df.reset_index(drop=True)

In [49]:
# Create a set of daily averages to compare to daily model output. 
#dfg=df.groupby(by='YD')
#df_daily_avg=dfg.mean()
#df_daily_avg['Lat']=Lat
#df_daily_avg['Lon']=Lon

In [50]:
# Average all samples that share the same 'year_hours' value, i.e. build one
# mean row per clock hour.
dfg = df.groupby(by='year_hours')
# numeric_only=True restricts the mean to numeric columns. Without it,
# pandas >= 2.0 raises a TypeError on the non-numeric columns (the 'Date'
# strings and the unconverted last column); older pandas dropped such
# columns by default, so the result is the same there.
df_hourly_avg = dfg.mean(numeric_only=True)
# Write the station coordinates back explicitly so they hold the exact values
# rather than the result of averaging a constant column.
df_hourly_avg['Lat'] = Lat
df_hourly_avg['Lon'] = Lon

In [51]:
# Build one datetime per hourly-average row from its 'year_hours' index
# value (fractional days since 1 January of `year`). The result is truncated
# to whole seconds; a NaN key is passed through as NaN.
start = dt.datetime(year, 1, 1)  # loop-invariant, so computed once
UTC = [
    float("NaN") if np.isnan(yd)
    else (start + dt.timedelta(yd)).replace(microsecond=0)
    for yd in df_hourly_avg.index
]

In [52]:
# Turn the 'year_hours' group keys back into an ordinary first column, then
# append the rebuilt timestamps (UTC is in the same row order) as 'dtUTC'.
df_hourly_avg = df_hourly_avg.reset_index()
df_hourly_avg['dtUTC'] = UTC

In [53]:
##### Save the hourly averages as a pickle file
saveloc = '/ocean/kflanaga/MEOPAR/mooredData'

# Disabled outputs (raw data and daily averages), kept for reference:
#with open(os.path.join(saveloc,f'data_{Mooring}_{year}.pkl'),'wb') as hh:
#    pickle.dump(df,hh)
#with open(os.path.join(saveloc,f'daily_data_{Mooring}_{year}.pkl'),'wb') as hh:
#    pickle.dump(df_daily_avg,hh)

# The hourly file goes into the 'hourly_pickle_files' subdirectory of saveloc.
hourly_pickle_path = os.path.join(
    saveloc, 'hourly_pickle_files', f'hourly_data_{Mooring}_{year}.pkl'
)
with open(hourly_pickle_path, 'wb') as pickle_file:
    pickle.dump(df_hourly_avg, pickle_file)
