This script extracts the King County moored data from Dockton. It loads the King County data into a pandas DataFrame from an Excel file (read here from a .csv file). These Excel files are not organized to work as pandas DataFrames, so a significant amount of cleaning is necessary. Several additional variables are calculated from the data to assist in matching it to the SalishSeaCast model output.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import netCDF4 as nc
import datetime as dt
from salishsea_tools import evaltools as et, viz_tools
import gsw 
import matplotlib.gridspec as gridspec
import matplotlib as mpl
import matplotlib.dates as mdates
import cmocean as cmo
import scipy.interpolate as sinterp
import pickle
import cmocean
import json
import f90nml
from collections import OrderedDict
from matplotlib.colors import LogNorm

# One font size shared by every text element of the figures.
fs = 16

# Apply that size to each (rc group, size key) pair, then fix the font face.
for rc_group, size_key in (
    ('xtick', 'labelsize'),
    ('ytick', 'labelsize'),
    ('legend', 'fontsize'),
    ('axes', 'titlesize'),
    ('axes', 'labelsize'),
    ('figure', 'titlesize'),
    ('font', 'size'),
):
    mpl.rc(rc_group, **{size_key: fs})
mpl.rc('font', family='sans-serif', weight='normal', style='normal')

import warnings
#warnings.filterwarnings('ignore')
from IPython.display import Markdown, display

%matplotlib inline

In [2]:
# Station to process; used in the input and output file names and as the
# key for the station coordinate lookup.
Mooring = 'Dockton'
# Calendar year of the record to process.
year = 2018
# Directory that holds the raw mooring file for this station and year.
saveloc = '/ocean/kflanaga/MEOPAR/mooredData'

In [3]:
# Read the raw full-year file for this mooring; the bytes are decoded as
# cp1252 rather than pandas' default UTF-8.
df = pd.read_csv(
    f'{saveloc}/{Mooring}_1_1_{year}_to_12_31_{year}.csv',
    encoding='cp1252',
)

In [4]:
# Display the column labels pandas assigned on load. The output below shows
# they are placeholders ('**Legend**', 'Unnamed: n'), not the real variable names.
df.columns

Index(['**Legend**', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4',
       'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9',
       'Unnamed: 10', 'Unnamed: 11'],
      dtype='object')

In [5]:
# Look up the mooring's longitude and latitude in the salishsea_tools places table.
# `places` is not among the modules imported at the top of this notebook
# (only evaltools and viz_tools are), so import it here to avoid a NameError.
from salishsea_tools import places

Lon, Lat = places.PLACES[Mooring]['lon lat']

In [7]:
# Drop the first 48 rows (positions 0-47), which contain no data; the row
# that follows them holds the real column names.
# A plain slice selects the same labels as the old list-wrapped tuple, but
# without the deprecated multidimensional-indexing form that newer
# numpy/pandas versions reject.
df.drop(df.index[:48], inplace=True)

  result = getitem(key)


In [8]:
# The first remaining row carries the real column names, so promote it to
# be the dataframe's column labels.
df.columns = df.iloc[0, :]

In [9]:
# Remove the first and last rows of the dataframe. The first row is the
# header row that was just copied into the column labels.
df.drop(index=df.index[0], inplace=True)
df.drop(index=df.index[-1], inplace=True)

In [10]:
# Renumber the rows from 0. The previous row labels are kept as a new
# 'index' column.
df = df.reset_index(drop=False)

In [11]:
# Drop columns that are not needed downstream: the old row labels and the
# instrument ID / battery-voltage columns.
df = df.drop(columns=['index', 'Sonde_ID', 'Sonde_Batt_V', 'Logger_Batt_V'])

In [13]:
# Convert every column except the first and the last from strings to
# numeric values. errors='coerce' turns anything unparseable into NaN
# instead of raising.
df[df.columns[1:-1]] = df[df.columns[1:-1]].apply(pd.to_numeric, errors='coerce')

In [14]:
# Calculate or rename the variables used when matching to model output.
# Column creation order matters: it fixes the column order of the saved frame.

# Parse the 'Date' strings (month/day/year, 12-hour clock with AM/PM).
# NOTE(review): the column is called dtUTC but no time-zone conversion is
# applied here — confirm the source timestamps are already in UTC.
df['dtUTC']=pd.to_datetime(df['Date'],format='%m/%d/%Y %I:%M:%S %p')
# Lat and Lon are the single station coordinates; they are broadcast to every row.
df['Lat']=Lat
df['Lon']=Lon
df['Z']=df['Depth_m']
# Pressure from depth. The sign is flipped because gsw.p_from_z expects
# height (negative below the surface); presumably Depth_m is positive downward.
press=gsw.p_from_z(-1*df['Z'],df['Lat'])
# Absolute Salinity from the practical salinity column.
df['SA']=gsw.SA_from_SP(df['Salinity_PSU'],press,
                       df['Lon'],df['Lat'])
# Conservative Temperature from the in-situ water temperature column.
df['CT']=gsw.CT_from_t(df['SA'],df['Water_Temperature_degC'],press)
df['Chl']=df['Chlorophyll_Fluorescence_ug/L']
# Key used to group into daily means; presumably the day of the year of each
# timestamp — verify the numbering convention of et.datetimeToYD.
df['YD']=et.datetimeToYD(df['dtUTC'])

In [16]:
# Create a set of daily averages to compare to daily model output.
dfg = df.groupby(by='YD')
# Average the numeric columns only. The frame still holds the raw 'Date'
# strings, and GroupBy.mean() raises a TypeError on such columns in
# pandas >= 2.0 (older versions dropped them silently).
df_daily_avg = dfg.mean(numeric_only=True)
# Re-attach the station coordinates as constants.
df_daily_avg['Lat'] = Lat
df_daily_avg['Lon'] = Lon

In [17]:
# Create a dtUTC column holding 12:00 on the calendar day of each daily mean.
# The day is taken from the timestamps inside each YD group rather than from
# a running counter that starts at Jan 1: with a counter, any gap in the
# record (or a record that starts after Jan 1) mislabels every later day.
noon_per_day = dfg['dtUTC'].min().dt.normalize() + pd.Timedelta(hours=12)
# Keep UTC as a plain list of datetimes, one per group, as before.
UTC = [stamp.to_pydatetime() for stamp in noon_per_day]
# The groups come out in the same (sorted YD) order as the rows of
# df_daily_avg, so a positional assignment lines up day for day.
df_daily_avg['dtUTC'] = UTC
df_daily_avg = df_daily_avg.reset_index()

In [40]:
##### Save the full-resolution and the daily-averaged data as pickle files
##### to be used in the summary file.
saveloc = '/ocean/kflanaga/MEOPAR/mooredData'
for pkl_name, frame in (
    (f'data_{Mooring}_{year}.pkl', df),
    (f'daily_data_{Mooring}_{year}.pkl', df_daily_avg),
):
    with open(os.path.join(saveloc, pkl_name), 'wb') as hh:
        pickle.dump(frame, hh)