This Script only exists as a place to load up all of the pickle files files while I am busy doing other things.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import netCDF4 as nc
import datetime as dt
from salishsea_tools import evaltools as et, viz_tools
import gsw 
import matplotlib.gridspec as gridspec
import matplotlib as mpl
import matplotlib.dates as mdates
import cmocean as cmo
import scipy.interpolate as sinterp
import pickle
import cmocean
import json
import f90nml
from collections import OrderedDict
from matplotlib.colors import LogNorm

fs=16
mpl.rc('xtick', labelsize=fs)
mpl.rc('ytick', labelsize=fs)
mpl.rc('legend', fontsize=fs)
mpl.rc('axes', titlesize=fs)
mpl.rc('axes', labelsize=fs)
mpl.rc('figure', titlesize=fs)
mpl.rc('font', size=fs)
mpl.rc('font', family='sans-serif', weight='normal', style='normal')

import warnings
#warnings.filterwarnings('ignore')
from IPython.display import Markdown, display

%matplotlib inline

In [2]:
year=2009
modelversion='nowcast-green.201905'
PATH= '/results2/SalishSea/nowcast-green.201905/'
datadir='/ocean/eolson/MEOPAR/obs/WADE/ptools_data/ecology'

### load observations

In [3]:
dfTime=pd.read_excel('/ocean/eolson/MEOPAR/obs/WADE/WDE_Data/OlsonSuchyAllen_UBC_PDR_P003790-010721.xlsx',
                    engine='openpyxl',sheet_name='EventDateTime')

In [4]:
## duplicate Station/Date entries with different times seem to be always within a couple of hours, 
# so just take the first (next cell)
test=dfTime.groupby(['FlightDate','SiteCode'])['TimeDown \n(Local - PST or PDT)'].count()

In [5]:
# drop duplicate rows
dfTime.drop_duplicates(subset=['FlightDate','SiteCode'],keep='first',inplace=True)
print(dfTime.keys())

Index(['FlightYear', 'FlightMonth', 'FlightDate', 'SiteCode', 'Sampled',
       'TimeDown \n(Local - PST or PDT)', 'FieldComment'],
      dtype='object')


In [6]:
dfTime['dtPac']=[dt.datetime.combine(idate, itime) for idate, itime \
         in zip(dfTime['FlightDate'],dfTime['TimeDown \n(Local - PST or PDT)'])]
dfTime['dtUTC']=[et.pac_to_utc(ii) for ii in dfTime['dtPac']]

In [7]:
# PROCESS STATION LOCATION INFO (based on Parker's code)
sta_fn='/ocean/eolson/MEOPAR/obs/WADE/WDE_Data/OlsonSuchyAllen_UBC_PDR_P003790-010721.xlsx'
sheetname='Site Info'
sta_df =pd.read_excel(sta_fn,engine='openpyxl',sheet_name=sheetname)
sta_df.dropna(how='any',subset=['Lat_NAD83 (deg / dec_min)','Long_NAD83 (deg / dec_min)','Station'],inplace=True)
sta_df = sta_df.set_index('Station')
# get locations in decimal degrees
for sta in sta_df.index:
    lat_str = sta_df.loc[sta, 'Lat_NAD83 (deg / dec_min)']
    lat_deg = float(lat_str.split()[0]) + float(lat_str.split()[1])/60
    sta_df.loc[sta,'Lat'] = lat_deg
    #
    lon_str = sta_df.loc[sta, 'Long_NAD83 (deg / dec_min)']
    lon_deg = float(lon_str.split()[0]) + float(lon_str.split()[1])/60
    sta_df.loc[sta,'Lon'] = -lon_deg    
sta_df.pop('Lat_NAD83 (deg / dec_min)');
sta_df.pop('Long_NAD83 (deg / dec_min)');

In [8]:
fn='/ocean/eolson/MEOPAR/obs/WADE/WDE_Data/OlsonSuchyAllen_UBC_PDR_P003790-010721.xlsx'
sheetname='LabChlaPheo'
chlPheo =pd.read_excel(fn,engine='openpyxl',sheet_name=sheetname)

In [9]:
chlPheo.dropna(how='any',subset=['Date','Station','SamplingDepth'],inplace=True)

In [10]:
# average over replicates
chlPheo2=pd.DataFrame(chlPheo.groupby(['Date','Station','SamplingDepth'],as_index=False).mean())

In [11]:
# join to station info (lat/lon)
chlPheo3=pd.merge(left=sta_df,right=chlPheo2,how='right',
                 left_on='Station',right_on='Station')

In [12]:
# join to date/time
dfTime['dtUTC']=[et.pac_to_utc(dt.datetime.combine(idate,itime)) for idate,itime in \
                zip(dfTime['FlightDate'],dfTime['TimeDown \n(Local - PST or PDT)'])]
dfTime2=dfTime.loc[:,['FlightDate','SiteCode','dtUTC']]
chlPheoFinal=pd.merge(left=chlPheo3,right=dfTime2,how='left',
                      left_on=['Date','Station'],right_on=['FlightDate','SiteCode'])

In [43]:
chlPheoFinal.dropna(how='any',subset=['dtUTC'],inplace=True)

In [45]:
chlPheoFinal['Z']=chlPheoFinal['SamplingDepth']
chlPheoFinal['Year']=[ii.year for ii in chlPheoFinal['dtUTC']]
chlPheoFinal['YD']=et.datetimeToYD(chlPheoFinal['dtUTC'])

In [46]:
len(chlPheoFinal),len(chlPheo3),len(dfTime2)

(11422, 11469, 5186)

## load CTD data

In [15]:
dfCTD0=pickle.load(open(os.path.join(datadir,f'Casts_{str(year)}.p'),'rb'))
dfCTD=pd.merge(left=sta_df,right=dfCTD0,how='right',
             left_on='Station',right_on='Station')
dfCTD['dtUTC']=[iiD+dt.timedelta(hours=20) for iiD in dfCTD['Date']] #Does this mean it also has that flaw where we are not sure when the data was collected?
dfCTD.rename(columns={'Latitude':'Lat','Longitude':'Lon'},inplace=True)
dfCTD['Z']=-1*dfCTD['Z']
# Calculate Absolute (Reference) Salinity (g/kg) and Conservative Temperature (deg C) from 
# Salinity (psu) and Temperature (deg C):
press=gsw.p_from_z(-1*dfCTD['Z'],dfCTD['Lat'])
dfCTD['SA']=gsw.SA_from_SP(dfCTD['Salinity'],press,
                           dfCTD['Lon'],dfCTD['Lat'])
dfCTD['CT']=gsw.CT_from_t(dfCTD['SA'],dfCTD['Temperature'],press)

dfCTD['Year']=[ii.year for ii in dfCTD['dtUTC']]
dfCTD['YD']=et.datetimeToYD(dfCTD['dtUTC'])

In [16]:
dfCTD.keys()

Index(['Station', 'Desig', 'Descrip', 'Basin', '*Max_Depth', 'Lat', 'Lon',
       'Salinity', 'Temperature', 'Sigma', 'Chl', 'DO', 'Turb', 'Z', 'Date',
       'dtUTC', 'SA', 'CT', 'Year', 'YD'],
      dtype='object')

In [17]:
# check that there is never more than one ctd cast per station per day:
test=dfCTD.groupby(['Station','Year','YD','Z']).count()
print('this should be 1: ',test['Date'].unique())

this should be 1:  [1]


## Creating Nutrients matched dataset with added CT and SA from CTD

In [18]:
def interpCTDvar(sta,yr,yd,ztarget,ctdvar):
    ctdlocs=(dfCTD.Station==sta)&(dfCTD.Year==yr)&(dfCTD.YD==yd)
    if np.sum(ctdlocs)==0:
        print(f'Warning: Station {sta}, Year {yr}, year day {yd} not found in dfCTD')
        return np.nan
    else:
        val=np.interp(ztarget,dfCTD.loc[ctdlocs,['Z']].values.flatten(),
                  dfCTD.loc[ctdlocs,[ctdvar]].values.flatten())
        return val

In [19]:
dfCTD.loc[dfCTD.Station=='PSS019']['YD'].unique()

array([110, 138, 159, 201, 230, 257, 285, 336])

In [56]:
chlPheoFinal.loc[chlPheoFinal.Station=='PSS019']['YD'].unique()
chlPheoYear=pd.DataFrame(chlPheoFinal.loc[chlPheoFinal.Year==year])

In [57]:
chlPheoYear['SA']=[interpCTDvar(sta,yr,yd,ztarget,'SA') for sta, yr, yd, ztarget \
           in zip(chlPheoYear['Station'],chlPheoYear['Year'],chlPheoYear['YD'],chlPheoYear['Z'])]



In [58]:
chlPheoYear['CT']=[interpCTDvar(sta,yr,yd,ztarget,'CT') for sta, yr, yd, ztarget \
           in zip(chlPheoYear['Station'],chlPheoYear['Year'],chlPheoYear['YD'],chlPheoYear['Z'])]



In [59]:
chlPheoYear

Unnamed: 0,Station,Desig,Descrip,Basin,*Max_Depth,Lat,Lon,Date,SamplingDepth,CTD Cast Rep,...,Pheo_QA,Pheo_SampleFieldReplicateNumber,FlightDate,SiteCode,dtUTC,Z,Year,YD,SA,CT
131,ADM001,C,Admiralty Inlet - Bush Pt.,Admiralty Inlet,114.0,48.029813,-122.617933,2009-04-06,1.014000,1.0,...,3.0,1.0,2009-04-06,ADM001,2009-04-06 19:02:00,1.014000,2009,96,29.886843,7.932160
132,ADM001,C,Admiralty Inlet - Bush Pt.,Admiralty Inlet,114.0,48.029813,-122.617933,2009-04-06,9.866000,1.0,...,3.0,1.0,2009-04-06,ADM001,2009-04-06 19:02:00,9.866000,2009,96,29.886843,7.932160
133,ADM001,C,Admiralty Inlet - Bush Pt.,Admiralty Inlet,114.0,48.029813,-122.617933,2009-04-06,29.737000,1.0,...,3.0,1.0,2009-04-06,ADM001,2009-04-06 19:02:00,29.737000,2009,96,29.886843,7.932160
134,ADM001,C,Admiralty Inlet - Bush Pt.,Admiralty Inlet,114.0,48.029813,-122.617933,2009-05-20,1.033000,1.0,...,3.0,1.0,2009-05-20,ADM001,2009-05-20 19:35:00,1.033000,2009,140,28.448336,10.232581
135,ADM001,C,Admiralty Inlet - Bush Pt.,Admiralty Inlet,114.0,48.029813,-122.617933,2009-05-20,9.818000,1.0,...,3.0,1.0,2009-05-20,ADM001,2009-05-20 19:35:00,9.818000,2009,140,28.448336,10.232581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11190,RSR837,C,Rosario Strait - Peapod Rock,Strait of Georgia,56.0,48.616492,-122.762958,2009-10-12,9.925000,1.0,...,3.0,1.0,2009-10-12,RSR837,2009-10-12 18:46:00,9.925000,2009,285,30.772577,10.431024
11191,RSR837,C,Rosario Strait - Peapod Rock,Strait of Georgia,56.0,48.616492,-122.762958,2009-10-12,30.006001,1.0,...,3.0,1.0,2009-10-12,RSR837,2009-10-12 18:46:00,30.006001,2009,285,30.772577,10.431024
11192,RSR837,C,Rosario Strait - Peapod Rock,Strait of Georgia,56.0,48.616492,-122.762958,2009-12-02,1.283000,1.0,...,3.0,1.0,2009-12-02,RSR837,2009-12-02 20:26:00,1.283000,2009,336,29.567478,8.858209
11193,RSR837,C,Rosario Strait - Peapod Rock,Strait of Georgia,56.0,48.616492,-122.762958,2009-12-02,11.049000,1.0,...,3.0,1.0,2009-12-02,RSR837,2009-12-02 20:26:00,11.049000,2009,336,29.567478,8.858209


### set up variables for model-data matching

In [60]:
start_date = dt.datetime(year,1,1)
end_date = dt.datetime(year,12,31)
flen=1 # number of days per model output file. always 1 for 201905 and 201812 model runs
namfmt='nowcast' # for 201905 and 201812 model runs, this should always be 'nowcast'
filemap={'diatoms':'ptrc_T','ciliates':'ptrc_T','flagellates':'ptrc_T','votemper':'grid_T','vosaline':'grid_T'}
fdict={'ptrc_T':1,'grid_T':1}

In [61]:
data_Phea=et.matchData(chlPheoYear,filemap,fdict,start_date,end_date,'nowcast',PATH,1,quiet=False);

(Lat,Lon)= 47.21342666666666 -123.07765  not matched to domain


In [28]:
##### Saving data as Pickle files to be used in the summary file
saveloc='/ocean/kflanaga/MEOPAR/savedData'
with open(os.path.join(saveloc,f'data_Phea_{modelversion}_{year}.pkl'),'wb') as hh:
    pickle.dump(data_Phea,hh)