## PSF Nutrient Comparison

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import netCDF4 as nc
import datetime as dt
from salishsea_tools import evaltools as et, viz_tools
import gsw
import matplotlib.gridspec as gridspec
import matplotlib as mpl
import matplotlib.dates as mdates
import cmocean as cmo
import scipy.interpolate as sinterp
import pytz

mpl.rc('xtick', labelsize=10)
mpl.rc('ytick', labelsize=10)
mpl.rc('legend', fontsize=10)
mpl.rc('axes', titlesize=10)
mpl.rc('axes', labelsize=10)
mpl.rc('figure', titlesize=10)
mpl.rc('font', size=10)
%matplotlib inline

In [2]:
def _lt0convert(arg):
    if arg=='<0':
        val=0.0
    else:
        val=pd.to_numeric(arg, errors='coerce',downcast=None)
    return float(val)

In [3]:
# Load the 2015 nutrient sheet. The date column is forced to str, so it is
# parsed below with strptime using the '%Y-%m-%d %H:%M:%S' layout.
f2015 = pd.read_excel('/ocean/eolson/MEOPAR/obs/PSFCitSci/All_Yrs_Nutrients_2018-01-31_EOEdit.xlsx',
                 sheet_name = '2015 N+P+Si',dtype={'date (dd/mm/yyyy)':str})

# Discard rows whose longitude lies outside [-360, 360], then rows missing
# any of the fields needed to place the sample in space and time.
f2015=f2015.drop(f2015.loc[(f2015['lon']<-360)|(f2015['lon']>360)].index)
f2015 = f2015.dropna(subset = ['date (dd/mm/yyyy)', 'Time (Local)', 'lat', 'lon', 'depth'], how='any')

# Combine date and local time of day, interpret as Canada/Pacific local time,
# convert to UTC and strip the tzinfo so the column holds naive UTC datetimes.
ds=f2015['date (dd/mm/yyyy)'].values
ts=f2015['Time (Local)'].values
tz_pacific=pytz.timezone('Canada/Pacific')
dts=[tz_pacific.localize(dt.datetime.strptime(ii,'%Y-%m-%d %H:%M:%S')
                         +dt.timedelta(hours=jj.hour,minutes=jj.minute,seconds=jj.second)
                        ).astimezone(pytz.utc).replace(tzinfo=None)
    for ii,jj in zip(ds,ts)]
f2015['dtUTC']=dts

# Harmonise column names with the other years and drop the raw columns that
# have been replaced by dtUTC.
f2015=f2015.rename(columns={'lat':'Lat','lon':'Lon','depth':'Z','station':'Station','no23':'NO23','po4':'PO4','si':'Si'})
f2015=f2015.drop(['num','date (dd/mm/yyyy)','Time (Local)'],axis=1)

# Average replicate samples sharing station, position, time and depth.
# numeric_only=True keeps this working on pandas versions where mean()
# raises on non-numeric columns instead of silently dropping them.
f2015_g=f2015.groupby(['Station','Lat','Lon','dtUTC','Z'],as_index=False)
f2015_m=f2015_g.mean(numeric_only=True)

# The original called .reindex() with no arguments, which does nothing;
# reset_index states the intent (a clean 0..n-1 index) explicitly.
f2015=f2015_m.reset_index(drop=True)

In [4]:
# ---- 2016 nitrate+nitrite and phosphate --------------------------------
# NO3+NO and PO4 are read as strings so that text entries such as '<0' reach
# _lt0convert unchanged. The keyword is `dtype`; the original `dtypes` is not
# a read_excel parameter. '30..09' is treated as a missing value.
f2016N = pd.read_excel('/ocean/eolson/MEOPAR/obs/PSFCitSci/All_Yrs_Nutrients_2018-01-31_EOEdit.xlsx',
                 sheet_name = '2016 N+P',dtype={'NO3+NO':str,'PO4':str},na_values=('nan','NaN','30..09'))
# Keep only the first 11 columns of the sheet.
f2016N = f2016N.drop(f2016N.keys()[11:], axis=1)

# Numeric versions of the concentration columns ('<0' -> 0.0, junk -> NaN).
f2016N['NO23']=[_lt0convert(ii) for ii in f2016N['NO3+NO']]
f2016N['PO4_2']=[_lt0convert(ii) for ii in f2016N['PO4']]
f2016N = f2016N.dropna(subset = ['Date (dd/mm/yyyy)', 'Time (Local)', 'Latitude', 'Longitude', 'Depth'], how='any')

# Local (Canada/Pacific) date + time of day -> naive UTC datetime.
tz_pacific=pytz.timezone('Canada/Pacific')
ds=f2016N['Date (dd/mm/yyyy)']
ts=f2016N['Time (Local)']
dts=[tz_pacific.localize(dt.datetime(ii.year,ii.month,ii.day)
                         +dt.timedelta(hours=jj.hour,minutes=jj.minute,seconds=jj.second)
                        ).astimezone(pytz.utc).replace(tzinfo=None)
    for ii,jj in zip(ds,ts)]
f2016N['dtUTC']=dts

# Drop the raw columns (including the string PO4) before renaming PO4_2 onto
# the 'PO4' label, so no duplicate column names are created.
f2016N=f2016N.drop(['Crew','Date (dd/mm/yyyy)','Time (Local)', 'Lat_reported',
       'Long_reported','PO4','NO3+NO'],axis=1)
f2016N=f2016N.rename(columns={'PO4_2':'PO4','Latitude':'Lat','Longitude':'Lon','Depth':'Z'})

# Average replicates; numeric_only=True avoids a TypeError on pandas
# versions where mean() no longer drops non-numeric columns silently.
f2016N_g=f2016N.groupby(['Station','Lat','Lon','dtUTC','Z'],as_index=False)
f2016N_m=f2016N_g.mean(numeric_only=True)

# ---- 2016 silicate ------------------------------------------------------
f2016Si = pd.read_excel('/ocean/eolson/MEOPAR/obs/PSFCitSci/All_Yrs_Nutrients_2018-01-31_EOEdit.xlsx',
                 sheet_name = '2016 SiO2')
# Keep only the first 9 columns of the sheet and show which ones they are.
f2016Si = f2016Si.drop(f2016Si.keys()[9:], axis=1)
print(f2016Si.keys())

f2016Si = f2016Si.dropna(subset = ['DDMMYYYY', 'Time (Local)', 'Latitude', 'Longitude', 'Depth'], how='any')

ds=f2016Si['DDMMYYYY']
ts=f2016Si['Time (Local)']
dts=[tz_pacific.localize(dt.datetime(ii.year,ii.month,ii.day)
                         +dt.timedelta(hours=jj.hour,minutes=jj.minute,seconds=jj.second)
                        ).astimezone(pytz.utc).replace(tzinfo=None)
    for ii,jj in zip(ds,ts)]
f2016Si['dtUTC']=dts

# Depth entries equal to 'S' are mapped to 0; everything else is cast to float.
# NOTE(review): 'S' presumably means "surface" - confirm with the data provider.
z=[0 if (iii=='S') else float(iii) for iii in f2016Si['Depth'].values]
f2016Si['Z']=z

f2016Si=f2016Si.rename(columns={'Latitude':'Lat','Longitude':'Lon','SiO2 µM':'Si','Site ID':'Station'})
f2016Si=f2016Si.drop(['DDMMYYYY','Time (Local)', 'Lat_reported',
       'Long_reported','Depth'],axis=1)

f2016Si_g=f2016Si.groupby(['Station','Lat','Lon','dtUTC','Z'],as_index=False)
f2016Si_m=f2016Si_g.mean(numeric_only=True)

# Outer merge so that samples present in only one of the two sheets are kept.
f2016 = pd.merge(f2016N_m, f2016Si_m,  how='outer', on=['Station','Lat','Lon','dtUTC','Z'])

Index(['Site ID', 'DDMMYYYY', 'Time (Local)', 'Lat_reported', 'Long_reported',
       'Latitude', 'Longitude', 'Depth', 'SiO2 µM'],
      dtype='object')


In [5]:
# Load the 2017 nutrient sheet (the first 3 rows are skipped). The
# concentration columns are read as strings so that entries such as '<0'
# reach _lt0convert unchanged. The keyword is `dtype`; the original `dtypes`
# is not a read_excel parameter. The date/time columns are left to pandas'
# native Excel parsing because the code below needs .year/.hour etc., and
# dt.date / dt.time are not valid pandas dtypes.
f2017 = pd.read_excel('/ocean/eolson/MEOPAR/obs/PSFCitSci/All_Yrs_Nutrients_2018-01-31_EOEdit.xlsx',
                 sheet_name = '2017 N+P+Si',skiprows=3,dtype={'NO3+NO':str,'PO4':str,'Si':str})

# Numeric versions of the concentration columns ('<0' -> 0.0, junk -> NaN).
f2017['NO23']=[_lt0convert(ii) for ii in f2017['NO3+NO']]
f2017['PO4_2']=[_lt0convert(ii) for ii in f2017['PO4']]
f2017['Si_2']=[_lt0convert(ii) for ii in f2017['Si']]

# Drop incomplete rows BEFORE parsing coordinates: a missing Latitude or
# Longitude is a float NaN, and calling .split on it would raise.
f2017 = f2017.dropna(subset = ['Date (dd/mm/yyyy)', 'Time (Local)', 'Latitude', 'Longitude', 'Depth'], how='any')

# Coordinates are stored as '<degrees>°<minutes>' strings; convert to decimal
# degrees. Longitude is negated, i.e. the values are taken to be degrees west.
degminlat=[ii.split('°') for ii in f2017['Latitude'].values]
f2017['Lat']=[float(ii[0])+float(ii[1])/60 for ii in degminlat]
degminlon=[ii.split('°') for ii in f2017['Longitude'].values]
f2017['Lon']=[-1.0*(float(ii[0])+float(ii[1])/60) for ii in degminlon]

# Local (Canada/Pacific) date + time of day -> naive UTC datetime.
tz_pacific=pytz.timezone('Canada/Pacific')
ds=f2017['Date (dd/mm/yyyy)']
ts=f2017['Time (Local)']
dts=[tz_pacific.localize(dt.datetime(ii.year,ii.month,ii.day)
                         +dt.timedelta(hours=jj.hour,minutes=jj.minute,seconds=jj.second)
                        ).astimezone(pytz.utc).replace(tzinfo=None)
    for ii,jj in zip(ds,ts)]
f2017['dtUTC']=dts

# The raw string 'PO4' and 'Si' columns must be dropped as well; otherwise
# renaming PO4_2/Si_2 below would leave two columns with the same label.
f2017=f2017.drop(['Crew','Date (dd/mm/yyyy)','Time (Local)','Comments','Latitude','Longitude',
                  'NO3+NO','PO4','Si'],axis=1)
f2017=f2017.rename(columns={'Depth':'Z','PO4_2':'PO4','Si_2':'Si'})

# Average replicates; numeric_only=True avoids a TypeError on pandas
# versions where mean() no longer drops non-numeric columns silently.
f2017_g=f2017.groupby(['Station','Lat','Lon','dtUTC','Z'],as_index=False)
f2017_m=f2017_g.mean(numeric_only=True)
# .reindex() with no arguments does nothing; reset_index states the intent.
f2017=f2017_m.reset_index(drop=True)

In [6]:
# Stack the three yearly nutrient tables into one frame with a fresh index;
# sort=True orders the union of columns alphabetically.
df = pd.concat([f2015, f2016, f2017], sort=True, ignore_index=True)

In [7]:
# Display the combined nutrient table built from f2015, f2016 and f2017.
df

Unnamed: 0,Lat,Lon,NO23,PO4,Si,Station,Z,dtUTC
0,49.608333,-124.866667,18.650,1.4590,68.43000,BS1,2,2015-02-18 23:07:00
1,49.608333,-124.866667,26.620,2.1750,55.73000,BS1,20,2015-02-18 23:07:00
2,49.608333,-124.866667,16.250,1.8090,39.14000,BS1,2,2015-03-21 22:55:00
3,49.608333,-124.866667,16.830,2.0690,39.11000,BS1,20,2015-03-21 22:55:00
4,49.608333,-124.866667,24.270,0.5510,49.90000,BS1,20,2015-04-05 23:10:00
5,49.608333,-124.866667,10.530,1.0330,34.71000,BS1,2,2015-04-09 21:48:00
6,49.608333,-124.866667,22.400,2.0810,43.40000,BS1,20,2015-04-09 21:48:00
7,49.608333,-124.866667,0.170,0.2090,5.79000,BS1,2,2015-05-13 18:49:00
8,49.608333,-124.866667,8.930,1.2950,19.48000,BS1,20,2015-05-13 18:49:00
9,49.608333,-124.866667,0.050,0.1980,6.45000,BS1,2,2015-05-17 21:14:00


In [19]:
# Load the 2015 chlorophyll CSV (latin-1 encoded). Date, time and coordinate
# columns are kept as strings and parsed explicitly below.
Chl2015=pd.read_csv('/ocean/eolson/MEOPAR/obs/PSFCitSci/Chla_2015PSFSalish_Sea_22.01.2018vers_8_CN_edits.csv',encoding='latin-1',
                    dtype={'Date sampled (mm/dd/yyyy)':str, 'Time of Day (Local)':str,
                            'Latitude':str,'Longitude':str,'Chl a':float,'Phaeophytin':float,'Depth':float},parse_dates=False)

# Drop incomplete rows BEFORE parsing coordinates: a missing Latitude or
# Longitude is a float NaN, and calling .split on it would raise.
Chl2015 = Chl2015.dropna(subset = ['Date sampled (mm/dd/yyyy)', 'Time of Day (Local)', 'Latitude', 'Longitude', 'Depth'], how='any')

# Coordinates are '<degrees>ç<minutes>' strings once decoded as latin-1
# (NOTE(review): 'ç' is presumably a mis-decoded degree sign - confirm).
# Convert to decimal degrees; longitude is negated.
degminlat=[ii.split('ç') for ii in Chl2015['Latitude'].values]
Chl2015['Lat']=[float(ii[0])+float(ii[1])/60 for ii in degminlat]
degminlon=[ii.split('ç') for ii in Chl2015['Longitude'].values]
Chl2015['Lon']=[-1.0*(float(ii[0])+float(ii[1])/60) for ii in degminlon]

# Local (Canada/Pacific) date + 12-hour clock time -> naive UTC datetime.
tz_pacific=pytz.timezone('Canada/Pacific')
ds=Chl2015['Date sampled (mm/dd/yyyy)']
ts=Chl2015['Time of Day (Local)']
dts=[tz_pacific.localize(dt.datetime.strptime(ii+'T'+jj,'%m/%d/%yT%I:%M:%S %p')).astimezone(pytz.utc).replace(tzinfo=None)
     for ii,jj in zip(ds,ts)]
Chl2015['dtUTC']=dts
Chl2015['Z']=[float(ii) for ii in Chl2015['Depth']]

# Remove the raw columns that have been replaced and harmonise names.
Chl2015=Chl2015.drop(['Date sampled (mm/dd/yyyy)','Time of Day (Local)','Latitude','Longitude','Depth'],axis=1)
Chl2015=Chl2015.rename(columns={'Chl a':'Chl','Phaeophytin':'Phaeo','Station Name':'Station'})

# Average replicates; numeric_only=True avoids a TypeError on pandas
# versions where mean() no longer drops non-numeric columns silently.
Chl2015_g=Chl2015.groupby(['Station','Lat','Lon','dtUTC','Z'],as_index=False)
Chl2015_m=Chl2015_g.mean(numeric_only=True)
# .reindex() with no arguments does nothing; reset_index states the intent.
Chl2015=Chl2015_m.reset_index(drop=True)

In [17]:
# 2016 chlorophyll: measurements and station metadata live in separate CSVs.
# (An encoding='latin-1' argument was tried on the first read and left disabled.)
Chl2016Dat=pd.read_csv('/ocean/eolson/MEOPAR/obs/PSFCitSci/2016ChlorophyllChlData.csv')
Chl2016Sta=pd.read_csv('/ocean/eolson/MEOPAR/obs/PSFCitSci/2016ChlorophyllStationData.csv')

# Station table: fix the trailing-space header, shorten coordinate names,
# drop incomplete rows and exact duplicates.
Chl2016Sta=Chl2016Sta.rename(columns={'DateCollected ':'DateCollected','Latitude':'Lat','Longitude':'Lon'})
Chl2016Sta=Chl2016Sta.dropna(subset = ['DateCollected', 'TimeCollected', 'Lat','Lon', 'Depth_m'], how='any')
Chl2016Sta=Chl2016Sta.drop_duplicates()

# Measurement table: remove rows with quality_flag above 3, drop the
# per-replicate columns and keep the mean values under short names.
flagged_rows=Chl2016Dat.loc[Chl2016Dat.quality_flag>3].index
Chl2016Dat=Chl2016Dat.drop(flagged_rows,axis=0)
Chl2016Dat=Chl2016Dat.drop(['Chla_ugL','Phaeophytin_ugL','quality_flag','ShipBoat'],axis=1)
Chl2016Dat=Chl2016Dat.rename(columns={'MeanChla_ugL':'Chl','MeanPhaeophytin_ugL':'Phaeo'})

# Attach station metadata to each measurement; only matched rows are kept.
Chl2016=pd.merge(Chl2016Sta,Chl2016Dat,how='inner',on=['DateCollected','Station','Depth_m'])
Chl2016['Z']=Chl2016['Depth_m'].map(float)

# Local (Canada/Pacific) date + 12-hour clock time -> naive UTC datetime.
ds=Chl2016['DateCollected']
ts=Chl2016['TimeCollected']
tz_pacific=pytz.timezone('Canada/Pacific')
dts=[]
for ii,jj in zip(ds,ts):
    local_stamp=tz_pacific.localize(dt.datetime.strptime(ii+'T'+jj,'%m-%d-%YT%I:%M:%S %p'))
    dts.append(local_stamp.astimezone(pytz.utc).replace(tzinfo=None))
Chl2016['dtUTC']=dts
Chl2016=Chl2016.drop(['DateCollected','TimeCollected','CV'],axis=1)

In [18]:
# Inspect the column dtypes of the merged 2016 chlorophyll table.
Chl2016.dtypes

Station            object
Lat               float64
Lon               float64
Depth_m            object
Chl               float64
Phaeo             float64
Z                 float64
dtUTC      datetime64[ns]
dtype: object

In [22]:
# 2017 chlorophyll: read seven selected columns of the 'avg-mean-cv%' sheet
# (15 leading rows skipped) and give them short names.
Chl2017=pd.read_excel('/ocean/eolson/MEOPAR/obs/PSFCitSci/PSF 2017 Chla_Data_Final_v-January 22-2018_CN_edits.xlsx',
                      sheet_name='avg-mean-cv%',skiprows=15,usecols=[1,3,4,5,7,9,11],names=['Date','Station','Time','Z0','Chl','Qflag','Phaeo'])

# Require full sample identification, at least one of Chl/Phaeo, and a
# quality flag that is not above 3.
Chl2017=Chl2017.dropna(subset=['Station','Date','Time','Z0'],how='any')
Chl2017=Chl2017.dropna(subset=['Chl','Phaeo'],how='all')
flagged_rows=Chl2017.loc[Chl2017.Qflag>3].index
Chl2017=Chl2017.drop(flagged_rows,axis=0)

# Local (Canada/Pacific) date + time of day -> naive UTC datetime.
ds=Chl2017['Date']
ts=Chl2017['Time']
tz_pacific=pytz.timezone('Canada/Pacific')
dts=[]
for ii,jj in zip(ds,ts):
    local_stamp=tz_pacific.localize(dt.datetime(ii.year,ii.month,ii.day)
                                    +dt.timedelta(hours=jj.hour,minutes=jj.minute,seconds=jj.second))
    dts.append(local_stamp.astimezone(pytz.utc).replace(tzinfo=None))
Chl2017['dtUTC']=dts

# This sheet carries no coordinates, so take the distinct
# (Station, Lat, Lon) combinations from the 2017 nutrient table.
# NOTE(review): a station listed with more than one Lat/Lon in f2017 would
# duplicate its chlorophyll rows in the merge - confirm this cannot happen.
staMap2017=f2017[['Station','Lat','Lon']].drop_duplicates()
Chl2017=pd.merge(Chl2017,staMap2017,how='inner',on='Station')
Chl2017['Z']=Chl2017['Z0'].map(float)
Chl2017=Chl2017.drop(['Qflag','Date','Z0'],axis=1)

In [24]:
# Stack the three yearly chlorophyll tables into one frame with a fresh
# index; sort=True orders the union of columns alphabetically.
dfChl = pd.concat([Chl2015, Chl2016, Chl2017], sort=True, ignore_index=True)

In [25]:
# Join nutrients and chlorophyll on station, position, time and depth.
# The outer join keeps samples that appear in only one of the two tables.
sample_keys = ['Station', 'Lat', 'Lon', 'dtUTC', 'Z']
df_a = df.merge(dfChl, how='outer', on=sample_keys)

In [27]:
# Inspect the column dtypes of the merged nutrient + chlorophyll table.
df_a.dtypes

Lat               float64
Lon               float64
NO23              float64
PO4               float64
Si                float64
Station            object
Z                   int64
dtUTC      datetime64[ns]
Chl               float64
Depth_m            object
Phaeo             float64
Time               object
dtype: object