Import model data for comparison with observations

In [1]:
import netCDF4 as nc
import datetime as dt
import pandas as pd
from pandas import Series, DataFrame
import subprocess
import requests
import matplotlib.pyplot as plt
import cmocean
import numpy as np
import os
import glob
import dateutil as dutil
from salishsea_tools import viz_tools, places
import xarray as xr
from salishsea_tools import evaltools as et
from collections import OrderedDict

%matplotlib inline

In [2]:

# Date window and run settings that are passed to et.matchData further down.
start_date = dt.datetime(2023,1,1)
end_date = dt.datetime(2023,12,31)
flen=1 # number of days per model output file. always 1 for 201905 and 201812 model runs
namfmt='nowcast' # for 201905 and 201812 model runs, this should always be 'nowcast'
# filemap is a dictionary of the form variableName: fileType, where variableName is the name
# of the variable you want to extract and fileType designates the type of
# model output file it can be found in (usually ptrc_T for biology, grid_T for temperature and
# salinity)
# NOTE(review): a later cell reads mod_GRMESZDIAT, mod_GRMESZPHY, mod_GRMESZMICZ and
# mod_GRMESZPON from the matched data, but none of those variables is requested here;
# it looks like those columns will be missing -- confirm.
filemap={'microzooplankton':'biol_T','mesozooplankton':'biol_T'}
# fdict is a dictionary mapping file type to its time resolution. Here, 1 means hourly output
# (1h file) and 24 means daily output (1d file). In certain runs, multiple time resolutions
# are available
fdict={'biol_T':24,'grid_T':24} #24 for hours averaged 

In [3]:
filemap

{'microzooplankton': 'biol_T', 'mesozooplankton': 'biol_T'}

In [4]:
# Open one daily biol_T results file so the next cell can list the variables it holds.
# NOTE(review): this handle is not closed in any visible cell.
f=nc.Dataset('/results2/SalishSea/nowcast-green.202111/26jul23/SalishSea_1d_20230726_20230726_biol_T.nc')

In [5]:
f.variables.keys()

dict_keys(['nitrate', 'ammonium', 'silicon', 'diatoms', 'flagellates', 'microzooplankton', 'dissolved_organic_nitrogen', 'particulate_organic_nitrogen', 'biogenic_silicon', 'mesozooplankton', 'deptht', 'y', 'x', 'time_counter'])

In [6]:
# Root directory of the model results; passed to et.matchData below.
PATH= '/results2/SalishSea/nowcast-green.202111/'

In [7]:
# Read the observation CSV (2023 SoG zooplankton production, depth-specific, per the file name).
df2=pd.read_csv('/ocean/ksuchy/MOAD/analysis-karyn/notebooks/Evaluations/2023_SoG_ZoopProd_DepthSpecific.csv')

In [8]:
df2

Unnamed: 0,Cruise,Date,Station,STN_TIME,Z_lower,Z_upper,Lat,Lon,BPR (mgC m-3 day-1),daily P:B
0,2023-003,03/15/2023,Stn 42,12:00,5,5,49.016667,-123.433333,0.849489,0.067893
1,2023-003,03/15/2023,Stn 42,12:00,10,10,49.016667,-123.433333,3.473540,0.177075
2,2023-003,03/15/2023,Stn 42,12:00,20,20,49.016667,-123.433333,8.605914,0.474847
3,2023-003,03/15/2023,Stn 42,12:00,50,50,49.016667,-123.433333,9.166827,0.382759
4,2023-003,03/15/2023,Stn 42,12:00,150,150,49.016667,-123.433333,,
...,...,...,...,...,...,...,...,...,...,...
168,2023-029,10/12/2023,Stn 12,12:00,20,20,49.716667,-124.666667,0.096584,0.005107
169,2023-029,10/12/2023,Stn 12,12:00,50,50,49.716667,-124.666667,0.164719,0.006107
170,2023-029,10/12/2023,Stn 12,12:00,150,150,49.716667,-124.666667,,
171,2023-029,10/12/2023,Stn 12,12:00,250,250,49.716667,-124.666667,,


In [9]:
# Rename the observation date column to the name used for time matching (dtUTC).
df2 = df2.rename(columns={'Date': 'dtUTC'})

### Convert date to proper format

In [10]:
df2['dtUTC'][0],df2['STN_TIME'][0]

('03/15/2023', '12:00')

In [11]:
df2['dtUTC'][0:10]

0    03/15/2023
1    03/15/2023
2    03/15/2023
3    03/15/2023
4    03/15/2023
5    03/15/2023
6    03/16/2023
7    03/16/2023
8    03/16/2023
9    03/16/2023
Name: dtUTC, dtype: object

In [12]:
df2['dtUTC'][0].split('/')

['03', '15', '2023']

In [13]:
# Holds the [month, day, year] string pieces of each observation date.
dateslist=list()

In [14]:
# Split every 'MM/DD/YYYY' date string into its [month, day, year] pieces.
# The list is rebuilt here (rather than appended to) so that re-running this
# cell does not duplicate every entry.
dateslist = [el.split('/') for el in df2['dtUTC']]

In [15]:
# Break each 'HH:MM' station time into its [hour, minute] string parts.
timeslist = [stn_time.split(':') for stn_time in df2['STN_TIME']]

In [16]:
# Combine the date pieces and time pieces into one datetime per row.
# NOTE(review): dts does not appear to be used by any later visible cell -- confirm.
dts = [
    dt.datetime(
        int(date_parts[2]),   # year
        int(date_parts[0]),   # month
        int(date_parts[1]),   # day
        int(time_parts[0]),   # hour
        int(time_parts[1]),   # minute
    )
    for date_parts, time_parts in zip(dateslist, timeslist)
]

In [17]:
df2['dtUTC']

0      03/15/2023
1      03/15/2023
2      03/15/2023
3      03/15/2023
4      03/15/2023
          ...    
168    10/12/2023
169    10/12/2023
170    10/12/2023
171    10/12/2023
172    10/12/2023
Name: dtUTC, Length: 173, dtype: object

In [18]:
# Parse the 'MM/DD/YYYY' strings into pandas timestamps in one vectorized call.
# Unlike a per-element strptime, pd.to_datetime leaves a column that is already
# datetime64 untouched, so re-running this cell does not raise.
# NOTE(review): STN_TIME is not folded into dtUTC here, so every timestamp is at
# midnight of the sampling date -- confirm that is intended.
df2['dtUTC'] = pd.to_datetime(df2['dtUTC'], format='%m/%d/%Y')

In [19]:
# Run every timestamp through et.pac_to_utc (Pacific local time -> UTC, judging by the
# helper's name and the 07:00 values in the recorded output below).
# NOTE(review): re-running this cell presumably applies the shift a second time -- confirm.
df2['dtUTC'] = list(map(et.pac_to_utc, df2['dtUTC']))

In [20]:
df2['dtUTC']

0     2023-03-15 07:00:00
1     2023-03-15 07:00:00
2     2023-03-15 07:00:00
3     2023-03-15 07:00:00
4     2023-03-15 07:00:00
              ...        
168   2023-10-12 07:00:00
169   2023-10-12 07:00:00
170   2023-10-12 07:00:00
171   2023-10-12 07:00:00
172   2023-10-12 07:00:00
Name: dtUTC, Length: 173, dtype: datetime64[ns]

In [21]:
# Match the observation rows in df2 against model output using the settings defined above.
# NOTE(review): the recorded run of this cell stopped with KeyError: 'time_centered_bounds',
# so `data` was never created in that session; every cell below depends on it.
data = et.matchData(df2, filemap, fdict, start_date, end_date, namfmt, PATH, flen,
                    meshPath='/home/sallen/MEOPAR/grid/mesh_mask202108.nc',
                    quiet=False, method='vertNet');

#

2023-03-15 07:00:00 biol_T 2007-01-01 12:00:00


KeyError: 'time_centered_bounds'

In [None]:
data

In [None]:
data.keys()

In [None]:
# Colour map handle plus in-memory copies of the bathymetry grid fields.
cm1 = cmocean.cm.thermal
with nc.Dataset('/ocean/ksuchy/MOAD/NEMO-forcing/grid/bathymetry_202108.nc') as bathy:
    # Copy each field while the file is open so the arrays outlive the dataset handle.
    bathylon, bathylat, bathyZ = (
        np.copy(bathy.variables[field][:, :])
        for field in ('nav_lon', 'nav_lat', 'Bathymetry')
    )

In [None]:
data.keys()

In [None]:
# Sum the four matched GRMESZ* model terms, then scale to the observation units.
# NOTE(review): 86400 presumably converts a per-second rate to per-day; 0.3, 5.7 and 12
# are not explained in this notebook -- TODO confirm their meaning and units.
mesozoo_terms = (
    data['mod_GRMESZDIAT']
    + data['mod_GRMESZPHY']
    + data['mod_GRMESZMICZ']
    + data['mod_GRMESZPON']
)
data['mod_prod'] = mesozoo_terms*86400*0.3*5.7*12

In [None]:
data

In [None]:
data.keys()

In [None]:
# Keep only rows with no missing value in any column.
data = data.dropna(axis=0, how='any')

In [None]:
# Range of the observed productivity values.
obs_bpr = data['BPR (mgC m-3 day-1)']
print(obs_bpr.min())
print(obs_bpr.max())


In [None]:
# Range of the modelled productivity values.
mod_prod_values = data['mod_prod']
print(mod_prod_values.min())
print(mod_prod_values.max())

In [None]:
def byDepth(ax,obsvar,modvar,lims):
    """Scatter model against observations on `ax`, grouped by the 'Z_lower' column.

    Reads the notebook-level `data` frame and delegates the plotting to
    et.varvarPlot with separation values (15, 25), legend name 'z' and unit 'm'.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    obsvar, modvar : column names in `data` for the observed and modelled values.
    lims : (low, high) pair used for both axis limits and for the 1:1 line.

    Returns
    -------
    (handles, legend) : the handles returned by et.varvarPlot and the legend built from them.
    """
    depth_colours = ('orange', 'darkturquoise', 'navy')
    handles = et.varvarPlot(ax, data, obsvar, modvar, 'Z_lower', (15, 25), 'z', 'm', depth_colours)
    legend = ax.legend(handles=handles)

    # 1:1 reference line, drawn before the limits are pinned to `lims`.
    ax.plot(lims, lims, 'k-', alpha=.5)
    ax.set_xlim(lims)
    ax.set_ylim(lims)
    ax.set_aspect(1)

    ax.set_xlabel('Log10 Observations (mg C m$^{-3}$ d$^{-1}$)+0.001')
    ax.set_ylabel('Log10 Z2 Model (mg C m$^{-3}$ d$^{-1}$)+0.001')
    return handles, legend

In [None]:
# Plain model-vs-observation scatter with a 1:1 reference line.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4))
fig.subplots_adjust(hspace=1)

axis_range = (0, 40)
ax.plot(data['BPR (mgC m-3 day-1)'], data['mod_prod'], 'k.')
ax.plot(axis_range, axis_range, 'r-', alpha=.3)

ax.set_title('Mesozooplankton Productivity mg C m-3 d-1')
ax.set_xlabel('Obs')
ax.set_ylabel('Model')
ax.set_xlim(*axis_range)
ax.set_ylim(*axis_range)

In [None]:
# Untransformed model vs observations, grouped by depth.
obsvar='BPR (mgC m-3 day-1)'
modvar='mod_prod'

fig, ax = plt.subplots(1,1,figsize = (4,4))
SI,l=byDepth(ax,obsvar,modvar,(0,40))
# byDepth labels both axes as "Log10 ... +0.001", but this figure shows the
# untransformed values, so replace the labels with ones that match the data.
ax.set_xlabel('Observations (mg C m$^{-3}$ d$^{-1}$)')
ax.set_ylabel('Z2 Model (mg C m$^{-3}$ d$^{-1}$)')
ax.set_title('Mesozooplankton Productivity mg C m-3 d-1 By Depth')

In [None]:
# define log transform function with slight shift to accommodate zero values
def logt(x, offset=.001):
    """Return log10(x + offset).

    Parameters
    ----------
    x : scalar, numpy array or pandas Series of values to transform.
    offset : small positive shift added before taking the logarithm so that
        zero values map to a finite number; defaults to 0.001.

    Returns
    -------
    The base-10 logarithm of the shifted input, in the same container type
    numpy/pandas produce for `x + offset`.
    """
    return np.log10(x + offset)

In [None]:
# Add log-transformed copies of the observed and modelled productivity columns.
for log_col, raw_col in (('L10Productivity', 'BPR (mgC m-3 day-1)'),
                         ('L10mod_prod', 'mod_prod')):
    data[log_col] = logt(data[raw_col])

### Depth-specific point-by-point comparisons of model vs obs

In [None]:
# Log10-transformed model vs observations, grouped by depth.
obsvar2 = 'L10Productivity'
modvar2 = 'L10mod_prod'

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 6))
SI, l = byDepth(ax, obsvar2, modvar2, (-3, 3))
ax.set_title('Z2 Productivity (mg C m$^{-3}$ d$^{-1}$) By Depth', fontsize=12)
ax.legend(frameon=True, fontsize=12)
#fig.savefig('SaanichLog10ModelvsObsProductivity.jpg',bbox_inches='tight')

In [None]:
# Map of the observation locations, one colour per station.
fig, ax = plt.subplots(1,1,figsize = (6,6))
with nc.Dataset('/ocean/ksuchy/MOAD/NEMO-forcing/grid/bathymetry_202108.nc') as grid:
    viz_tools.plot_coastline(ax, grid, coords = 'map',isobath=.1,color='darkgrey')
# Palette shared with byRegion; stations are assigned colours in order of appearance.
colors=('royalblue',
        'green',
        'orange',
        'mediumspringgreen',
        'black',
        'darkviolet',
        'lightblue',
        'fuchsia',
        'firebrick','lime','darkgoldenrod','darkorange','deepskyblue','teal',
        'darkgreen','darkblue','slateblue','purple')
# datreg maps each station name to the rows of `data` collected at that station.
datreg=dict()
for ind, iregion in enumerate(data.Station.unique()):
    datreg[iregion] = data.loc[data.Station==iregion]
    # Wrap around the palette so more stations than colours does not raise IndexError.
    ax.plot(datreg[iregion]['Lon'], datreg[iregion]['Lat'],'o',
            color = colors[ind % len(colors)], label=iregion,markersize=4)
ax.set_ylim(47, 51)
ax.xaxis.set_tick_params(labelsize=14)
ax.yaxis.set_tick_params(labelsize=14)
ax.legend(bbox_to_anchor=(1.1, 1),frameon=False,markerscale=3.)
ax.set_xlim(-126, -121);
ax.set_title('Observation Locations');
#ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left',frameon=False,markerscale=3.,fontsize=11)
ax.axis("off")
#fig.savefig('SalishSeaObservationLocations_noframe.jpg',bbox_inches='tight')

In [None]:
# Calendar month of each matched sample; used below to build the seasonal subsets.
data['Month'] = data['dtUTC'].dt.month

In [None]:
def byRegion(ax,obsvar,modvar,lims):
    """Scatter model against observations on `ax`, one colour per station.

    Reads the notebook-level `data`, `datreg` (station name -> sub-frame) and
    `colors` (palette) objects and delegates each station's plot to et.varvarPlot.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    obsvar, modvar : column names for the observed and modelled values.
    lims : (low, high) pair used for both axis limits and for the 1:1 line.

    Returns
    -------
    (SS, l) : list of the per-station et.varvarPlot return values, and the legend.
    """
    SS=[]
    for ind, iregion in enumerate(data.Station.unique()):
        # Wrap around the palette so more stations than colours does not raise IndexError.
        SS0=et.varvarPlot(ax,datreg[iregion],obsvar,modvar,
                          cols=(colors[ind % len(colors)],),lname=iregion)
        SS.append(SS0)
    # The first element of each return value is itself indexed once more to get the handle.
    l=ax.legend(handles=[ip[0][0] for ip in SS])
    ax.set_xlabel('Log10 Observations (mg C m$^{-3}$ d$^{-1}$)+0.001')
    ax.set_ylabel('Log10 Model (mg C m$^{-3}$ d$^{-1}$)+0.001')
    ax.plot(lims,lims,'k-',alpha=.5)
    ax.set_xlim(lims)
    ax.set_ylim(lims)
    ax.set_aspect(1)
    return SS,l

In [None]:
# Seasonal subsets of `data`, selected by calendar month.
DJF = data.loc[data.Month.isin([12, 1, 2])]
MAM = data.loc[data.Month.isin([3, 4, 5])]
JJA = data.loc[data.Month.isin([6, 7, 8])]
SON = data.loc[data.Month.isin([9, 10, 11])]

In [None]:
def bySeason(ax,obsvar,modvar,lims):
    """Draw one model-vs-observation panel per season.

    Every axes in `ax` first gets a 1:1 line, limits `lims`, equal aspect and
    'Obs'/'Model' labels. The first four axes are then filled from the
    notebook-level DJF, MAM, JJA and SON frames via et.varvarPlot.

    Parameters
    ----------
    ax : indexable collection of at least four matplotlib axes.
    obsvar, modvar : column names for the observed and modelled values.
    lims : (low, high) pair used for both axis limits and for the 1:1 line.

    Returns
    -------
    None
    """
    for panel in ax:
        panel.plot(lims, lims, 'k-')
        panel.set_xlim(lims)
        panel.set_ylim(lims)
        panel.set_aspect(1)
        panel.set_xlabel('Obs')
        panel.set_ylabel('Model')

    palette = ('crimson', 'darkturquoise', 'navy')
    seasons = (('Winter', DJF), ('Spring', MAM), ('Summer', JJA), ('Autumn', SON))
    for idx, (season_name, season_frame) in enumerate(seasons):
        et.varvarPlot(ax[idx], season_frame, obsvar, modvar, cols=palette)
        ax[idx].set_title(season_name)
    return

In [None]:
# Figure 1: log10 model vs observations, coloured by station.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 4))
SS, l = byRegion(ax, 'L10Productivity', 'L10mod_prod', (-3, 3))
ax.set_title('Z2 Productivity - By Region', fontsize=18)
ax.legend(bbox_to_anchor=(1.1, 1.05), frameon=False, markerscale=2.5)
#fig.savefig('SalishSeaDIMicroZoopEval_byregion_noLegend.jpg',bbox_inches='tight')

# Figure 2: the same comparison split into four seasonal panels.
fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(16, 3.3))
bySeason(ax, 'L10Productivity', 'L10mod_prod', (-3, 3))

In [None]:
# Subsets for which statistics are computed: label -> dataframe view.
# Whole data set and seasons first, then one entry per station.
statsubs = OrderedDict([
    ('All', data),
    ('Winter', DJF),
    ('Spring', MAM),
    ('Summer', JJA),
    ('Autumn', SON),
])
statsubs.update((iregion, datreg[iregion]) for iregion in data.Station.unique())
statsubs.keys()

In [None]:
# Column names and year label used by the statistics cell for the mesozooplankton evaluation
obsvar4='L10Productivity'
modvar4='L10mod_prod'
year=2023 # TODO: work out how to compute the statistics across all years



In [None]:
# Compute the statistics for every subset in statsubs and render them as a table.
# NOTE(review): the results are stored under the key 'MicroZ' although the variables
# compared here are the productivity columns -- confirm the label is intended.
statsDict = {year: {'MicroZ': OrderedDict()}}
subset_stats = statsDict[year]['MicroZ']
for isub in statsubs:
    print(isub)
    subset = statsubs[isub]
    var = subset_stats[isub] = dict()
    # et.stats is unpacked into exactly these six entries.
    (var['N'], var['mmean'], var['omean'],
     var['Bias'], var['RMSE'], var['WSS']) = et.stats(subset.loc[:, [obsvar4]],
                                                      subset.loc[:, [modvar4]])
tbl, tdf = et.displayStats(subset_stats, level='Subset', suborder=list(statsubs.keys()))
tbl

#tbl.to_excel("SalishSeaMicrozoopEvalStats.xlsx")

In [None]:
# Rows where the modelled productivity is exactly zero.
is_zero_prod = data['mod_prod'] == 0
data_zeroes = data.loc[is_zero_prod]

In [None]:
data_zeroes