In [1]:
import datetime as dt
#from salishsea_tools import evaltools
from salishsea_tools import geo_tools
import glob
import pandas as pd
import numpy as np
import pytz
import netCDF4 as nc

In [2]:
# Load the 2015 bottle (nutrient) observations from CSV into a DataFrame.
# The columns used further down are: depth, date, Time, lat, lon, no23, si.
nutrients_2015 = pd.read_csv(
    '/ocean/eolson/MEOPAR/obs/PSFCitSci/PSFbottledata2015_CN_edits_EOCor2.csv')

In [3]:
# Preview the first 20 rows to check the column layout and spot missing values.
nutrients_2015.head(20)

Unnamed: 0,station,num,depth,date,Time,lat,lon,no23,si,po4,flagged
0,CBE2,317,2,26-01-2015,,48.736667,-123.571667,7.47,89.02,0.488,False
1,CBE2,319,20,26-01-2015,,48.736667,-123.571667,25.69,51.65,1.993,False
2,CBW2,375,20,26-01-2015,,48.748333,-123.621667,26.59,50.57,2.105,False
3,CBW2,376,20,26-01-2015,,48.748333,-123.621667,34.11,56.14,2.548,False
4,VC1,229,2,26-01-2015,,48.461667,-123.18,27.23,46.47,2.098,False
5,VC1,231,20,26-01-2015,,48.461667,-123.18,25.7,42.58,2.042,False
6,VC3,235,2,26-01-2015,,48.333333,-123.3,24.88,41.28,1.984,False
7,VC3,236,20,26-01-2015,,48.333333,-123.3,24.63,41.57,1.952,False
8,CBC2,313,2,27-01-2015,,48.748333,-123.605,6.64,81.18,0.439,False
9,CBC2,315,20,27-01-2015,,48.748333,-123.605,25.78,51.19,2.144,False


In [4]:
# Keep only the samples that have a date, a time of day and a position
# (all four are used below); work on an independent copy of the raw frame.
data=nutrients_2015.dropna(subset=['date','Time','lat','lon']).copy(deep=True)
# Alias columns under the names used for the model matching further down.
data=data.assign(Lat=data['lat'],Lon=data['lon'],Z=data['depth'])

In [5]:
# Date strings ('dd-mm-YYYY') and 12-hour clock time strings, row for row.
ds=data['date'].values
ts=data['Time'].values
# Interpret each date/time pair as Canada/Pacific local time, convert it to UTC
# and drop the tzinfo so the result is a naive UTC datetime.
pacific=pytz.timezone('Canada/Pacific')
dts=[]
for ii,jj in zip(ds,ts):
    local_naive=dt.datetime.strptime(ii+' '+jj,'%d-%m-%Y %I:%M:%S %p')
    local_aware=pacific.localize(local_naive)
    dts.append(local_aware.astimezone(pytz.utc).replace(tzinfo=None))

In [6]:
# Store the naive-UTC sample times (one per row of data) as a new column.
data['dtUTC']=dts

In [7]:
# Show the filtered frame with the added Lat, Lon, Z and dtUTC columns.
data

Unnamed: 0,station,num,depth,date,Time,lat,lon,no23,si,po4,flagged,Lat,Lon,Z,dtUTC
12,CBC1,164,2,02-04-2015,10:29:00 AM,48.740000,-123.606667,7.70,99.79,0.629,False,48.740000,-123.606667,2,2015-04-02 17:29:00
13,CBC1,166,20,02-04-2015,10:29:00 AM,48.740000,-123.606667,21.66,40.24,1.985,False,48.740000,-123.606667,20,2015-04-02 17:29:00
14,CBC2,176,2,02-04-2015,10:59:00 AM,48.748333,-123.605000,5.65,82.72,0.474,False,48.748333,-123.605000,2,2015-04-02 17:59:00
15,CBC2,178,20,02-04-2015,10:59:00 AM,48.748333,-123.605000,22.25,42.60,1.922,False,48.748333,-123.605000,20,2015-04-02 17:59:00
16,CBE2,168,2,02-04-2015,9:30:00 AM,48.736667,-123.571667,11.43,64.98,0.987,False,48.736667,-123.571667,2,2015-04-02 16:30:00
17,CBE2,170,20,02-04-2015,9:30:00 AM,48.736667,-123.571667,21.60,41.90,1.975,False,48.736667,-123.571667,20,2015-04-02 16:30:00
18,CBW2,172,20,02-04-2015,12:00:00 PM,48.748333,-123.621667,21.61,44.70,2.082,False,48.748333,-123.621667,20,2015-04-02 19:00:00
19,CBW2,175,2,02-04-2015,12:00:00 PM,48.748333,-123.621667,4.11,96.06,0.299,False,48.748333,-123.621667,2,2015-04-02 19:00:00
20,SN-001,161,2,02-04-2015,8:40:00 AM,48.768330,-123.573333,20.47,39.00,1.882,False,48.768330,-123.573333,2,2015-04-02 15:40:00
21,SN-001,162,20,02-04-2015,8:40:00 AM,48.768330,-123.573333,21.26,40.26,1.886,False,48.768330,-123.573333,20,2015-04-02 15:40:00


In [8]:
# Mesh-mask file; tmask, nav_lon and nav_lat are read from it below.
meshPath='/ocean/eolson/MEOPAR/NEMO-forcing/grid/mesh_mask201702_noLPE.nc'
# Time window: used to select observations (start <= dtUTC < end) and to
# list the model result files that cover the same period.
start=dt.datetime(2015,2,6)
end=dt.datetime(2015,3,18)

In [9]:
# Restrict the observations to the half-open window [start, end).
in_window=(data['dtUTC']>=start)&(data['dtUTC']<end)
data=data.loc[in_window].copy(deep=True)

In [10]:
# Integer placeholder columns for the model grid indices, filled in below.
data['j']=np.zeros(len(data),dtype=int)
data['i']=np.zeros(len(data),dtype=int)

In [11]:
# For every distinct (Lat, Lon) pair in the observations, look up the closest
# model grid point and store its (j, i) indices on all rows at that position.
with nc.Dataset(meshPath) as fmesh:
    navlon=fmesh.variables['nav_lon']
    navlat=fmesh.variables['nav_lat']
    # tmask is flipped (1 -> 0, 0 -> 1) before being passed as land_mask
    lmask=-1*(fmesh.variables['tmask'][0,0,:,:]-1)
    positions=np.unique(data.loc[:,['Lat','Lon']].values,axis=0)
    for la,lo in positions:
        jj, ii = geo_tools.find_closest_model_point(lo, la, navlon, navlat,
                                                    land_mask = lmask)
        at_position=(data.Lat==la)&(data.Lon==lo)
        data.loc[at_position,['j','i']]=jj,ii

In [12]:
# Order the samples by time (then position and depth) and renumber the rows
# from 0 so the index follows the processing order.
data=data.sort_values(by=['dtUTC','Lat','Lon','depth']).reset_index(drop=True)

In [13]:
def index_model_files(start,end,basedir,nam_fmt,flen,ftype,tres):
    """
    Build a table of the model result files that cover a date range.

    Starting at start, steps forward flen days at a time for as long as the
    file start date is <= end, and looks up one file per step with glob.

    start:   datetime, start date of the first file
    end:     datetime, last date that has to be covered
    basedir: directory prepended to the glob pattern (should end with '/')
    nam_fmt: 'nowcast' ({ddmonyy}/SalishSea_{res}_{date}_{date}_{ftype}.nc) or
             'long' (**/SalishSea_{res}*{ftype}_{first}-{last}.nc)
    flen:    number of days per file (must be >= 1)
    ftype:   file type string, e.g. 'ptrc_T' or 'grid_T'
    tres:    24 selects the '1d' tag in the file name, anything else '<tres>h'

    outputs pandas dataframe containing columns 'paths','t_0', and 't_n',
    one row per file, indexed 0..n-1; t_0 is the file start and t_n the start
    of the following file.

    raises ValueError if nam_fmt is not recognised or flen < 1
    raises IndexError if no file matches one of the expected names
    """
    if ftype not in ('ptrc_T','grid_T','grid_W','grid_U','grid_V','dia1_T'):
        print('ftype={}, are you sure? (if yes, add to list)'.format(ftype))
    if flen<1:
        # a non-positive step would never advance past end
        raise ValueError('flen must be at least 1 day, got '+str(flen))
    if tres==24:
        ftres='1d'
    else:
        ftres=str(int(tres))+'h'
    ffmt='%Y%m%d'
    dfmt='%d%b%y'
    if nam_fmt=='nowcast':
        stencil='{0}/SalishSea_'+ftres+'_{1}_{1}_'+ftype+'.nc'
    elif nam_fmt=='long':
        stencil='**/SalishSea_'+ftres+'*'+ftype+'_{1}-{2}.nc'
    else:
        raise ValueError('nam_fmt '+nam_fmt+' is not defined')
    paths=list()
    t_0=list()
    t_n=list()
    iits=start
    while iits<=end:
        iite=iits+dt.timedelta(days=(flen-1))  # last day contained in this file
        iitn=iits+dt.timedelta(days=flen)      # first day of the next file
        pattern=basedir+stencil.format(iits.strftime(dfmt).lower(),iits.strftime(ffmt),iite.strftime(ffmt))
        # sorted so the choice is reproducible if several files match
        matches=sorted(glob.glob(pattern,recursive=True))
        if len(matches)==0:
            print('file does not exist:  '+pattern)
            raise IndexError('no model file matches '+pattern)
        paths.append(matches[0])
        t_0.append(iits)
        t_n.append(iitn)
        iits=iitn
    # dtype=object keeps t_0/t_n as the datetime objects that were passed in
    return pd.DataFrame({'paths':paths,'t_0':t_0,'t_n':t_n},index=list(range(len(paths))),
                        columns=['paths','t_0','t_n'],dtype=object)

In [14]:
# Where the model results live and how the files are named and chunked:
#   basedir - directory prepended to the glob pattern in index_model_files
#   nam_fmt - 'long' selects the '**/SalishSea_<res>*<ftype>_<first>-<last>.nc' pattern
#   flen    - number of days covered by each model file
basedir='/data/eolson/MEOPAR/SS36runs/CedarRuns/spring2015_A3Base/'
nam_fmt='long'
flen=10

# varmap:  observation column name -> model variable name
# filemap: model variable name -> file type that holds it
# fdict:   file type -> value passed as tres to index_model_files
#          (24 gives the '1d' file-name tag, anything else '<n>h')
# NOTE(review): with tres=1 the file lookup further down fails
# ('file does not exist: ... SalishSea_1h*ptrc_T_...'); confirm the output
# resolution and file naming of this run.
varmap={'no23':'nitrate','si':'silicon'}
filemap={'nitrate':'ptrc_T','silicon':'ptrc_T'}
fdict={'ptrc_T':1,'grid_T':1}

In [15]:
# set up columns to accept model values
# one zero-filled float column per model variable, overwritten with model values below
for colname in ['mod_'+modvar for modvar in varmap.values()]:
    data[colname]=np.zeros(len(data))

In [16]:
# Check that the three maps are minimal and consistent with each other.
# Unused entries are dropped in place; missing entries are only reported.
wanted_vars=set(varmap.values())
for ikey in list(filemap.keys()):
    if ikey not in wanted_vars:
        filemap.pop(ikey)
vars_without_file=wanted_vars-set(filemap.keys())
if len(vars_without_file)>0:
    print('Error: file(s) missing from filemap:',vars_without_file)
fkeysVar=list(filemap.keys())

wanted_ftypes=set(filemap.values())
for ikey in list(fdict.keys()):
    if ikey not in wanted_ftypes:
        fdict.pop(ikey)
ftypes_without_res=wanted_ftypes-set(fdict.keys())
if len(ftypes_without_res)>0:
    print('Error: file(s) missing from fdict:',ftypes_without_res)
ftypes=list(fdict.keys())

In [17]:
# Invert filemap: file type -> list of the model variables stored in it.
filemap_r={ift:list() for ift in ftypes}
for modvar,ift in filemap.items():
    filemap_r[ift].append(modvar)
print(filemap_r)

{'ptrc_T': ['nitrate', 'silicon']}


In [18]:
# For each file type, build the table of model files (paths, t_0, t_n) that
# spans start..end. index_model_files raises if an expected file is missing.
flist=dict()
for ift in ftypes:
    flist[ift]=index_model_files(start,end,basedir,nam_fmt,flen,ift,fdict[ift])

file does not exist:  /data/eolson/MEOPAR/SS36runs/CedarRuns/spring2015_A3Base/**/SalishSea_1h*ptrc_T_20150206-20150215.nc


IndexError: list index out of range

In [None]:
# Inspect the file table of the first file type.
flist[ftypes[0]]

In [None]:
def nextfile(ift,idt,ifind,fid,fend):
    """
    Open the model file of type ift whose time span contains idt.

    ift:   file type key, e.g. 'ptrc_T'
    idt:   datetime that has to fall in [t_0, t_n) of the selected file
    ifind: file table with columns 'paths', 't_0' and 't_n'
    fid:   dict of currently open datasets keyed by file type (updated in place)
    fend:  dict of the t_n of the currently open file per file type (updated in place)

    returns the updated fid and fend
    raises IndexError if no row of ifind covers idt; in that case the file
    that was open before the call is left open and fid/fend are unchanged
    """
    # search the table that was passed in, not the notebook-level flist
    frow=ifind.loc[(ifind.t_0<=idt)&(ifind.t_n>idt)]
    if len(frow)==0:
        raise IndexError('no '+str(ift)+' file covers '+str(idt))
    # only release the previous file once a replacement is known to exist
    if ift in fid.keys():
        fid[ift].close()
    fid[ift]=nc.Dataset(frow['paths'].values[0])
    fend[ift]=frow['t_n'].values[0]
    return fid, fend

In [None]:
def getTimeInd(idt,ifid,torig):
    """
    Return the index of the first interval in ifid's 'time_centered_bounds'
    whose end value is larger than the seconds elapsed from torig to idt.

    idt:   datetime to locate
    ifid:  open dataset exposing variables['time_centered_bounds'] as rows of (start, end)
    torig: datetime the bounds are counted from, in seconds

    raises IndexError if no interval ends after idt
    """
    elapsed=(idt-torig).total_seconds()
    for pos,interval in enumerate(ifid.variables['time_centered_bounds'][:,:]):
        if interval[1]>elapsed:
            return pos
    raise IndexError('list index out of range')

In [None]:
def getZInd(idt,ifid):
    """
    Return the index of the first interval in ifid's 'deptht_bounds' whose
    second value is larger than idt.

    idt:  value to locate (the caller passes the sample depth)
    ifid: open dataset exposing variables['deptht_bounds'] as rows of two bounds

    raises IndexError if no interval has a second bound larger than idt
    """
    for pos,interval in enumerate(ifid.variables['deptht_bounds'][:,:]):
        if interval[1]>idt:
            return pos
    raise IndexError('list index out of range')

In [None]:
# Read the full 4-D tmask array from the mesh file and keep an in-memory copy,
# so it stays usable after the file is closed.
with nc.Dataset(meshPath) as fmesh:
    tmask=np.copy(fmesh.variables['tmask'][:,:,:,:])

In [None]:
# Spot-check a single mask value (presumably at one of the matched grid points; verify).
tmask[0,1,701,168]

In [None]:
# Spot-check the mask value at the first element of the array, for comparison.
tmask[0,0,0,0]

In [None]:
# Loop through the observations (sorted by time), opening and closing model
# files as needed and storing the model value that matches each sample.
fid=dict()   # currently open dataset per file type
fend=dict()  # t_n of the currently open file per file type
torig=None   # time origin of the model time axis, read from the first file opened
try:
    for ind, row in data.iterrows():
        print('idt',row['dtUTC'])
        for ift in ftypes:
            # open the first file, or move on once the sample time passes the open file
            if ift not in fid or row['dtUTC']>=fend[ift]:
                fid,fend=nextfile(ift,row['dtUTC'],flist[ift],fid,fend)
            if torig is None:
                # assumes same for all files in run
                torig=dt.datetime.strptime(fid[ift].variables['time_centered'].time_origin,'%Y-%m-%d %H:%M:%S')
            # find time index
            ih=getTimeInd(row['dtUTC'],fid[ift],torig)
            # find depth index
            ik=getZInd(row['Z'],fid[ift])
            # assign values for each var assoc with ift
            for ivar in filemap_r[ift]:
                # read the value once and reuse it for the log line and the assignment
                val=fid[ift].variables[ivar][ih,ik,row['j'],row['i']]
                print(ih,ik,row['j'],row['i'],ivar,val)
                data.loc[ind,['mod_'+ivar]]=val
finally:
    # release every model file that is still open, also when the loop fails part-way
    for ifile in fid.values():
        if ifile.isopen():
            ifile.close()

In [None]:
# Show the observations together with the matched model values (mod_* columns).
data

In [None]:
# Map check: draw the tmask[0,0] slice and overlay the matched model grid
# points in red (i on the x axis, j on the y axis).
import matplotlib.pyplot as plt
%matplotlib inline
plt.pcolormesh(tmask[0,0,:,:])
plt.plot(data['i'],data['j'],'r.')

In [None]:
# Model vs. observed nitrate (red dots); the black line is the 1:1 line, so
# points on it are samples where model and observation agree exactly.
plt.plot(data['no23'],data['mod_nitrate'],'r.')
plt.plot(np.arange(0,30),np.arange(0,30),'k-')
# label the axes so the figure can be read on its own
plt.xlabel('observed no23')
plt.ylabel('model nitrate')