In [1]:
import numpy as np  # this module handles arrays, but here we need it for its NaN value
import pandas as pd # this module contains a lot of tools for handling tabular data
from matplotlib import pyplot as plt
from salishsea_tools import evaltools as et
import datetime as dt
import os
import gsw
import pickle
import netCDF4 as nc
import cmocean
from scipy import stats as spst
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
%matplotlib inline

## How CSV was created:

In [2]:
## load source files, do conversions, make CSV

def subval(idf,colList):
    """Fill gaps in one column from fallback columns, then drop the fallbacks.

    The first entry of colList is the column that is kept. Wherever it is
    NaN it takes the value from the next entry of colList that is not NaN,
    in order of precedence. Rows that are NaN in every listed column stay NaN.

    :arg idf: table to modify; it is changed in place and also returned
    :type idf: :py:class:`pandas.DataFrame`
    :arg colList: column to keep followed by one or more fallback columns
    :type colList: list or tuple of str
    :returns: idf, with colList[0] filled and colList[1:] removed
    :raises NotImplementedError: if colList has fewer than two entries
    """
    if len(colList)<2:
        # nothing to fill from; keep the error the 2/3-column version raised
        raise NotImplementedError('Add to code to handle this case')
    primary=colList[0]
    fallbacks=list(colList[1:])
    merged=idf[primary]
    # each pass only touches rows that are still NaN, so earlier columns win
    for fallback in fallbacks:
        merged=merged.fillna(idf[fallback])
    idf[primary]=merged
    idf.drop(columns=fallbacks,inplace=True)
    return idf

# paths to the yearly source CSV files that are merged below
flist=('/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2015_NewALLO.csv',
       '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2016_NewALLO.csv',
       '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2017_NewALLO.csv',
       '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2018_NewALLO.csv',
       '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2019.csv')

# read every file, then stack the tables into one with a fresh 0..n-1 index
dfs=[pd.read_csv(fname) for fname in flist]
df=pd.concat(dfs,ignore_index=True,sort=False)

# columns that are not used anywhere below
unused_columns=['ADM:MISSION','ADM:PROJECT','ADM:SCIENTIST','Zone','Zone.1','Temperature:Draw',
                'Temperature:Draw [deg C (ITS90)]','Bottle:Firing_Sequence','Comments by sample_numbeR',
                'File Name','LOC:EVENT_NUMBER','Number_of_bin_records']
df=df.drop(columns=unused_columns)

# Each tuple starts with the column that is kept; the remaining names are the
# columns subval uses (in order of precedence) to fill it where it is NaN,
# after which subval drops them.
fill_groups=[
    ('Chlorophyll:Extracted [mg/m^3]','Chlorophyll:Extracted'),
    ('Fluorescence [mg/m^3]','Fluorescence:URU:Seapoint [mg/m^3]','Fluorescence:URU:Seapoint'),
    ('Lat','LOC:LATITUDE'),
    ('Lon','LOC:LONGITUDE'),
    ('Nitrate_plus_Nitrite [umol/L]','Nitrate_plus_Nitrite'),
    ('PAR [uE/m^2/sec]','PAR'),
    ('Phaeo-Pigment:Extracted [mg/m^3]','Phaeo-Pigment:Extracted'),
    ('Phosphate [umol/L]','Phosphate'),
    ('Pressure [decibar]','Pressure'),
    ('Salinity','Salinity [PSS-78]','Salinity:T1:C1 [PSS-78]'),
    ('Salinity:Bottle','Salinity:Bottle [PSS-78]'),
    ('Silicate [umol/L]','Silicate'),
    ('Temperature','Temperature [deg C (ITS90)]','Temperature:Secondary [deg C (ITS90)]'),
    ('Transmissivity [*/metre]','Transmissivity'),
]
for fill_group in fill_groups:
    df=subval(df,fill_group)

# depth and pressure: keep the reported value where there is one, otherwise
# convert from the other quantity with gsw (sign flipped with -1 in both directions)
obs_lat=df['Lat'].values
obs_depth=df['Depth [metres]']
obs_pressure=df['Pressure [decibar]']
df['Z']=np.where(obs_depth.isna(),
                 -1*gsw.z_from_p(obs_pressure.values,obs_lat),
                 obs_depth)
df['p']=np.where(obs_pressure.isna(),
                 gsw.p_from_z(-1*obs_depth.values,obs_lat),
                 obs_pressure)

# SA from gsw.SA_from_SP, then CT from gsw.CT_from_t using that SA
df['SA']=gsw.SA_from_SP(df['Salinity'].values,df['p'].values,df['Lon'].values,obs_lat)
df['CT']=gsw.CT_from_t(df['SA'].values,df['Temperature'].values,df['p'].values)

# errors='raise' makes a missing source column fail here instead of passing silently
df=df.rename(columns={'TchlA':'TchlA (ug/L)','Raphido':'Raphidophytes',
                      'Dinoflagellates-1':'Dinoflagellates','Dictyo':'Dictyochophytes'},
             errors='raise')

# parse the start time; entries that are not strings become NaN and are removed by dropna below
start_time_col='FIL:START TIME YYYY/MM/DD HH:MM:SS'
df['dtUTC']=[dt.datetime.strptime(stamp,'%Y-%m-%d %H:%M:%S') if isinstance(stamp,str) else np.nan
             for stamp in df[start_time_col]]

# remove the columns that are no longer needed
df=df.drop(columns=[start_time_col, 'LOC:STATION','LOC:WATER DEPTH','Oxygen:Dissolved:CTD',
                    'pH:SBE:Nominal', 'Salinity:Bottle','Flag:Salinity:Bottle', 'Flag:Chlorophyll:Extracted',
                    'Flag:Nitrate_plus_Nitrite', 'Flag:Silicate', 'Flag:Phosphate','Cruise', 'Oxygen:Dissolved',
                    'Flag:Oxygen:Dissolved', 'Transmissivity [*/metre]','PAR [uE/m^2/sec]', 'PAR:Reference [uE/m^2/sec]',
                    'Oxygen:Dissolved:SBE [mL/L]', 'Oxygen:Dissolved:SBE [umol/kg]','Temperature', 'Salinity',
                    'Phaeo-Pigment:Extracted [mg/m^3]','Oxygen:Dissolved [mL/L]', 'Oxygen:Dissolved [umol/kg]',
                    'Depth [metres]','Phosphate [umol/L]','Fluorescence [mg/m^3]','Oxygen:Dissolved:CTD [mL/L]',
                    'Oxygen:Dissolved:CTD [umol/kg]','Alkalinity:Total [umol/L]','Flag:Alkalinity:Total',
                    'Carbon:Dissolved:Inorganic [umol/kg]','Flag:Carbon:Dissolved:Inorganic', 'Bottle_Number',
                    'Pressure [decibar]','Depth:Nominal [metres]','Conductivity [S/m]', 'LOC:ALTIMETER (M)', 'ADM:PLATFORM',
                    'LOC:GEOGRAPHIC AREA', 'FIL:DATA DESCRIPTION',
                    'Transmissivity:Green [*/metre]', 'Date', ])

# keep only rows that have a time, a position and a Diatoms-1 value
df=df.dropna(how='any',subset=['dtUTC','Lat','Lon','Diatoms-1'])

       
# get model indices:
# path to model files
PATH= '/results2/SalishSea/nowcast-green.201905/'
# number of days per model file
flen=1
# dictionary mapping desired model variables to the file types where they are found
filemap={'nitrate':'ptrc_T','silicon':'ptrc_T','ammonium':'ptrc_T','diatoms':'ptrc_T','ciliates':'ptrc_T',
         'flagellates':'ptrc_T','vosaline':'grid_T','votemper':'grid_T'}
# dictionary mapping model file types to their time resolution in hours
fdict={'ptrc_T':1,'grid_T':1}
# results format ('nowcast': one dated directory per day)
namfmt='nowcast'

# The "Match data" cell of this notebook calls matchData with start and end
# dates in the 4th and 5th positions, so passing namfmt, PATH and flen
# positionally here would put them in the wrong slots. Pass them by keyword.
# NOTE(review): with no dates given, matchData is expected to fall back to the
# time range of df itself -- confirm against the installed salishsea_tools.
data=et.matchData(df,filemap,fdict,mod_nam_fmt=namfmt,mod_basedir=PATH,mod_flen=flen)

# keep the time, position, grid-index and observation columns and write them to CSV
df2=data.loc[:,['dtUTC','Lat', 'Lon','Z', 'p','i','j','k', 'Sample_Number','Diatoms-1', 'Diatoms-2',
       'Prasinophytes', 'Cryptophytes', 'Dinoflagellates', 'Haptophytes',
       'Dictyochophytes', 'Raphidophytes', 'Cyanobacteria', 'TchlA (ug/L)',
       'Chlorophyll:Extracted [mg/m^3]', 'Nitrate_plus_Nitrite [umol/L]',
       'Silicate [umol/L]', 'Fluorescence:URU [mg/m^3]', 'SA', 'CT' ]]
df2.to_csv('/data/eolson/results/MEOPAR/oldDBs/HPLCPhyto.csv',index=False)

In [3]:
# display the table that was written to HPLCPhyto.csv in the previous cell
df2

Unnamed: 0,dtUTC,Lat,Lon,Z,p,i,j,k,Sample_Number,Diatoms-1,...,Dictyochophytes,Raphidophytes,Cyanobacteria,TchlA (ug/L),Chlorophyll:Extracted [mg/m^3],Nitrate_plus_Nitrite [umol/L],Silicate [umol/L],Fluorescence:URU [mg/m^3],SA,CT
0,2015-04-01 23:42:17,48.65233,-123.50183,1.487321,1.5,207,354,1,9.0,23.58,...,0.0,0.163,0.0,26.162,29.36,4.63,19.43,,27.295384,10.045218
1,2015-04-02 03:24:50,48.63017,-123.24284,2.08225,2.1,242,331,2,23.0,0.272,...,0.0,0.0,0.0,0.439,0.67,23.23,41.81,,29.374328,9.201191
2,2015-04-02 15:23:46,48.49983,-124.73317,1.586497,1.6,7,413,1,37.0,2.033,...,0.005,0.023,0.004,3.129,3.43,5.7,9.85,,29.643997,10.547005
3,2015-04-02 18:19:18,48.469,-124.5485,1.883969,1.9,31,394,1,51.0,2.722,...,0.014,0.033,0.01,4.219,4.69,15.58,24.48,,30.492932,9.644676
4,2015-04-02 23:03:35,48.30817,-124.0665,1.685682,1.7,82,328,1,64.0,3.341,...,0.002,0.022,0.0,6.385,7.15,1.17,3.43,,29.434686,11.089943
5,2015-04-03 02:09:56,48.26083,-123.72017,1.68569,1.7,127,294,1,76.0,5.729,...,0.0,0.0,0.0,9.188,11.13,,,,30.092338,10.37586
6,2015-04-03 05:34:12,48.23283,-123.301,1.586536,1.6,185,259,1,86.0,4.217,...,0.024,0.0,0.0,6.846,7.95,13.25,20.29,,30.367032,10.207919
7,2015-04-03 08:46:05,48.26483,-123.163,1.784847,1.8,209,255,1,96.0,4.665,...,0.006,0.0,0.0,7.52,8.79,13.59,19.52,,30.455359,9.832826
8,2015-04-03 10:34:51,48.24317,-122.97517,1.983166,2.0,234,238,1,109.0,1.254,...,0.018,0.006,0.0,1.993,2.56,19.82,36.45,,30.019539,9.968907
9,2015-04-03 11:55:44,48.38,-123.0435,1.983141,2.0,240,269,1,120.0,0.29,...,0.01,0.0,0.001,0.843,1.17,21.23,37.91,,30.199268,9.437491


# Load CSV:

In [2]:
# NOTE(review): the following cell reads this same file again (with dtUTC forced
# to str) and overwrites df, so this plain read looks redundant -- confirm before removing
df=pd.read_csv('/data/eolson/results/MEOPAR/oldDBs/HPLCPhyto.csv')

In [3]:
# read dtUTC as plain text so the conversion to datetime is done explicitly below
csv_path='/data/eolson/results/MEOPAR/oldDBs/HPLCPhyto.csv'
df=pd.read_csv(csv_path,dtype={'dtUTC':str})
# convert dtUTC back to datetime
dt_format='%Y-%m-%d %H:%M:%S'
df['dtUTC']=[dt.datetime.strptime(stamp,dt_format) for stamp in df['dtUTC']]
df.head()

Unnamed: 0,dtUTC,Lat,Lon,Z,p,Sample_Number,Bottle_Number,Diatoms-1,Diatoms-2,Prasinophytes,...,Dictyochophytes,Raphidophytes,Cyanobacteria,TchlA (ug/L),Chlorophyll:Extracted [mg/m^3],Nitrate_plus_Nitrite [umol/L],Silicate [umol/L],Fluorescence:URU [mg/m^3],SA,CT
0,2015-04-01 23:42:17,48.65233,-123.50183,102.104201,103.0,1.0,,,,,...,,,,,,25.36,62.27,,30.96735,9.275387
1,2015-04-01 23:42:17,48.65233,-123.50183,75.740427,76.4,2.0,,,,,...,,,,,,25.62,49.24,,30.175039,8.994115
2,2015-04-01 23:42:17,48.65233,-123.50183,51.256737,51.7,3.0,,,,,...,,,,,,22.46,40.4,,29.820448,9.000667
3,2015-04-01 23:42:17,48.65233,-123.50183,41.046072,41.4,4.0,,,,,...,,,,,,22.64,41.05,,29.700002,9.003451
4,2015-04-01 23:42:17,48.65233,-123.50183,31.330597,31.6,5.0,,,,,...,,,,,,22.48,41.23,,29.535452,9.044346


## Match data

In [6]:
# directory holding the model results:
PATH= '/results/SalishSea/nowcast-green.201812/'

# first and last date of the analysis window:
start_date,end_date = dt.datetime(2015,4,1),dt.datetime(2015,5,1)

# each model file covers this many days:
flen=1

# model variable -> type of file in which it is stored
filemap=dict.fromkeys(('nitrate','silicon','ammonium','diatoms','ciliates','flagellates'),'ptrc_T')
filemap.update(dict.fromkeys(('vosaline','votemper'),'grid_T'))

# model file type -> time resolution in hours (1 is hourly files, 24 is daily)
fdict=dict.fromkeys(('ptrc_T','grid_T'),1)

# results format
# -- nowcast: files like 01jan15/SalishSea_1h_20150101_20150101_ptrc_T.nc
# -- long: files like SalishSea_1h_20150206_20150804_ptrc_T_20150427-20150506.nc, all in one directory
namfmt='nowcast'

# match the observations in df to model output using the settings above
data=et.matchData(df,filemap,fdict,start_date,end_date,namfmt,PATH,flen)

In [5]:
# display the table returned by et.matchData
data

Unnamed: 0,dtUTC,Lat,Lon,Z,p,Sample_Number,Bottle_Number,Diatoms-1,Diatoms-2,Prasinophytes,...,i,mod_nitrate,mod_silicon,mod_ammonium,mod_diatoms,mod_ciliates,mod_flagellates,mod_vosaline,mod_votemper,k
