## Make stats table and write it to a CSV or LaTeX file

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import netCDF4 as nc
import datetime as dt
from salishsea_tools import evaltools as et
import pickle
%matplotlib inline

In [2]:
# Factor multiplied onto the summed model diatoms+ciliates+flagellates columns
# to build the 'mod_chl' column used for the chlorophyll comparison.
# NOTE(review): presumably a chlorophyll-to-nitrogen ratio -- confirm units.
chlToN=2.0

In [3]:
# Load the pickled evaluation tables from the evalMatches directory into a dict
# keyed by '<source><group>' (source: DFO, Hakai, PSF; group: Bio, Phys).
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only point this at files you produced yourself.
matchDir='/data/eolson/MEOPAR/SS36runs/calcFiles/evalMatches'
matchFiles=(('DFOBio','dataDFO.pkl'),
            ('HakaiBio','dataHakai.pkl'),
            ('PSFBio','dataPSF.pkl'),
            ('DFOPhys','dataDFOPhys.pkl'),
            ('HakaiPhys','dataHakaiPhys.pkl'),
            ('PSFPhys','dataPSFPhys.pkl'))
data=dict()
for key,fname in matchFiles:
    # 'with' closes each file handle as soon as its table has been read
    with open(os.path.join(matchDir,fname),'rb') as fpkl:
        data[key]=pickle.load(fpkl)

In [4]:
# For each loaded table, print its name followed by the set of column names
# that are not in the list of common columns below.
commonCols={'Year','Month','Day','Hour','Lat','Lon','i','j','Pressure','Depth','Z','dtUTC','pZ'}
for name,tbl in data.items():
    print(name)
    print(set(tbl.keys())-commonCols)
    print(' ')

DFOBio
{'Ammonium_units', 'mod_ciliates', 'mod_ammonium', 'mod_silicon', 'mod_vosaline', 'Silicate_units', 'Ammonium', 'AbsSal', 'Chlorophyll_Extracted_units', 'mod_nitrate', 'mod_flagellates', 'mod_votemper', 'N', 'Chlorophyll_Extracted', 'Si', 'ConsT', 'mod_diatoms'}
 
HakaiBio
{'Station', 'NO2+NO3 (uM)', 'mod_ciliates', 'mod_ammonium', 'mod_silicon', 'mod_vosaline', 'CT', 'mod_nitrate', 'mod_flagellates', 'mod_votemper', 'SiO2', 'SA', 'mod_diatoms', 'PO4'}
 
PSFBio
{'Depth_m', 'Station', 'mod_ciliates', 'mod_ammonium', 'mod_silicon', 'mod_vosaline', 'Phaeo', 'NO23', 'mod_nitrate', 'mod_flagellates', 'Chl', 'mod_votemper', 'Si', 'mod_diatoms', 'PO4'}
 
DFOPhys
{'Ammonium_units', 'mod_ciliates', 'mod_ammonium', 'mod_silicon', 'mod_vosaline', 'Silicate_units', 'Ammonium', 'AbsSal', 'Chlorophyll_Extracted_units', 'mod_nitrate', 'mod_flagellates', 'mod_votemper', 'N', 'Chlorophyll_Extracted', 'Si', 'ConsT', 'mod_diatoms'}
 
HakaiPhys
{'Station', 'mod_vosaline', 'CT', 'AbsSal', 'mod_votem

In [5]:
# Copy source-specific columns to the short names ('Chl', 'N', 'Si', 'SA', 'CT')
# that the table cells below look up.
# Each entry is (table key, new column, existing column).
for tbl,new,old in (('DFOBio','Chl','Chlorophyll_Extracted'),
                    ('HakaiBio','N','NO2+NO3 (uM)'),
                    ('HakaiBio','Si','SiO2'),
                    ('PSFBio','N','NO23'),
                    ('DFOPhys','SA','AbsSal'),
                    ('DFOPhys','CT','ConsT'),
                    ('PSFPhys','CT','ConsT')):
    data[tbl][new]=data[tbl][old]
# Model chlorophyll: chlToN times the sum of the modelled diatoms, ciliates and
# flagellates columns (only for the two tables compared against observed Chl).
for tbl in ('DFOBio','PSFBio'):
    bio=data[tbl]
    data[tbl]['mod_chl']=chlToN*(bio['mod_diatoms']+bio['mod_ciliates']+bio['mod_flagellates'])

In [6]:
# Opening lines of the LaTeX table: font size, centering, the 18-column tabular
# spec, a header row naming the three data sources, the rules under it, a header
# row naming the statistics, and a little vertical space.
lines=[
    r'   \small'+'\n',
    r'   \centering'+'\n',
    r'   \begin{tabular}{rrrrrrcrrrrrcrrrrr}\toprule'+'\n',
    r'     & \multicolumn{5}{c}{DFO} & \phantom{abc} &\multicolumn{5}{c}{PSF} & \phantom{abc}  & \multicolumn{5}{c}{Hakai}  \\'+'\n',
    r'    \cmidrule{2-6} \cmidrule{8-12} \cmidrule{14-18}'+'\n',
    r'      & Mod{} Mean & Bias & RMSE & WSS & N & & Mod{} Mean & Bias & RMSE & WSS & N & & Mod{} Mean & Bias & RMSE & WSS & N \\'+'\n',
    r'    \addlinespace[.1cm]'+'\n',
]

In [7]:
# Threshold for the 'surf' subset: rows with Z<=zsurf are selected, and the row
# label in the table prints this value followed by ' m'.
zsurf=15

In [8]:
#NO3
# Nitrate section of the table: a header row carrying the units, then one row
# per subset comparing observed 'N' with modelled 'mod_nitrate' for the DFO,
# PSF and Hakai 'Bio' tables.
units=r'$\mu$M N'  # raw string: '\m' is not a valid Python escape sequence
lines.append(r'    \multicolumn{1}{l}{\textbf{NO$_3$}} & \multicolumn{1}{c}{'+units+r'} &\multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & & & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & & & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & \\ '+'\n')
ovar='N'
mvar='mod_nitrate'
st=dict()
for src in ('DFO','PSF','Hakai'):
    d=data[src+'Bio']
    st[src]=dict()
    st[src]['all']=et.stats(d[ovar],d[mvar])
    ii=(d['Z']<=zsurf)
    st[src]['surf']=et.stats(d.loc[ii,[ovar]],d.loc[ii,[mvar]])
    # calendar-year subsets: Jan 1 of yr (inclusive) to Jan 1 of yr+1 (exclusive)
    for yr in (2015,2016,2017):
        ii=(d['dtUTC']>=dt.datetime(yr,1,1))&(d['dtUTC']<dt.datetime(yr+1,1,1))
        st[src][str(yr)]=et.stats(d.loc[ii,[ovar]],d.loc[ii,[mvar]])
# Row labels for each subset; the later table cells reuse this dict.
subsets={'all':'all','surf':r'z $\leq$ '+str(zsurf)+' m','2015':'2015','2016':'2016','2017':'2017'}

# One table row per subset. For each source, elements 1, 3, 4, 5, 0 of the
# et.stats result go under the headers Mod Mean, Bias, RMSE, WSS, N.
# NOTE(review): assumes et.stats returns (N, model mean, obs mean, bias, RMSE,
# WSS) in that order -- confirm against salishsea_tools.evaltools.
rowfmt=r'     {}        & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} && {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} && {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} \\'
for ss in ('all','surf','2015','2016','2017'):
    vals=[]
    for src in ('DFO','PSF','Hakai'):
        s=st[src][ss]
        vals+=[s[1],s[3],s[4],s[5],s[0]]
    # end each row with a newline, like every other line written to the table
    lines.append(rowfmt.format(subsets[ss],*vals)+'\n')
lines.append(r'    \addlinespace[.1cm]'+'\n')

In [9]:
#Si
# Dissolved silica section of the table: a header row carrying the units, then
# one row per subset comparing observed 'Si' with modelled 'mod_silicon' for the
# DFO, PSF and Hakai 'Bio' tables.
# Units label is per silicon (the original repeated the nitrate label 'uM N').
units=r'$\mu$M Si'  # raw string: '\m' is not a valid Python escape sequence
lines.append(r'    \multicolumn{1}{l}{\textbf{dSi}} & \multicolumn{1}{c}{'+units+r'} &\multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & & & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & & & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & \\ '+'\n')
ovar='Si'
mvar='mod_silicon'
st=dict()
for src in ('DFO','PSF','Hakai'):
    d=data[src+'Bio']
    st[src]=dict()
    st[src]['all']=et.stats(d[ovar],d[mvar])
    ii=(d['Z']<=zsurf)
    st[src]['surf']=et.stats(d.loc[ii,[ovar]],d.loc[ii,[mvar]])
    # calendar-year subsets: Jan 1 of yr (inclusive) to Jan 1 of yr+1 (exclusive)
    for yr in (2015,2016,2017):
        ii=(d['dtUTC']>=dt.datetime(yr,1,1))&(d['dtUTC']<dt.datetime(yr+1,1,1))
        st[src][str(yr)]=et.stats(d.loc[ii,[ovar]],d.loc[ii,[mvar]])

# One table row per subset; row labels come from the 'subsets' dict defined in
# the nitrate cell. For each source, elements 1, 3, 4, 5, 0 of the et.stats
# result go under the headers Mod Mean, Bias, RMSE, WSS, N.
# NOTE(review): assumes et.stats returns (N, model mean, obs mean, bias, RMSE,
# WSS) in that order -- confirm against salishsea_tools.evaltools.
rowfmt=r'     {}        & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} && {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} && {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} \\'
for ss in ('all','surf','2015','2016','2017'):
    vals=[]
    for src in ('DFO','PSF','Hakai'):
        s=st[src][ss]
        vals+=[s[1],s[3],s[4],s[5],s[0]]
    # end each row with a newline, like every other line written to the table
    lines.append(rowfmt.format(subsets[ss],*vals)+'\n')
lines.append(r'    \addlinespace[.1cm]'+'\n')

In [10]:
#Chl
# Chlorophyll section of the table: a header row carrying the units, then one
# row per subset comparing observed 'Chl' with modelled 'mod_chl'. Only the DFO
# and PSF 'Bio' tables are used; the Hakai columns are left blank.
units=r'$\mu$g L$^{-1}$'  # raw string: '\m' is not a valid Python escape sequence
lines.append(r'    \multicolumn{1}{l}{\textbf{Chl}} & \multicolumn{1}{c}{'+units+r'} &\multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & & & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & & & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & \\ '+'\n')
ovar='Chl'
mvar='mod_chl'
st=dict()
for src in ('DFO','PSF'):
    d=data[src+'Bio']
    st[src]=dict()
    st[src]['all']=et.stats(d[ovar],d[mvar])
    ii=(d['Z']<=zsurf)
    st[src]['surf']=et.stats(d.loc[ii,[ovar]],d.loc[ii,[mvar]])
    # calendar-year subsets: Jan 1 of yr (inclusive) to Jan 1 of yr+1 (exclusive)
    for yr in (2015,2016,2017):
        ii=(d['dtUTC']>=dt.datetime(yr,1,1))&(d['dtUTC']<dt.datetime(yr+1,1,1))
        st[src][str(yr)]=et.stats(d.loc[ii,[ovar]],d.loc[ii,[mvar]])

# One table row per subset; row labels come from the 'subsets' dict defined in
# the nitrate cell. For DFO and PSF, elements 1, 3, 4, 5, 0 of the et.stats
# result go under the headers Mod Mean, Bias, RMSE, WSS, N; the five Hakai
# cells are filled with a single space each.
# NOTE(review): assumes et.stats returns (N, model mean, obs mean, bias, RMSE,
# WSS) in that order -- confirm against salishsea_tools.evaltools.
rowfmt=r'     {}        & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} && {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} && {} & {} & {} & {} & {} \\'
for ss in ('all','surf','2015','2016','2017'):
    vals=[]
    for src in ('DFO','PSF'):
        s=st[src][ss]
        vals+=[s[1],s[3],s[4],s[5],s[0]]
    vals+=[' ']*5
    # end each row with a newline, like every other line written to the table
    lines.append(rowfmt.format(subsets[ss],*vals)+'\n')
lines.append(r'    \addlinespace[.1cm]'+'\n')

In [11]:
#T
# Temperature section of the table: a header row carrying the units, then one
# row per subset comparing observed 'CT' with modelled 'mod_votemper' for the
# DFO, PSF and Hakai 'Phys' tables.
units=r'$^{\circ}$C'  # raw string: '\c' is not a valid Python escape sequence
lines.append(r'    \multicolumn{1}{l}{\textbf{$\Theta$}} & \multicolumn{1}{c}{'+units+r'} &\multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & & & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & & & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & \\ '+'\n')
ovar='CT'
mvar='mod_votemper'
st=dict()
for src in ('DFO','PSF','Hakai'):
    d=data[src+'Phys']
    st[src]=dict()
    st[src]['all']=et.stats(d[ovar],d[mvar])
    ii=(d['Z']<=zsurf)
    st[src]['surf']=et.stats(d.loc[ii,[ovar]],d.loc[ii,[mvar]])
    # calendar-year subsets: Jan 1 of yr (inclusive) to Jan 1 of yr+1 (exclusive)
    for yr in (2015,2016,2017):
        ii=(d['dtUTC']>=dt.datetime(yr,1,1))&(d['dtUTC']<dt.datetime(yr+1,1,1))
        st[src][str(yr)]=et.stats(d.loc[ii,[ovar]],d.loc[ii,[mvar]])

# One table row per subset; row labels come from the 'subsets' dict defined in
# the nitrate cell. For each source, elements 1, 3, 4, 5, 0 of the et.stats
# result go under the headers Mod Mean, Bias, RMSE, WSS, N.
# NOTE(review): assumes et.stats returns (N, model mean, obs mean, bias, RMSE,
# WSS) in that order -- confirm against salishsea_tools.evaltools.
rowfmt=r'     {}        & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} && {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} && {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} \\'
for ss in ('all','surf','2015','2016','2017'):
    vals=[]
    for src in ('DFO','PSF','Hakai'):
        s=st[src][ss]
        vals+=[s[1],s[3],s[4],s[5],s[0]]
    # end each row with a newline, like every other line written to the table
    lines.append(rowfmt.format(subsets[ss],*vals)+'\n')
lines.append(r'    \addlinespace[.1cm]'+'\n')

In [12]:
#S
# Salinity section of the table: a header row carrying the units, then one row
# per subset comparing observed 'SA' with modelled 'mod_vosaline' for the DFO,
# PSF and Hakai 'Phys' tables.
units='g kg$^{-1}$'
lines.append(r'    \multicolumn{1}{l}{\textbf{S$_A$}} & \multicolumn{1}{c}{'+units+r'} &\multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & & & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & & & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & \multicolumn{1}{c}{'+units+r'} & & \\ '+'\n')
ovar='SA'
mvar='mod_vosaline'
st=dict()
for src in ('DFO','PSF','Hakai'):
    d=data[src+'Phys']
    st[src]=dict()
    st[src]['all']=et.stats(d[ovar],d[mvar])
    ii=(d['Z']<=zsurf)
    st[src]['surf']=et.stats(d.loc[ii,[ovar]],d.loc[ii,[mvar]])
    # calendar-year subsets: Jan 1 of yr (inclusive) to Jan 1 of yr+1 (exclusive)
    for yr in (2015,2016,2017):
        ii=(d['dtUTC']>=dt.datetime(yr,1,1))&(d['dtUTC']<dt.datetime(yr+1,1,1))
        st[src][str(yr)]=et.stats(d.loc[ii,[ovar]],d.loc[ii,[mvar]])

# One table row per subset; row labels come from the 'subsets' dict defined in
# the nitrate cell. For each source, elements 1, 3, 4, 5, 0 of the et.stats
# result go under the headers Mod Mean, Bias, RMSE, WSS, N.
# NOTE(review): assumes et.stats returns (N, model mean, obs mean, bias, RMSE,
# WSS) in that order -- confirm against salishsea_tools.evaltools.
rowfmt=r'     {}        & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} && {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} && {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:d} \\'
for ss in ('all','surf','2015','2016','2017'):
    vals=[]
    for src in ('DFO','PSF','Hakai'):
        s=st[src][ss]
        vals+=[s[1],s[3],s[4],s[5],s[0]]
    # end each row with a newline, like every other line written to the table
    lines.append(rowfmt.format(subsets[ss],*vals)+'\n')
lines.append(r'    \addlinespace[.1cm]'+'\n')

In [13]:
# Close the table: a final rule, then the end of the tabular environment.
# NOTE(review): the table opens with \toprule; booktabs tables normally close
# with \bottomrule rather than \midrule -- confirm this is intended.
lines.extend((r'   \midrule'+'\n',
              r'   \end{tabular}'+'\n'))

In [14]:
# Write the accumulated table lines to the .tex file. The 'with' block closes
# the file even if the write raises, so no handle is leaked.
with open('/data/eolson/results/MEOPAR/biomodelevalpaper/newstatsTBL.tex', 'w') as f:
    f.writelines(lines)