In [1]:
import datetime as dt
import numpy as np
import netCDF4 as nc
import pandas as pd
import glob
from salishsea_tools import geo_tools
import gsw
import os
import pytz
import matplotlib.pyplot as plt
import cmocean as cmo
import warnings
from sqlalchemy import create_engine, case, MetaData
from sqlalchemy.orm import create_session, aliased
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.sql import and_, or_, not_, func
from salishsea_tools import viz_tools

%matplotlib inline

In [2]:
basedir='/ocean/shared/SalishSeaCastData/DFO/CTD/'
dbname='DFO_CTD.sqlite'
datelims=()

In [3]:
engine = create_engine('sqlite:///' + basedir + dbname, echo = False)

In [5]:
# Reflect the schema of the database bound to `engine` and let automap
# build ORM classes for the reflected tables.
md=MetaData()
md.reflect(engine)
Base = automap_base(metadata=md)
# generate mapped classes for the tables reflected from the database (dbname, i.e. DFO_CTD.sqlite):
Base.prepare()
# mapped classes have been created
# existing tables:
StationTBL=Base.classes.StationTBL
ObsTBL=Base.classes.ObsTBL
# NOTE(review): CalcsTBL is left unmapped here, but a later cell queries CalcsTBL;
# that cell raises NameError unless this line is re-enabled (and the table exists).
#CalcsTBL=Base.classes.CalcsTBL
#JDFLocsTBL=Base.classes.JDFLocsTBL
# Session used for all ORM queries in this notebook.
session = create_session(bind = engine, autocommit = False, autoflush = True)

In [None]:
# Fetch observed depth/salinity columns from ObsTBL alongside the Z, *_SA and *_CT
# columns of CalcsTBL, joining on ObsTBL.ID == CalcsTBL.ObsTBLID.
# NOTE(review): CalcsTBL is not defined by the cells above (its mapping is commented out),
# so this cell raises NameError on a fresh kernel.
# NOTE(review): the Depth==Depth filter presumably drops rows whose Depth is NULL
# (NULL never compares equal in SQL) -- confirm.
qry=session.query(ObsTBL.Depth,CalcsTBL.Z,ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,CalcsTBL.Salinity_SA,CalcsTBL.Salinity_T0_C0_SA,CalcsTBL.Salinity_T1_C1_SA,
                  CalcsTBL.Temperature_CT,CalcsTBL.Temperature_Primary_CT,CalcsTBL.Temperature_Secondary_CT).\
    select_from(CalcsTBL).join(ObsTBL,ObsTBL.ID==CalcsTBL.ObsTBLID).filter(ObsTBL.Depth==ObsTBL.Depth).all()

In [None]:
df=pd.DataFrame(qry)

In [None]:
df.describe()

In [None]:
np.min((df['Depth']-df['Z'])/df['Z']),np.max((df['Depth']-df['Z'])/df['Z'])

In [None]:
df.loc[np.abs((df['Depth']-df['Z'])/df['Z'])>.1]

In [None]:
df.head(10)

In [None]:
def ptoz(pp,lat):
    """Return gsw.z_from_p(pp, lat), passing SQL NULLs through as None.

    This function is also registered as the SQLite user-defined function
    ``ptozS``; in that setting a NULL column value arrives as None.

    :arg pp: pressure handed to gsw.z_from_p (None for SQL NULL)
    :arg lat: latitude handed to gsw.z_from_p (None for SQL NULL)
    :returns: the value computed by gsw.z_from_p, or None when either
              argument is None
    """
    # A NULL input yields a NULL result, as for built-in SQL functions,
    # instead of an exception raised from inside gsw.
    if pp is None or lat is None:
        return None
    return gsw.z_from_p(pp,lat)

In [None]:
# Open a connection from the object the session is bound to.
conn=session.bind.connect()
#conn=engine.connect()

In [None]:
# Expose the Python function ptoz to SQL as a 2-argument function named ptozS.
# NOTE(review): the registration is made on conn.connection, so presumably ptozS is
# only available to statements executed through this connection -- confirm.
conn.connection.create_function("ptozS",2,ptoz)

In [None]:
# Raw SQL: apply ptozS to pressure and station latitude for 25 joined rows.
# Note that this rebinds the name `qry` used by an earlier cell.
qry="""SELECT ptozS(ObsTBL.Pressure, StationTBL.Lat) AS ZZ
    FROM ObsTBL INNER JOIN StationTBL ON ObsTBL.StationTBLID = StationTBL.ID
    LIMIT 25"""

In [None]:
df=pd.DataFrame(session.query(ObsTBL.Pressure, StationTBL.Lat).\
        select_from(StationTBL).join(ObsTBL,ObsTBL.StationTBLID==StationTBL.ID).all())
df2=df.head(25)
test3=[ptoz(pp,ll) for pp,ll in zip(df2['Pressure'],df2['Lat'])]

In [None]:
test3

In [None]:
for i,j in zip(test,test3):
    print(i,j)

In [None]:
test=conn.execute(qry)
for row in test:
    print(row)

In [None]:
# The same computation built through the ORM; func.ptozS renders a call to the SQL function ptozS.
test2=session.query(func.ptozS(ObsTBL.Pressure, StationTBL.Lat).label('ZZ')).\
        select_from(StationTBL).join(ObsTBL,ObsTBL.StationTBLID==StationTBL.ID)

In [None]:
# doesn't work
# NOTE(review): possibly because ptozS was registered on the DBAPI connection behind `conn`,
# while the session may run its statements on a different connection -- confirm.
#for row in test2.limit(25):
#    print(row)

In [None]:
sorted([x.name for x in md.tables['StationTBL'].columns])

In [None]:
sorted([x.name for x in md.tables['ObsTBL'].columns])


#### Salinity variables: 'Salinity', 'Salinity_T0_C0', 'Salinity_T1_C1'
#### Temperature variables: 'Temperature', 'Temperature_Primary', 'Temperature_Secondary'

### How many Depths with no Pressure and vice versa?

In [None]:
# Count rows that have one vertical coordinate but lack the other.
# The not-NULL condition on the coordinate that should be present keeps rows
# with neither Depth nor Pressure out of both counts.
z_without_p=session.query(ObsTBL.Depth).filter(ObsTBL.Depth.isnot(None),ObsTBL.Pressure.is_(None)).count()
p_without_z=session.query(ObsTBL.Pressure).filter(ObsTBL.Pressure.isnot(None),ObsTBL.Depth.is_(None)).count()
print('Z without P:',z_without_p)
print('P without Z:',p_without_z)


### Other depth info:

In [None]:
# Smallest and largest stored value of each vertical coordinate.
for label,col in (('Z min, max:',ObsTBL.Depth),('P min, max:',ObsTBL.Pressure)):
    print(label,session.query(func.min(col)).one(),session.query(func.max(col)).one())

### Other Variables:

In [None]:
for var in (ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary):
    print(var,'min max count:',session.query(func.min(var)).one(),session.query(func.max(var)).one(),session.query(var).filter(var!=None).count())

In [None]:
vlist=(ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary)
ulist=(ObsTBL.Salinity_units,ObsTBL.Salinity_T0_C0_units,ObsTBL.Salinity_T1_C1_units,ObsTBL.Temperature_units,
      ObsTBL.Temperature_Primary_units,ObsTBL.Temperature_Secondary_units)

In [None]:
for vvar,uvar in zip(vlist,ulist):
    print(uvar,'unique:')
    print('\t',[i for i in session.query(uvar).group_by(uvar).all()])
    print('\t','# missing units:',session.query(vvar,uvar).filter(and_(vvar!=None,uvar==None)).count())

In [None]:
df=pd.DataFrame(session.query(ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,
                              ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary).all())

In [None]:
fig,ax=plt.subplots(1,3,figsize=(18,6))
ax[0].plot(df['Salinity'],df['Temperature'],'r.')
ax[1].plot(df['Salinity_T0_C0'],df['Temperature_Primary'],'c.')
ax[2].plot(df['Salinity_T1_C1'],df['Temperature_Secondary'],'m.')

In [None]:
df.loc[(~np.isnan(df['Salinity_T1_C1']))&(~np.isnan(df['Temperature_Primary']))]

In [None]:
df.loc[(~np.isnan(df['Salinity_T1_C1']))&(~np.isnan(df['Temperature']))]

In [None]:
df.loc[(~np.isnan(df['Salinity_T0_C0']))&(~np.isnan(df['Temperature']))]

In [None]:
df.loc[(~np.isnan(df['Salinity_T0_C0']))&(~np.isnan(df['Temperature_Secondary']))]

In [None]:
df.loc[(~np.isnan(df['Salinity']))&(~np.isnan(df['Temperature_Secondary']))]

In [None]:
df.loc[(~np.isnan(df['Salinity']))&(~np.isnan(df['Temperature_Primary']))]

In [None]:
len(df.loc[(~np.isnan(df['Salinity']))&(~np.isnan(df['Temperature']))])

In [None]:
len(df.loc[(~np.isnan(df['Salinity_T0_C0']))&(~np.isnan(df['Temperature_Primary']))])

In [None]:
len(df.loc[(~np.isnan(df['Salinity_T1_C1']))&(~np.isnan(df['Temperature_Secondary']))])

In [None]:
qry=session.query(StationTBL.StartYear.label('Year'),StationTBL.StartMonth.label('Month'),
                      StationTBL.StartDay.label('Day'),StationTBL.StartHour.label('Hour'),
                      StationTBL.Lat,StationTBL.Lon,
                     ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity,
                  ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,
                    ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary,ObsTBL.sourceFile).\
                select_from(StationTBL).join(ObsTBL,ObsTBL.StationTBLID==StationTBL.ID).\
                filter(and_(StationTBL.Lat>47-3/2.5*(StationTBL.Lon+123.5),
                            StationTBL.Lat<47-3/2.5*(StationTBL.Lon+121)))

In [None]:
df=pd.DataFrame(qry.all())

In [None]:
# One temperature-vs-salinity scatter per salinity/temperature column pairing.
fig,ax=plt.subplots(1,3,figsize=(18,6))
for iax,(svar,tvar,fmt) in zip(ax,(('Salinity','Temperature','r.'),
                                   ('Salinity_T0_C0','Temperature_Primary','c.'),
                                   ('Salinity_T1_C1','Temperature_Secondary','m.'))):
    iax.plot(df[svar],df[tvar],fmt)
    # Label each panel with the plotted column names so it can be read on its own.
    iax.set_xlabel(svar)
    iax.set_ylabel(tvar)

In [None]:
# Map, for each salinity/temperature pairing, the stations where both values are positive.
fig,ax=plt.subplots(1,3,figsize=(18,6))
# The bathymetry file is only needed to draw the coastline; the context manager
# closes it once all three panels have been prepared.
with nc.Dataset('/data/vdo/MEOPAR/NEMO-forcing/grid/bathymetry_201702.nc') as grid:
    for iax in ax:
        viz_tools.set_aspect(iax, coords = 'map')
        viz_tools.plot_coastline(iax, grid, coords = 'map')
        iax.set_ylim(48, 50.5)
        iax.set_xlim(-125.7, -122.5)
for iax,(svar,tvar,fmt) in zip(ax,(('Salinity','Temperature','ro'),
                                   ('Salinity_T0_C0','Temperature_Primary','co'),
                                   ('Salinity_T1_C1','Temperature_Secondary','mo'))):
    # Compute the selection once and reuse it for both coordinates.
    present=(df[svar]>0)&(df[tvar]>0)
    iax.plot(df.loc[present,'Lon'],df.loc[present,'Lat'],fmt)
    # Identify which column pairing the panel shows.
    iax.set_title(svar+' & '+tvar)

In [None]:
# Regional station/observation query (stations between the lines Lat = 47-3/2.5*(Lon+123.5)
# and Lat = 47-3/2.5*(Lon+121)), keeping only stations whose MODEL differs from 'CastAway'.
# NOTE(review): in SQL a != comparison is not true for NULL, so stations with no MODEL
# recorded are presumably dropped as well -- confirm this is intended.
qry=(session.query(
        StationTBL.StartYear.label('Year'),StationTBL.StartMonth.label('Month'),
        StationTBL.StartDay.label('Day'),StationTBL.StartHour.label('Hour'),
        StationTBL.Lat,StationTBL.Lon,
        ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity,
        ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,
        ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary,
        ObsTBL.sourceFile)
     .select_from(StationTBL)
     .join(ObsTBL,ObsTBL.StationTBLID==StationTBL.ID)
     .filter(and_(StationTBL.Lat>47-3/2.5*(StationTBL.Lon+123.5),
                  StationTBL.Lat<47-3/2.5*(StationTBL.Lon+121),
                  StationTBL.MODEL!='CastAway')))
df=pd.DataFrame(qry.all())
# One temperature-vs-salinity scatter per salinity/temperature column pairing.
fig,ax=plt.subplots(1,3,figsize=(18,6))
for iax,(svar,tvar,fmt) in zip(ax,(('Salinity','Temperature','r.'),
                                   ('Salinity_T0_C0','Temperature_Primary','c.'),
                                   ('Salinity_T1_C1','Temperature_Secondary','m.'))):
    iax.plot(df[svar],df[tvar],fmt)
    # Label each panel with the plotted column names so it can be read on its own.
    iax.set_xlabel(svar)
    iax.set_ylabel(tvar)

In [None]:
# Regional station/observation query (stations between the lines Lat = 47-3/2.5*(Lon+123.5)
# and Lat = 47-3/2.5*(Lon+121)), keeping only stations whose Include flag is true.
qry=(session.query(
        StationTBL.StartYear.label('Year'),StationTBL.StartMonth.label('Month'),
        StationTBL.StartDay.label('Day'),StationTBL.StartHour.label('Hour'),
        StationTBL.Lat,StationTBL.Lon,
        ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity,
        ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,
        ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary,
        ObsTBL.sourceFile)
     .select_from(StationTBL)
     .join(ObsTBL,ObsTBL.StationTBLID==StationTBL.ID)
     .filter(and_(StationTBL.Lat>47-3/2.5*(StationTBL.Lon+123.5),
                  StationTBL.Lat<47-3/2.5*(StationTBL.Lon+121),
                  StationTBL.Include==True)))
df=pd.DataFrame(qry.all())
# One temperature-vs-salinity scatter per salinity/temperature column pairing.
fig,ax=plt.subplots(1,3,figsize=(18,6))
for iax,(svar,tvar,fmt) in zip(ax,(('Salinity','Temperature','r.'),
                                   ('Salinity_T0_C0','Temperature_Primary','c.'),
                                   ('Salinity_T1_C1','Temperature_Secondary','m.'))):
    iax.plot(df[svar],df[tvar],fmt)
    # Label each panel with the plotted column names so it can be read on its own.
    iax.set_xlabel(svar)
    iax.set_ylabel(tvar)

In [None]:
# search for duplicate stations and investigate:
# Two included stations are paired when they share start year/month/day and their
# start hour, latitude and longitude each differ by less than 0.001.
a1=aliased(StationTBL)
a2=aliased(StationTBL)
# Both Include columns get their own label; left unlabeled they would produce two
# result columns with the same name.
dupsQRY=session.query(a1.ID.label('ID1'),a1.Include.label('Include1'),a2.ID.label('ID2'),a2.Include.label('Include2'),
                      a1.sourceFile.label('source1'),a2.sourceFile.label('source2'),
                      a1.AGENCY.label('AGENCY1'),a2.AGENCY.label('AGENCY2'),a1.MODEL.label('MODEL1'),a2.MODEL.label('MODEL2'),
                     a1.EVENT_NUMBER.label('EVENT_NUMBER1'),a2.EVENT_NUMBER.label('EVENT_NUMBER2'),
                     a1.PLATFORM.label('PLATFORM1'),a2.PLATFORM.label('PLATFORM2'),
                     a1.STATION.label('STATION1'),a2.STATION.label('STATION2'),
                     a1.WATER_DEPTH.label('WATER_DEPTH1'),a2.WATER_DEPTH.label('WATER_DEPTH2')).select_from(a1).join(a2,and_(
    a1.StartYear==a2.StartYear,
    a1.StartMonth==a2.StartMonth,
    a1.StartDay==a2.StartDay,
    a1.StartHour-a2.StartHour<0.001,
    a1.StartHour-a2.StartHour>-0.001,
    a1.Lat-a2.Lat<0.001,
    a1.Lat-a2.Lat>-0.001,
    a1.Lon-a2.Lon<0.001,
    a1.Lon-a2.Lon>-0.001)).filter(a1.Include==True,a2.Include==True,a1.ID<a2.ID)
# a1.ID<a2.ID reports each pair once and already rules out pairing a station with itself,
# so no separate a1.ID!=a2.ID condition is needed.
# columns below were identical for each pair:
#                     a1.DATA_DESCRIPTION.label('DATA_DESCRIPTION1'),a2.DATA_DESCRIPTION.label('DATA_DESCRIPTION2'),
#                     a1.SCIENTIST.label('SCIENTIST1'),a2.SCIENTIST.label('SCIENTIST2'),
#                      a1.COUNTRY.label('COUNTRY1'),a2.COUNTRY.label('COUNTRY2'),
#                     a1.MISSION.label('MISSION1'),a2.MISSION.label('MISSION2'),
#                     a1.PROJECT.label('PROJECT1'),a2.PROJECT.label('PROJECT2'),
#                     a1.WSPD.label('WSPD1'),a2.WSPD.label('WSPD2')

In [None]:
dfa=pd.DataFrame(dupsQRY.all())

In [None]:
dfa

In [None]:
dfa['sourceA1']=[i[-45:] for i in dfa['source1']]
dfa['sourceA2']=[i[-45:] for i in dfa['source2']]

In [None]:
len(dfa)

In [None]:
dfa

In [None]:
for i,r in dfa.iterrows():
    print(r['source1'],r['source2'])

In [None]:
# in case of duplicates, take only one profile entry at each depth
# This query returns only the ID pairs of included stations that match on start date and
# agree to within 0.001 in start hour, latitude and longitude.
a1=aliased(StationTBL)
a2=aliased(StationTBL)
same_cast=and_(
    a1.StartYear==a2.StartYear,
    a1.StartMonth==a2.StartMonth,
    a1.StartDay==a2.StartDay,
    a1.StartHour-a2.StartHour<0.001,
    a1.StartHour-a2.StartHour>-0.001,
    a1.Lat-a2.Lat<0.001,
    a1.Lat-a2.Lat>-0.001,
    a1.Lon-a2.Lon<0.001,
    a1.Lon-a2.Lon>-0.001,
    a1.ID!=a2.ID)
dupsQRY=(session.query(a1.ID.label('ID1'),a2.ID.label('ID2'))
         .select_from(a1)
         .join(a2,same_cast)
         .filter(a1.Include==True,a2.Include==True,a1.ID<a2.ID))

In [None]:
# Observations belonging to stations 120 and 135, ordered by pressure.
q=(session.query(ObsTBL.ID,ObsTBL.StationTBLID,ObsTBL.Pressure,ObsTBL.Include,ObsTBL.sourceFile)
   .filter(ObsTBL.StationTBLID.in_([120,135]))
   .order_by(ObsTBL.Pressure))
dftemp=pd.DataFrame(q.all())
# Last 10 characters of the source file name.
dftemp['FileEnd']=dftemp['sourceFile'].map(lambda name: name[-10:])
dftemp

def delist(el):
    """Unwrap a single value from a one-element, one-dimensional container.

    :arg el: a value with empty shape (per np.shape) or a 1-D sequence/array of length 1
    :returns: el itself when its shape is empty, otherwise el[0]
    :raises Exception: if el has any other shape
    """
    dims=np.shape(el)
    # Empty shape: already a single value, hand it back untouched.
    if len(dims)==0:
        return el
    # Exactly one dimension holding exactly one element: unwrap it.
    if dims==(1,):
        return el[0]
    raise Exception('item passed to delist was not a single value or a single value array; it was: {}'.format(el))