Objectives:
- Read STRATOGEM CTD and bottle data into tables (sql)
- Match each bottle station with a ctd cast
- remove/correct questionable data following load_all.m
- match bottle profile entries with ctd profile entries
- load all data into new combined tables

    1) matched CTD/Bottle stations, profiles:
        a) all ctd data as well as matched bottle data where available
        b) any bottle profiles with no ctd
        
    2) any unmatched bottle stations and their profile entries
    
    3) any unmatched ctd stations and their profile entries
   
- export to multi-source database

In [1]:
from sqlalchemy import create_engine, Column, String, Integer, Float, Numeric, Boolean, MetaData, Table, \
type_coerce, ForeignKey, case, between
from sqlalchemy.orm import mapper, create_session, relationship
from sqlalchemy.ext.declarative import declarative_base
import sqlalchemy.types as types
from sqlalchemy.sql import select, and_, or_, not_, func
from time import strptime
import re
import os
import glob
import numpy as np
import string
import pandas as pd
import gsw 
import warnings

#### definition below allows resizing of pandas dataframes to fit in window:
http://stackoverflow.com/questions/19536817/manipulate-html-module-font-size-in-ipython-notebook

In [2]:
class sizeme():
    """ Class to change html fontsize of object's representation"""
    def __init__(self,ob, size, height=100):
        self.ob = ob
        self.size = size
        self.height = height
    def _repr_html_(self):
        repl_tuple = (self.size, self.height, self.ob._repr_html_())
        return u'<span style="font-size:{0}%; line-height:{1}%">{2}</span>'.format(*repl_tuple)

#### function to calculate means of arrays that may be empty without producing annoying warnings:

In [3]:
def arNanMean(ar):
    # suppress anticipated 'empty slice' warning
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        try:
            outvar=np.nanmean(ar)
        except:
            outvar=np.nan
        assert len(w) in [0,1]
        if len(w) == 1:
            assert "Mean of empty slice" in str(w[-1].message)
    return(outvar)

# Create database and tables

In [4]:
basepath='/ocean/eolson/MEOPAR/obs/'
basedir=basepath + 'STRATOGEM/'
dbname='STRATOGEM'

In [5]:
fout=open(basedir+'createDBfromSTRATOGEM_log.txt','w')
ferr=open(basedir+'createDBfromSTRATOGEM_errors.txt','w')
fout.write('Files processed:\n')

17

In [6]:
if os.path.isfile(basedir + dbname + '.sqlite'):
    os.remove(basedir + dbname + '.sqlite')
engine = create_engine('sqlite:///' + basedir + dbname + '.sqlite')
Base=declarative_base()
# create classes for custom data types

In [7]:
class forceNumeric(types.TypeDecorator):

    impl = types.Float
    def process_bind_param(self, value, dialect):
        try:
            int(float(value))
            if (int(float(value))==-99 or int(10*float(value))==-99):
                value=None
        except:
            value = None
        if (str(value).startswith('-99') or str(value).startswith('9999')):
            value = None
        return value

class forceInt(types.TypeDecorator):

    impl = types.Integer
    def process_bind_param(self, value, dialect):
        try:
            int(value)
            if int(value)==-99:
                value=None
        except:
            value = None
        if (str(value).startswith('-99') or str(value).startswith('9999')):
            value = None
        return value

In [8]:
# define Table Classes for CHL:
class ChlStationTBL(Base):
    __table__=Table('ChlStationTBL', Base.metadata,
                Column('ID', Integer, primary_key=True),
                Column('StName', String),
                Column('Mon', forceInt),
                Column('Day', forceInt),
                Column('Year', forceInt),
                Column('Lat', forceNumeric),
                Column('Lon', forceNumeric),
                Column('Time', String),
                Column('sourceFile', String))
    
class ChlProfTBL(Base):
    __table__=Table('ChlProfTBL', Base.metadata,
                    Column('ID', Integer, primary_key=True),
                    Column('depth', forceNumeric),
                    Column('corrdepth', forceNumeric),
                    Column('depthSalMatchCTD', forceNumeric),
                    Column('sal', forceNumeric),
                    Column('oxML', forceNumeric),
                    Column('chl002', forceNumeric),
                    Column('chl020', forceNumeric),
                    Column('chl200', forceNumeric),
                    Column('po4', forceNumeric),
                    Column('si', forceNumeric),
                    Column('no3', forceNumeric),
                    Column('sourceFile', String),
                    Column('ChlStationTBLID', forceInt, ForeignKey('ChlStationTBL.ID')),
                    )
    stationCHL=relationship(ChlStationTBL, primaryjoin=__table__.c.ChlStationTBLID == ChlStationTBL.ID)
    
# define CTD Table Classes:
class CTDStationTBL(Base):
    __table__=Table('CTDStationTBL', Base.metadata,
                Column('ID', Integer, primary_key=True),
                Column('StName', String),
                Column('Mon', forceInt),
                Column('Day', forceInt),
                Column('Year', forceInt),
                Column('Lat', forceNumeric),
                Column('Lon', forceNumeric),
                Column('Time', String),
                Column('Cruise', String),
                Column('xcalFile',String),
                Column('xcal0',forceNumeric),
                Column('xcal1',forceNumeric),
                Column('xcal2',forceNumeric),
                Column('sourceFile', String),
                Column('UseCast', Boolean, default=True))
    
class CTDProfTBL(Base):
    __table__=Table('CTDProfTBL', Base.metadata,
                Column('ID', Integer, primary_key=True),
                Column('scan', forceInt),
                Column('prSM', forceNumeric),
                Column('convertedDepth', forceNumeric),
                Column('t090C', forceNumeric),
                Column('c0mScm', forceNumeric),
                Column('wetStar', forceNumeric),
                Column('xmiss', forceNumeric),
                Column('par', forceNumeric),
                Column('UsePAR', Boolean, default=True),
                Column('sal00', forceNumeric),
                Column('sigma00', forceNumeric),
                Column('upoly0', forceNumeric),
                Column('upoly1', forceNumeric),
                Column('oxC', forceNumeric),
                Column('oxTC', forceNumeric),
                Column('oxMLL', forceNumeric),
                Column('sbeox0MLL', forceNumeric),
                Column('altM', forceNumeric),
                Column('sigmat00', forceNumeric),
                Column('v0', forceNumeric),
                Column('v1', forceNumeric),
                Column('flag', forceNumeric),
                Column('nbin', forceInt),
                Column('sourceFile', String),
                Column('CTDStationTBLID', forceInt, ForeignKey('CTDStationTBL.ID')),
                )
    stationCTD=relationship(CTDStationTBL, primaryjoin=__table__.c.CTDStationTBLID == CTDStationTBL.ID)
    
    
# combined data Tables:
class CombStationTBL(Base):
    __table__=Table('CombStationTBL', Base.metadata,
                Column('ID', Integer, primary_key=True),
                Column('ID_ChlSta', forceInt),
                Column('ID_CTDSta', forceInt),
                Column('StName', String),
                Column('Mon', forceInt),
                Column('Day', forceInt),
                Column('Year', forceInt),
                Column('Lat', forceNumeric),
                Column('Lon', forceNumeric),
                Column('Time', String),
                Column('Cruise', String))
    
class CombProfTBL(Base):
    __table__=Table('CombProfTBL', Base.metadata,
                Column('ID', Integer, primary_key=True),
                Column('ID_ChlProf', forceInt),
                Column('ID_CTDProf', forceInt),
                Column('Depth', forceNumeric),
                Column('Temperature', forceNumeric),
                Column('Salinity', forceNumeric),
                Column('PAR_ctd', forceNumeric),
                Column('Xmis_ctd', forceNumeric),
                Column('Fluor_ctd', forceNumeric),
                Column('chl002', forceNumeric),
                Column('chl020', forceNumeric),
                Column('chl200', forceNumeric),
                Column('Ox_bot', forceNumeric),
                Column('Ox_ctd', forceNumeric),
                Column('NO3', forceNumeric),
                Column('PO4', forceNumeric),
                Column('Si', forceNumeric),
                Column('CombStationTBLID', forceInt, ForeignKey('CombStationTBL.ID')),
                )
    stationComb=relationship(CombStationTBL, primaryjoin=__table__.c.CombStationTBLID == CombStationTBL.ID)

In [9]:
Base.metadata.create_all(engine)
session = create_session(bind = engine, autocommit = False, autoflush = True)

# Process Chl files

In [10]:
cdirpath=os.path.join(basedir, 'chl')
filenames = [f for f in os.listdir(cdirpath) if not f.endswith('~')] #if ( f.endswith('.cnv') or f.endswith('.ctd'))
filenames.sort()
str2=re.compile('%\s+Dep\s+Sal#\s+Sal\s+O2#\s+O2\s+Chl\s+Chl\s+Chl\s+PO4\s+Si\s+NO3')
str3=re.compile('%\s+m\s+PSU\s+ml/l\s+0.2um\s+2um\s+20um\s+uM\s+uM\s+uM')
str4=re.compile('%\s+ug/l\s+ug/l\s+ug/l')
letStart=re.compile('^[a-zA-Z]') # starts with letter
numStart=re.compile('^[0-9]') # starts with number
retime=re.compile('(?<=\s)[0-9]{1,2}\:[0-5][0-9](?=\s)')
relat=re.compile('(?<=\s)[0-9]{2,3}\s[0-9]{2}\.[0-9]{3,4}\s*(?=N)')
relon=re.compile('(?<=\s)[0-9]{2,3}\s[0-9]{2}\.[0-9]{3,4}\s*(?=W)')
sta=0
for file in filenames:
    #print(file)
    fout.write(file+'\n')
    with open(os.path.join(cdirpath,file), 'rt') as f:
        inheader=True
        hnum=0
        for fline in f:
            line=fline.strip()
            if inheader:
                if line.startswith('%'):
                    if hnum==1:
                        if str2.match(line) is None:
                            ferr.write('warning: '+file+'; no match on 1: '+line+'\n')
                        hnum+=1
                    elif hnum==2:
                        if str3.match(line) is None:
                            ferr.write('warning: '+file+'; no match on 2: '+line+'\n')
                        hnum+=1
                    elif hnum==3:
                        if str4.match(line) is None:
                            ferr.write('warning: '+file+'; no match on 3: '+line+'\n')
                        hnum+=1
                    elif line.startswith('% Stn'):
                        hnum+=1
                elif line.startswith('*'):
                    if not hnum==4:
                        ferr.write('warning: '+file+'; missing header match; hnum='+str(hnum))
                    headers=re.split('\s+',line)
                    headers=headers[1:] # drop 1st entry 
                    #print(headers)
                    inheader=False
            else:
                if letStart.match(line) is not None: #line starts with letter: Station
                    stline=re.split('\s*:?\s+',line)
                    #print('station:', splitline)
                    sta+=1
                    #print(stline) 
                    while len(stline)<5:
                        stline.append('NaN')
                    stadict={'ID':sta, 'StName':stline[0], 'Mon':strptime(stline[2][0:3],'%b').tm_mon, 
                             'Day':stline[3], 'Year':stline[4],
                             'sourceFile':file}
                    stadict['Time']=re.findall(retime,line)[0] if (len(re.findall(retime,line))==1) else None
                    if (len(re.findall(relat,line))==1):
                        splat=re.split('\s',re.findall(relat,line)[0])
                        lat=float(splat[0])+float(splat[1])/60.0
                    else:
                        lat=None
                    if (len(re.findall(relon,line))==1):
                        splon=re.split('\s',re.findall(relon,line)[0])
                        lon=-(float(splon[0])+float(splon[1])/60.0)
                    else:
                        lon=None
                    stadict['Lat']=lat
                    stadict['Lon']=lon
                    session.execute(ChlStationTBL.__table__.insert().values(**stadict))
                    #read station data
                if numStart.match(line) is not None: #line starts with number: Profile
                    splitline=re.split('\s+',line)
                    profdict=dict(zip(headers,splitline))
                    if 'no3r3' in profdict:
                        profdict['no3'] = arNanMean((float(profdict['no3']),float(profdict['no3r2']),
                                                      float(profdict['no3r3'])))
                    if 'po4r3' in profdict:
                        profdict['po4'] = arNanMean((float(profdict['po4']),float(profdict['po4r2']),
                                                      float(profdict['po4r3'])))
                    if 'sir3' in profdict:
                        profdict['si'] = arNanMean((float(profdict['si']),float(profdict['sir2']),
                                                    float(profdict['sir3'])))
                    keylist=[el for el in profdict.keys()]
                    for ikey in keylist:
                        if ikey not in ('depth', 'corrdepth', 'sal', 'oxML', 'chl002', 'chl020', 'chl200', 'po4', 'si', 'no3'):
                            del profdict[ikey]
                            ferr.write(ikey+' unused in '+file+' St. '+str(sta))
                    profdict['sourceFile']=file
                    profdict['ChlStationTBLID']=sta
                    session.execute(ChlProfTBL.__table__.insert().values(**profdict))

In [11]:
session.commit()

# Process CTD files:

In [12]:
def getCal(cpath):
    with open(cpath) as fi:
        for i, iline in enumerate(fi):
            if i==22:
                calvec=iline.split()
                break
            elif i > 22:
                break
    for i in range(0,3):
        calvec[i]=float(calvec[i])
    return(calvec)

In [13]:
alphnumlist=list(string.ascii_letters)+list(string.digits)
cdirpath=os.path.join(basedir, 'ctd')
filenames = [f for f in os.listdir(cdirpath) if (f not in ('SG0407100.cnv') and not f.startswith('sg'))] #if ( f.endswith('.cnv') or f.endswith('.ctd'))
# avoid file that appears to have errors
#filenames=['SG0201003.cnv']
filenames.sort()
spStart=re.compile('^\s*[0-9]') # starts with space characters followed by digit
reSta=re.compile('(?<=\*\*\sStation:).*')
reLat=re.compile('(?<=\*\*\sLatitude\s=).*(?=N)')
reLon=re.compile('(?<=\*\*\sLongitude\s=).*(?=W)')
# start_time = May 08 2002 09:39:10
reST=re.compile('(?<=\#\sstart_time\s=).*')
#rePST=re.compile('(?<=\*\*\sPST\s\(Time\)\s=).*')
reCr=re.compile('(?<=\*\*\sCruise:).*')
reNam=re.compile('(?<=\#\sname\s)[0-9]+.*')
reCal=re.compile('(?<=\#\sdatcnv_in\s\=).*')
reDir=re.compile('(?<=/ocean/shared/SoG/ctd/).*')
sta=0
for file in filenames:
    #print(file)
    fout.write(file)
    lpath=os.path.realpath(os.path.join(cdirpath,file))
    mDir=reDir.search(lpath).group(0).split('/')[0]
    sta+=1
    fout.write(file+'\n')
    with open(os.path.join(cdirpath,file), 'rt', encoding="ISO-8859-1") as f:
        headerWritten=False
        staDict={}
        staDict['sourceFile']=file
        varlist=[]
        for fline in f:
            line=fline.strip()
            mNam=reNam.search(line)
            mST=reST.search(line)
            mCal=reCal.search(line)
            if line.startswith('**'): # get station data
                mSta=reSta.search(line)
                mLat=reLat.search(line)
                mLon=reLon.search(line)
                mCr=reCr.search(line)
                if mSta is not None:
                    staDict['StName']=mSta.group(0).strip()
                    staDict['ID']=sta
                elif mLat is not None:
                    latDegMin=re.split('\s+',mLat.group(0).strip())
                    staDict['Lat']=float(latDegMin[0])+float(latDegMin[1])/60
                elif mLon is not None:
                    lonDegMin=re.split('\s+',mLon.group(0).strip())
                    staDict['Lon']=-(float(lonDegMin[0])+float(lonDegMin[1])/60)
                elif mCr is not None:
                    staDict['Cruise']=mCr.group(0).strip()
            elif mST is not None:
                dSp=re.split('\s+',mST.group(0).strip())
                staDict['Mon']=strptime(dSp[0][0:3],'%b').tm_mon
                staDict['Day']=dSp[1]
                staDict['Year']=dSp[2]
                staDict['Time']=dSp[3].strip()[:-3]
            elif mNam is not None: # get profile column headings
                nline=re.split('=|\:', mNam.group(0).strip())
                vnum=int(nline[0])
                vname=''.join(e for e in nline[1] if e in alphnumlist)
                varlist.append(vname)
                if not (varlist[vnum]==vname):
                    print('ERROR in varlist: '+varlist[vnum]+' '+vname)
            elif mCal is not None:
                mCal0=mCal.group(0).strip()
                cfname=re.split(r'\\',mCal0)[-1]
                calpath=os.path.join('/ocean/shared/SoG/ctd',mDir,cfname)
                calvec=getCal(calpath)
                staDict['xcalFile']=calpath
                staDict['xcal0']=calvec[0]
                staDict['xcal1']=calvec[1]
                staDict['xcal2']=calvec[2]
            elif spStart.match(line) is not None: # profile rows
                if not headerWritten:
                    # store station data on first profile line
                    session.execute(CTDStationTBL.__table__.insert().values(**staDict))
                    headerWritten=True
                # read line, assign to dict, add to table
                splitline=re.split('\s+',line)
                profdict=dict(zip(varlist,splitline))
                profdict['sourceFile']=file
                profdict['CTDStationTBLID']=sta
                profdict['convertedDepth']=-1*gsw.z_from_p(float(profdict['prSM']),float(stadict['Lat']))
                #print(profdict)
                session.execute(CTDProfTBL.__table__.insert().values(**profdict))

In [14]:
session.commit()
fout.close()
ferr.close()

# Match stations, avoiding duplicates

In [15]:
pd.set_option('display.max_rows', 600)

Link Chl stations with CTD stations and ensure each match is 1:1 by eliminating CTD casts with UseCast criterion

In [16]:
subStaMatch=session.query(ChlStationTBL.ID.label('ChlStaID'),ChlStationTBL.sourceFile.label('ChlStaSF'),
                          ChlStationTBL.StName.label('ChlStaStName'),CTDStationTBL.StName.label('CTDStaStName'),
                       CTDStationTBL.sourceFile.label('CTDStaSF'),CTDStationTBL.ID.label('CTDStaID')).\
                select_from(ChlStationTBL).outerjoin(CTDStationTBL,and_(CTDStationTBL.StName==ChlStationTBL.StName,
                                CTDStationTBL.Year==ChlStationTBL.Year,CTDStationTBL.Mon==ChlStationTBL.Mon,
                                or_(CTDStationTBL.Day==ChlStationTBL.Day,CTDStationTBL.Day==(ChlStationTBL.Day+1.0),
                                    CTDStationTBL.Day==(ChlStationTBL.Day-1.0),
                                    and_(CTDStationTBL.Lat==ChlStationTBL.Lat,CTDStationTBL.Lon==ChlStationTBL.Lon)))).\
                                filter(CTDStationTBL.UseCast==True).subquery()

In [17]:
countCTD=session.query(subStaMatch.c.ChlStaID,func.count(subStaMatch.c.CTDStaID).label('N'),
                    subStaMatch.c.CTDStaSF).group_by(subStaMatch.c.ChlStaID).subquery()

In [18]:
dupCTD=session.query(subStaMatch.c.ChlStaID,subStaMatch.c.ChlStaSF,subStaMatch.c.ChlStaStName,subStaMatch.c.CTDStaStName,
                    subStaMatch.c.CTDStaSF,subStaMatch.c.CTDStaID).select_from(subStaMatch).\
                join(countCTD,countCTD.c.ChlStaID==subStaMatch.c.ChlStaID).filter(countCTD.c.N>1)

remove one of each duplicate CTD cast (in case one is bad and to avoid undue weight to locations with multiple casts)
    - this was done only in context of matched stations

In [19]:
# SG0303006.cnv looks like it has problems at the start and it is shorter than SG0303007.cnv (but load_all uses 6)
# SG0317007.cnv  is shorter than SG0317006.cnv
# SG0318002.cnv may be superceded by sg0318002.cnv, which was processed later; however, load_all always uses SG*, 
# so go with that for all cases. (remove sg0318002.cnv, sg0318003.cnv, sg0318004.cnv, sg0318005.cnv...sg0318009.cnv)
# SG0408005.cnv or SG0408100.cnv? load_all uses 5, and SG0408100 appears to use different instrument; remove SG0408100.cnv
# and SG0408101.cnv, SG0408102.cnv, SG0408103.cnv HOWEVER THESE CONTAIN OXYGEN CTD DATA
# SG0410002.cnv or SG0410004.cnv : 2 is from same day as bottle data so remove SG0410004.cnv
# SG0412001.cnv or SG0412100.cnv: in all these cases use 00n.cnv instead of 10x.cnv
# SG0415001.cnv vs SG0415101.cnv breaks pattern of different CTD versions but 101.cnv contains almost no data
# SG0509006.cnv or SG0509005.cnv? 5 matches lat/lon, but load_all uses 6. so get rid of SG0509005.cnv
tochange=session.query(CTDStationTBL).filter(or_(CTDStationTBL.sourceFile=='SG0303007.cnv',
                                                CTDStationTBL.sourceFile=='SG0317007.cnv',
                                                CTDStationTBL.sourceFile=='SG0318002.cnv',
                                                CTDStationTBL.sourceFile=='sg0318003.cnv',
                                                CTDStationTBL.sourceFile=='sg0318004.cnv',
                                                CTDStationTBL.sourceFile=='sg0318005.cnv',
                                                CTDStationTBL.sourceFile=='sg0318006.cnv',
                                                CTDStationTBL.sourceFile=='sg0318007.cnv',
                                                CTDStationTBL.sourceFile=='sg0318008.cnv',
                                                CTDStationTBL.sourceFile=='sg0318009.cnv',
                                                CTDStationTBL.sourceFile=='SG0408100.cnv',
                                                CTDStationTBL.sourceFile=='SG0408101.cnv',
                                                CTDStationTBL.sourceFile=='SG0408102.cnv',
                                                CTDStationTBL.sourceFile=='SG0408103.cnv',
                                                CTDStationTBL.sourceFile=='SG0410004.cnv',
                                                CTDStationTBL.sourceFile=='SG0412100.cnv',
                                                CTDStationTBL.sourceFile=='SG0412101.cnv',
                                                CTDStationTBL.sourceFile=='SG0412102.cnv',
                                                CTDStationTBL.sourceFile=='SG0412104.cnv',
                                                CTDStationTBL.sourceFile=='SG0412105.cnv',
                                                CTDStationTBL.sourceFile=='SG0412106.cnv',
                                                CTDStationTBL.sourceFile=='SG0415101.cnv',
                                                CTDStationTBL.sourceFile=='SG0509100.cnv',
                                                CTDStationTBL.sourceFile=='SG0509101.cnv',
                                                CTDStationTBL.sourceFile=='SG0509102.cnv',
                                                CTDStationTBL.sourceFile=='SG0509103.cnv',
                                                CTDStationTBL.sourceFile=='SG0509104.cnv',
                                                CTDStationTBL.sourceFile=='SG0509005.cnv')) 
tochange.update({"UseCast": False})
session.commit()

In [20]:
dupdf=pd.DataFrame(dupCTD.all())
dupdf

following load_all.m procedure, remove/correct bad data

In [21]:
# correct bad chls:
badchl002=session.query(ChlProfTBL).filter(ChlProfTBL.chl002>900.0)
badchl002.update({'chl002':None})
badchl020=session.query(ChlProfTBL).filter(ChlProfTBL.chl020>900.0)
badchl020.update({'chl020':None})
badchl200=session.query(ChlProfTBL).filter(ChlProfTBL.chl200>900.0)
badchl200.update({'chl200':None})

0

In [22]:
#FL(ctd.prSM,k)=ctd.wetstar 
#k is cruise, in order; 20 is 03-15
# what is ctd.prSM(48:end)? prSM is pressure, but goes as whole numbers eg 1,2,3...
# S22 is station S2-2
# therefore, this data is from SG0315003.cnv, from the wetstar column
# but, for prSM<47, the wetstar column is negative! 
# -> throw out the whole column

badchl=session.query(CTDProfTBL).filter(CTDProfTBL.sourceFile=='SG0315003.cnv')
badchl.update({'wetStar':None})

306

In [23]:
S42ProfIDs=[idnum[0] for idnum in session.query(ChlProfTBL.ID).select_from(ChlProfTBL).\
        join(ChlStationTBL,ChlStationTBL.ID==ChlProfTBL.ChlStationTBLID).\
        filter(and_(ChlProfTBL.sourceFile=='bottle_0309.txt',
                    ChlStationTBL.StName=='S4-2')).all()]
S42ProfIDs

[732, 733, 734, 735]

In [24]:
S41ProfIDs=[idnum[0] for idnum in session.query(ChlProfTBL.ID).select_from(ChlProfTBL).\
        join(ChlStationTBL,ChlStationTBL.ID==ChlProfTBL.ChlStationTBLID).\
        filter(and_(ChlProfTBL.sourceFile=='bottle_0309.txt',
                    ChlStationTBL.StName=='S4-1')).all()]
S41ProfIDs

[719, 720, 721, 722, 723, 724, 725, 726, 727]

In [25]:
# S4-2 and S4-1
# 18= '03-09' (cruise)
# S42, c03-09 Bno3(1:4)= S41, c03-09 Bno3(5:8)
# S41, c03-09 Bno3(5:8)= S42, c03-09 Bno3(1:4)
# same for Bpo4 and Bsi
#tmp=S42.Bno3(1:4,18);S42.Bno3(1:4,18)=S41.Bno3(5:8,18);S41.Bno3(5:8,18)=tmp;
#tmp=S42.Bpo4(1:4,18);S42.Bpo4(1:4,18)=S41.Bpo4(5:8,18);S41.Bpo4(5:8,18)=tmp;
#tmp=S42.Bsi(1:4,18);S42.Bsi(1:4,18)=S41.Bsi(5:8,18);S41.Bsi(5:8,18)=tmp;
# -> this looks weird. just remove these nuts from analysis. 
#     No3, Po4, Si
#     S4-1 depths 50, 100, 200, 300
#     S4-2 depths 0, 5, 10, 30
for xx in S42ProfIDs:
    badnuts=session.query(ChlProfTBL).filter(and_(
                                ChlProfTBL.ID==xx,
                                ChlProfTBL.depth>=0,
                                ChlProfTBL.depth<=30))
    badnuts.update({'no3': None, 'po4': None, 'si': None})

for xx in S41ProfIDs:
    badnuts=session.query(ChlProfTBL).filter(and_(
                                ChlProfTBL.ID==xx,
                                ChlProfTBL.depth>=50,
                                ChlProfTBL.depth<=300))
    badnuts.update({'no3': None, 'po4': None, 'si': None})

In [26]:
findid1=session.query(ChlProfTBL.ID).select_from(ChlProfTBL).\
        join(ChlStationTBL,ChlStationTBL.ID==ChlProfTBL.ChlStationTBLID).\
        filter(and_(ChlProfTBL.sourceFile=='bottle_0401.txt',
                    ChlStationTBL.StName=='S4-1',
                    ChlProfTBL.depth==5)).one()
print(findid1[0])
print(session.query(ChlProfTBL.oxML).filter(ChlProfTBL.ID==1183).all()[0])
findid2=session.query(ChlProfTBL.ID).select_from(ChlProfTBL).\
        join(ChlStationTBL,ChlStationTBL.ID==ChlProfTBL.ChlStationTBLID).\
        filter(and_(ChlProfTBL.sourceFile=='bottle_0318.txt',
                    ChlStationTBL.StName=='S4-1',
                    ChlProfTBL.depth==50)).one()
print(findid2[0])
print(session.query(ChlProfTBL.oxML).filter(ChlProfTBL.ID==994).all()[0])

1183
(15.33,)
994
(9.51,)


In [27]:
# from load_all.m:
# S41.BOX(2,25)=7.7; 25=04-01, station 4-1, depth=5, origval=15.33, newval=7.7
# S41.BOX(5,21)=3.4; 21=03-18, station 4-1, depth=50, origval=9.51, newval=3.4
# -> make changes
badox1=session.query(ChlProfTBL).filter(ChlProfTBL.ID==findid1[0])
badox1.update({'oxML':7.7})
badox2=session.query(ChlProfTBL).filter(ChlProfTBL.ID==findid2[0])
badox2.update({'oxML':3.4})

1

In [28]:
#S41.SS(402:403,16)=NaN; 16=03-07, station 4-1, depth=402,403
#S41.SS(404:405,31)=NaN; 31=04-07, station 4-1, depth=404,405
#S41.SS(365:397,34)=NaN; 34=04-10, station 4-1, depth=365:397
# S41.SG(402:403,16)=NaN;
# S41.SG(404:405,31)=NaN;
# S41.SG(365:397,34)=NaN;
# SS is sal00, SG is sigma
findid16=session.query(CTDProfTBL).select_from(CTDProfTBL).\
        join(CTDStationTBL,CTDStationTBL.ID==CTDProfTBL.CTDStationTBLID).\
        filter(and_(CTDProfTBL.sourceFile.startswith('SG0307'),
                    CTDStationTBL.StName=='S4-1',
                    CTDProfTBL.prSM>=402,
                    CTDProfTBL.prSM<=403)).all()
print([row.sal00 for row in findid16])
findid31=session.query(CTDProfTBL).select_from(CTDProfTBL).\
        join(CTDStationTBL,CTDStationTBL.ID==CTDProfTBL.CTDStationTBLID).\
        filter(and_(CTDProfTBL.sourceFile.startswith('SG0407'),
                    CTDStationTBL.StName=='S4-1',
                    CTDProfTBL.prSM>=404,
                    CTDProfTBL.prSM<=405)).all()
print([row.sal00 for row in findid31])
findid34=session.query(CTDProfTBL).select_from(CTDProfTBL).\
        join(CTDStationTBL,CTDStationTBL.ID==CTDProfTBL.CTDStationTBLID).\
        filter(and_(CTDProfTBL.sourceFile.startswith('SG0410'),
                    CTDStationTBL.StName=='S4-1',
                    CTDProfTBL.prSM>=365,
                    CTDProfTBL.prSM<=397)).all()
print([row.sal00 for row in findid34])

for xx in findid16:
    badval=session.query(CTDProfTBL).filter(CTDProfTBL.ID==xx.ID)
    badval.update({'sal00': None, 'sigma00':None})
for xx in findid31:
    badval=session.query(CTDProfTBL).filter(CTDProfTBL.ID==xx.ID)
    badval.update({'sal00': None, 'sigma00':None})
for xx in findid34:
    badval=session.query(CTDProfTBL).filter(CTDProfTBL.ID==xx.ID)
    badval.update({'sal00': None, 'sigma00':None})

[30.1873, 29.3174]
[30.9673, 25.2108]
[30.7619, 30.2089, 30.2136, 30.217, 30.2193, 30.222, 30.2245, 30.2234, 30.2285, 30.2311, 30.2321, 30.2338, 30.2352, 30.237, 30.2399, 30.2373, 30.2426, 30.2445, 30.2475, 30.247, 30.245, 30.2437, 30.2455, 30.2466, 30.2481, 30.2488, 30.2501, 30.2513, 30.2532, 30.2546, 30.2562, 30.2604, 31.0385, 31.0396, 31.0427, 31.0447, 31.0458, 31.0464, 31.0469, 31.0476, 31.0495, 31.0512, 31.0514]


In [29]:
# S1.OX(:,41)=NaN; 41=05-01, Station=S1, all depths, var=oxML or sbeox0ML
findid41=session.query(CTDProfTBL).select_from(CTDProfTBL).\
        join(CTDStationTBL,CTDStationTBL.ID==CTDProfTBL.CTDStationTBLID).\
        filter(and_(CTDProfTBL.sourceFile.startswith('SG0501'),
                    CTDStationTBL.StName=='S1')).all()
print([(row.oxMLL, row.sbeox0MLL) for row in findid41])

for xx in findid41:
    badval=session.query(CTDProfTBL).filter(CTDProfTBL.ID==xx.ID)
    badval.update({'oxMLL': None, 'sbeox0MLL':None})

[(None, 1.81356), (None, 1.69979), (None, 1.67927), (None, 1.6648), (None, 1.65109), (None, 1.63888), (None, 1.63611), (None, 1.63128), (None, 1.62186), (None, 1.60819), (None, 1.584), (None, 1.57709), (None, 1.57049), (None, 1.56261), (None, 1.55078), (None, 1.53442), (None, 1.52067), (None, 1.5024), (None, 1.48843), (None, 1.47857), (None, 1.46999), (None, 1.4534), (None, 1.4322), (None, 1.41622), (None, 1.40334), (None, 1.39111), (None, 1.37279), (None, 1.35841), (None, 1.34803), (None, 1.33573), (None, 1.32471), (None, 1.3093), (None, 1.29452), (None, 1.28022), (None, 1.26812), (None, 1.26356), (None, 1.25201), (None, 1.24436), (None, 1.23318), (None, 1.22456), (None, 1.21597), (None, 1.20836), (None, 1.20309), (None, 1.20667), (None, 1.20193), (None, 1.19893), (None, 1.1973), (None, 1.19348), (None, 1.19007), (None, 1.19113), (None, 1.19177), (None, 1.19353), (None, 1.19829), (None, 1.20348), (None, 1.20733), (None, 1.2107), (None, 1.21294), (None, 1.21584), (None, 1.21901), (None

In [30]:
# additionally, get rid of CTD cast labeled 'BAD'
findbad=session.query(CTDStationTBL).filter(CTDStationTBL.StName.like('%bad%'))
print([row.ID for row in findbad])
for xx in findbad:
    removeBadProfs=session.query(CTDProfTBL).filter(CTDProfTBL.CTDStationTBLID==xx.ID).delete()
    removeBadSta = session.query(CTDStationTBL).filter(CTDStationTBL.ID==xx.ID).delete()

[225]


In [31]:
session.commit()

# get rid of chl profiles consisting of all NaNs; also get rid of stations not associated with any good profiles

In [32]:
badprofs=session.query(ChlProfTBL.ID, ChlProfTBL.depth, ChlProfTBL.corrdepth, ChlProfTBL.sal, 
                       ChlProfTBL.oxML,ChlProfTBL.chl002,ChlProfTBL.chl020,ChlProfTBL.chl200,
                       ChlProfTBL.po4,ChlProfTBL.si,ChlProfTBL.no3,ChlProfTBL.sourceFile,
                       ChlProfTBL.ChlStationTBLID).filter(and_(ChlProfTBL.chl002==None,
                                                  ChlProfTBL.chl020==None,
                                                  ChlProfTBL.chl200==None,
                                                  ChlProfTBL.no3==None,
                                                  ChlProfTBL.oxML==None,
                                                  ChlProfTBL.po4==None,
                                                  ChlProfTBL.sal==None,
                                                  ChlProfTBL.si==None)).delete()
session.commit()

In [33]:
badprofs=session.query(ChlProfTBL.ID, ChlProfTBL.depth, ChlProfTBL.corrdepth, ChlProfTBL.sal, 
                       ChlProfTBL.oxML,ChlProfTBL.chl002,ChlProfTBL.chl020,ChlProfTBL.chl200,
                       ChlProfTBL.po4,ChlProfTBL.si,ChlProfTBL.no3,ChlProfTBL.sourceFile,
                       ChlProfTBL.ChlStationTBLID).filter(and_(ChlProfTBL.chl002==None,
                                                  ChlProfTBL.chl020==None,
                                                  ChlProfTBL.chl200==None,
                                                  ChlProfTBL.no3==None,
                                                  ChlProfTBL.oxML==None,
                                                  ChlProfTBL.po4==None,
                                                  ChlProfTBL.sal==None,
                                                  ChlProfTBL.si==None))

In [34]:
for row in badprofs.all():
    print(row)

In [35]:
badStas=session.query(ChlStationTBL.ID).select_from(ChlStationTBL).\
    outerjoin(ChlProfTBL,ChlProfTBL.ChlStationTBLID==ChlStationTBL.ID).filter(ChlProfTBL.ID==None)

In [36]:
badStaList=[sta[0] for sta in badStas.all()]
print(badStaList)
for stx in badStaList:
    session.query(ChlStationTBL).filter(ChlStationTBL.ID==stx).delete()

[91, 92, 115, 236, 311, 317, 446, 447, 448, 472, 473, 475, 476, 477, 478, 479]


In [37]:
session.commit()

# Query data and add to combined tables

### Column Mapping
CombStationTBL:

    ID         New Sta ID
    ID_ChlSta  ChlStationTBL.ID
    ID_CTDSta  CTDSTationTBL.ID
    StName     CTDStationTBL.StName, ChlStationTBL.StName
    Mon        CTDStationTBL.Mon, ChlStationTBL.Mon
    Day        CTDStationTBL.Day, ChlStationTBL.Day
    Year       CTDStationTBL.Year, ChlStationTBL.Year
    Lat        CTDStationTBL.Lat, ChlStationTBL.Lat
    Lon        CTDSTationTBL.Lon, ChlStationTBL.Lon
    Time       CTDStationTBL.Time: trim & remove last 3, ChlStationTBL.Time
    Cruise     CTDStationTBL.Cruise
    
CombProfTBL:
    ID          New Prof ID
    ID_ChlProf  ChlProfTBL.ID
    ID_CTDProf  CTDProfTBL.ID
    Depth       round(CTDProfTBL.correctedDepth), round(ChlProfTBL.corrdepth)
    Temperature CTDProfTBL.t090C
    Salinity    ChlProfTBL.sal, CTDProfTBL.sal00
    PAR_ctd     CTDProfTBL.par
    Xmis_ctd    CTDProfTBL.xmiss
    Fluor_ctd   CTDProfTBL.wetStar
    chl002      ChlProfTBL.chl002
    chl020      ChlProfTBL.chl020
    chl200      ChlProfTBL.chl200
    Ox_bot      ChlProfTBL.oxML
    Ox_ctd      CTDProfTBL.oxMLL
    NO3         ChlProfTBL.no3
    PO4         ChlProfTBL.po4
    Si          ChlProfTBL.si
    CombStationTBLID     New Sta ID
    
    link chl and ctd profiles on: station match, round(CTDProfTBL.correctedDepth)==round(ChlProfTBL.corrdepth)

1st, add stations from chl tbl, whether or not present in CTD table:

In [38]:
subStaMatchIn=session.query(ChlStationTBL.ID.label('ID_ChlSta'),CTDStationTBL.ID.label('ID_CTDSta'),
    case([(CTDStationTBL.StName==None,ChlStationTBL.StName)],else_=CTDStationTBL.StName).label('StName'),
    case([(CTDStationTBL.Mon==None,ChlStationTBL.Mon)],else_=CTDStationTBL.Mon).label('Mon'),
    case([(CTDStationTBL.Day==None,ChlStationTBL.Day)],else_=CTDStationTBL.Day).label('Day'),
    case([(CTDStationTBL.Year==None,ChlStationTBL.Year)],else_=CTDStationTBL.Year).label('Year'),
    case([(CTDStationTBL.Lat==None,ChlStationTBL.Lat)],else_=CTDStationTBL.Lat).label('Lat'),
    case([(CTDStationTBL.Lon==None,ChlStationTBL.Lon)],else_=CTDStationTBL.Lon).label('Lon'),
    case([(CTDStationTBL.Time==None,ChlStationTBL.Time)],else_=CTDStationTBL.Time).label('Time'),
    CTDStationTBL.Cruise).\
    select_from(ChlStationTBL).outerjoin(CTDStationTBL,
                                         and_(CTDStationTBL.UseCast==True,
                                            CTDStationTBL.StName==ChlStationTBL.StName,  
                                            CTDStationTBL.Year==ChlStationTBL.Year,  
                                            CTDStationTBL.Mon==ChlStationTBL.Mon,  
                                            or_(CTDStationTBL.Day==ChlStationTBL.Day,  
                                                CTDStationTBL.Day==(ChlStationTBL.Day+1.0),  
                                                CTDStationTBL.Day==(ChlStationTBL.Day-1.0),  
                                                and_(CTDStationTBL.Lat==ChlStationTBL.Lat,  
                                                    CTDStationTBL.Lon==ChlStationTBL.Lon))))

In [39]:
print(str(subStaMatchIn.statement.compile(engine)).replace('"',''))

SELECT ChlStationTBL.ID AS ID_ChlSta, CTDStationTBL.ID AS ID_CTDSta, CASE WHEN (CTDStationTBL.StName IS NULL) THEN ChlStationTBL.StName ELSE CTDStationTBL.StName END AS StName, CASE WHEN (CTDStationTBL.Mon IS NULL) THEN ChlStationTBL.Mon ELSE CTDStationTBL.Mon END AS Mon, CASE WHEN (CTDStationTBL.Day IS NULL) THEN ChlStationTBL.Day ELSE CTDStationTBL.Day END AS Day, CASE WHEN (CTDStationTBL.Year IS NULL) THEN ChlStationTBL.Year ELSE CTDStationTBL.Year END AS Year, CASE WHEN (CTDStationTBL.Lat IS NULL) THEN ChlStationTBL.Lat ELSE CTDStationTBL.Lat END AS Lat, CASE WHEN (CTDStationTBL.Lon IS NULL) THEN ChlStationTBL.Lon ELSE CTDStationTBL.Lon END AS Lon, CASE WHEN (CTDStationTBL.Time IS NULL) THEN ChlStationTBL.Time ELSE CTDStationTBL.Time END AS Time, CTDStationTBL.Cruise 
FROM ChlStationTBL LEFT OUTER JOIN CTDStationTBL ON CTDStationTBL.UseCast = 1 AND CTDStationTBL.StName = ChlStationTBL.StName AND CTDStationTBL.Year = ChlStationTBL.Year AND CTDStationTBL.Mon = ChlStationTBL.Mon AND (

In [40]:
session.execute(" INSERT INTO CombStationTBL (ID_ChlSta, ID_CTDSta, StName, Mon, Day, Year, Lat, Lon, Time, \
Cruise) SELECT ChlStationTBL.ID AS ID_ChlSta, CTDStationTBL.ID AS ID_CTDSta, \
CASE WHEN (CTDStationTBL.StName IS NULL) THEN ChlStationTBL.StName ELSE CTDStationTBL.StName END AS StName, \
CASE WHEN (CTDStationTBL.Mon IS NULL) THEN ChlStationTBL.Mon ELSE CTDStationTBL.Mon END AS Mon, \
CASE WHEN (CTDStationTBL.Day IS NULL) THEN ChlStationTBL.Day ELSE CTDStationTBL.Day END AS Day, \
CASE WHEN (CTDStationTBL.Year IS NULL) THEN ChlStationTBL.Year ELSE CTDStationTBL.Year END AS Year, \
CASE WHEN (CTDStationTBL.Lat IS NULL) THEN ChlStationTBL.Lat ELSE CTDStationTBL.Lat END AS Lat, \
CASE WHEN (CTDStationTBL.Lon IS NULL) THEN ChlStationTBL.Lon ELSE CTDStationTBL.Lon END AS Lon, \
CASE WHEN (CTDStationTBL.Time IS NULL) THEN ChlStationTBL.Time ELSE CTDStationTBL.Time END AS Time, \
CTDStationTBL.Cruise FROM ChlStationTBL LEFT OUTER JOIN CTDStationTBL ON CTDStationTBL.UseCast = 1 \
AND CTDStationTBL.StName = ChlStationTBL.StName AND CTDStationTBL.Year = ChlStationTBL.Year \
AND CTDStationTBL.Mon = ChlStationTBL.Mon AND (CTDStationTBL.Day = ChlStationTBL.Day \
OR CTDStationTBL.Day = ChlStationTBL.Day + 1 OR CTDStationTBL.Day = ChlStationTBL.Day - 1 \
OR CTDStationTBL.Lat = ChlStationTBL.Lat AND CTDStationTBL.Lon = ChlStationTBL.Lon) ;")

<sqlalchemy.engine.result.ResultProxy at 0x7faa5587a310>

In [41]:
session.commit()

Check for chl stations not added:

In [42]:
morechlSta=session.query(ChlStationTBL.ID.label('ID_ChlSta'), ChlStationTBL.StName, ChlStationTBL.Mon,
    ChlStationTBL.Day, ChlStationTBL.Year, ChlStationTBL.Lat, ChlStationTBL.Lon, ChlStationTBL.Time,
                        ChlStationTBL.sourceFile).\
    select_from(ChlStationTBL).outerjoin(CombStationTBL, CombStationTBL.ID_ChlSta==ChlStationTBL.ID).\
    filter(CombStationTBL.ID_ChlSta==None)

In [43]:
pdMoreChlSta=pd.DataFrame(morechlSta.all())
pdMoreChlSta

Next add ctd stations not in previous query:

In [44]:
moreCTDSta=session.query(CTDStationTBL.ID.label('ID_CTDSta'),CTDStationTBL.StName.label('StName'),
                         CTDStationTBL.Mon.label('Mon'),CTDStationTBL.Day.label('Day'),
                         CTDStationTBL.Year.label('Year'),CTDStationTBL.Lat.label('Lat'),
                         CTDStationTBL.Lon.label('Lon'),CTDStationTBL.Time.label('Time'),CTDStationTBL.Cruise).\
    select_from(CTDStationTBL).outerjoin(CombStationTBL,CombStationTBL.ID_CTDSta==CTDStationTBL.ID).\
    filter(and_(CombStationTBL.ID_CTDSta==None,CTDStationTBL.UseCast==True))

In [45]:
print(moreCTDSta.count())
print(str(moreCTDSta.statement.compile(engine)).replace('"',''))

23
SELECT CTDStationTBL.ID AS ID_CTDSta, CTDStationTBL.StName AS StName, CTDStationTBL.Mon AS Mon, CTDStationTBL.Day AS Day, CTDStationTBL.Year AS Year, CTDStationTBL.Lat AS Lat, CTDStationTBL.Lon AS Lon, CTDStationTBL.Time AS Time, CTDStationTBL.Cruise 
FROM CTDStationTBL LEFT OUTER JOIN CombStationTBL ON CombStationTBL.ID_CTDSta = CTDStationTBL.ID 
WHERE CombStationTBL.ID_CTDSta IS NULL AND CTDStationTBL.UseCast = 1


In [46]:
session.execute(" INSERT INTO CombStationTBL (ID_CTDSta, StName, Mon, Day, Year, Lat, Lon, Time, \
Cruise) SELECT CTDStationTBL.ID AS ID_CTDSta, CTDStationTBL.StName AS StName, CTDStationTBL.Mon AS Mon, \
CTDStationTBL.Day AS Day, CTDStationTBL.Year AS Year, CTDStationTBL.Lat AS Lat, CTDStationTBL.Lon AS Lon, \
CTDStationTBL.Time AS Time, CTDStationTBL.Cruise FROM CTDStationTBL LEFT OUTER JOIN CombStationTBL \
ON CombStationTBL.ID_CTDSta = CTDStationTBL.ID WHERE CombStationTBL.ID_CTDSta IS NULL AND CTDStationTBL.UseCast = 1 ;")

<sqlalchemy.engine.result.ResultProxy at 0x7faa556e8cd0>

In [47]:
session.commit()

In [48]:
print(moreCTDSta.count())

0


## Now match profiles and add them to combined tables. start with chl profiles and any matching ctd profiles

                ID           new 
                ID_ChlProf   ChlProfTBL.ID
                ID_CTDProf   CTDProfTBL.ID
                Depth        ChlProfTBL.corrdepth, ChlProfTBL.depth, CTDProfTBL.convertedDepth
                Temperature  CTDProfTBL.t090C
                Salinity     compare CTDProfTBL.sal00, ChlProfTBL.sal
                PAR_ctd      CTDProfTBL.par
                Xmis_ctd     CTDProfTBL.xmiss
                Fluor_ctd    CTDProfTBL.wetStar
                chl002       ChlProfTBL.chl002
                chl020       ChlProfTBL.chl020
                chl200       ChlProfTBL.chl200
                Ox_bot       ChlProfTBL.oxML
                Ox_ctd       CTDProfTBL.oxMLL
                NO3          ChlProfTBL.no3
                PO4          ChlProfTBL.po4
                Si           ChlProfTBL.si
                CombStationTBLID   key to CombStationTBL: get by join 
                

#### Where possible, match based on salinity. Record these depths in bottle table

In [49]:
saldiffqry=session.query(ChlProfTBL.ID.label('ChlID'),CTDProfTBL.ID.label('CTDID'),CombStationTBL.StName,
                      ChlProfTBL.depth,ChlProfTBL.corrdepth, CTDProfTBL.convertedDepth, CTDProfTBL.prSM,
                        CTDProfTBL.sal00,ChlProfTBL.sal,
                         func.abs(ChlProfTBL.sal-CTDProfTBL.sal00).label('saldiff'),
                         func.abs(ChlProfTBL.depth-CTDProfTBL.convertedDepth).label('zdiff')).\
    select_from(CombStationTBL).join(ChlProfTBL,ChlProfTBL.ChlStationTBLID==CombStationTBL.ID_ChlSta).\
    join(CTDProfTBL,and_(CTDProfTBL.CTDStationTBLID==CombStationTBL.ID_CTDSta,
                              CTDProfTBL.convertedDepth<=(ChlProfTBL.depth+5.),
                              CTDProfTBL.convertedDepth>(ChlProfTBL.depth-5.))).\
    filter(and_(ChlProfTBL.sal!=None,CTDProfTBL.sal00!=None,CombStationTBL.ID_ChlSta!=None,
                                            CombStationTBL.ID_CTDSta!=None)).order_by(
                                    ChlProfTBL.ID,func.abs(ChlProfTBL.sal-CTDProfTBL.sal00),
                        func.abs(ChlProfTBL.depth-CTDProfTBL.convertedDepth)).subquery()

saldiffminqry=session.query(saldiffqry.c.ChlID.label('ChlIDsd'),func.min(saldiffqry.c.saldiff).label('saldiffmin')).\
    group_by(saldiffqry.c.ChlID).subquery()
print(session.query(saldiffminqry.c.saldiffmin).count())
print(session.query(saldiffminqry.c.ChlIDsd).filter(saldiffminqry.c.ChlIDsd!=None).count())

zdiffminqry=session.query(saldiffminqry.c.ChlIDsd.label('ChlIDzd'),saldiffqry.c.prSM,saldiffqry.c.convertedDepth,
                         saldiffminqry.c.saldiffmin,func.min(saldiffqry.c.zdiff).label('zdiffmin')).\
                select_from(saldiffminqry).\
                join(saldiffqry,and_(saldiffqry.c.saldiff==saldiffminqry.c.saldiffmin,
                                    saldiffqry.c.ChlID==saldiffminqry.c.ChlIDsd)).\
                       group_by(saldiffminqry.c.ChlIDsd,saldiffminqry.c.saldiffmin).subquery()
#print(session.query(ChlProfTBL).count())
saldepthqry=session.query(zdiffminqry.c.ChlIDzd,saldiffqry.c.prSM,saldiffqry.c.convertedDepth,
                         saldiffqry.c.saldiff).\
                select_from(zdiffminqry).\
                join(saldiffqry,and_(saldiffqry.c.saldiff==zdiffminqry.c.saldiffmin,
                                    saldiffqry.c.ChlID==zdiffminqry.c.ChlIDzd,
                                    saldiffqry.c.zdiff==zdiffminqry.c.zdiffmin)).group_by(zdiffminqry.c.ChlIDzd)
print(saldepthqry.count())

1934
1934
1934


In [50]:
for row in saldepthqry.all():
    rii=session.query(ChlProfTBL).filter(ChlProfTBL.ID==row.ChlIDzd)
    rii.update({'depthSalMatchCTD': row.convertedDepth})
session.commit()

In [51]:
print(session.query(ChlProfTBL.depthSalMatchCTD).filter(ChlProfTBL.depthSalMatchCTD>0).count())

1934


In [52]:
profchlctd=session.query(ChlProfTBL.ID,CTDProfTBL.ID,CombStationTBL.StName,ChlProfTBL.depth, ChlProfTBL.corrdepth,
                         ChlProfTBL.depthSalMatchCTD, CTDProfTBL.convertedDepth, CTDProfTBL.prSM,
                        CTDProfTBL.sal00,ChlProfTBL.sal,CTDProfTBL.sourceFile,ChlProfTBL.sourceFile).\
    select_from(CombStationTBL).join(ChlProfTBL,ChlProfTBL.ChlStationTBLID==CombStationTBL.ID_ChlSta).\
    join(CTDProfTBL,and_(CTDProfTBL.CTDStationTBLID==CombStationTBL.ID_CTDSta,
        CTDProfTBL.convertedDepth==ChlProfTBL.depthSalMatchCTD)).\
    filter(and_(func.abs(CTDProfTBL.sal00-ChlProfTBL.sal)>1.0,
              func.abs(ChlProfTBL.depth-ChlProfTBL.depthSalMatchCTD)>=1.0),CombStationTBL.ID_ChlSta!=None,
                                            CombStationTBL.ID_CTDSta!=None)

checkprofs=pd.DataFrame(profchlctd.all())
# check profiles where salinity match is very bad
# refine by difference in depth and matchdepth
#    sometimes the difference is due to quickly changing salinities; keep these
#    other times the difference is due to CTD prof starting at greater depth; eliminate these
sizeme(checkprofs,80,90)

Unnamed: 0,ID,ID.1,StName,depth,corrdepth,depthSalMatchCTD,convertedDepth,prSM,sal00,sal,sourceFile,sourceFile.1
0,116,6271,S1,5.0,6.3,6.940357,6.940357,7.0,24.881,23.032,SG0204001.cnv,bottle_0204.txt
1,144,7769,S4-2,0.0,0.0,1.982983,1.982983,2.0,18.1635,13.167,SG0204007.cnv,bottle_0204.txt
2,148,8114,S4-3,0.0,0.0,1.982983,1.982983,2.0,13.9061,12.262,SG0204008.cnv,bottle_0204.txt
3,204,11035,S2-1,10.0,9.1,8.923273,8.923273,9.0,25.2894,26.463,SG0207002.cnv,bottle_0207.txt
4,329,17730,S2-1,5.0,2.6,8.923273,8.923273,9.0,27.8002,26.628,SG0210002.cnv,bottle_0210.txt
5,363,19741,S2-3,5.0,3.7,3.965947,3.965947,4.0,28.4315,27.424,SG0301000.cnv,bottle_0301.txt
6,795,44258,T5,30.0,28.7,25.281595,25.281595,25.5,29.1311,26.157,SG0311004.cnv,bottle_0311.txt
7,818,46552,T3,0.0,,1.487239,1.487239,1.5,19.7578,15.978,SG0312002.cnv,bottle_0312.txt
8,879,51945,S2-3,0.0,0.0,1.982983,1.982983,2.0,14.7995,11.6,SG0314000.cnv,bottle_0314.txt
9,880,51947,S2-3,5.0,4.5,3.965947,3.965947,4.0,20.2818,22.546,SG0314000.cnv,bottle_0314.txt


In [53]:
# check profiles where salinity match is very bad
# refine by difference in depth and matchdepth
#    sometimes the difference is due to quickly changing salinities; keep these
#    other times the difference is due to CTD prof starting at greater depth; eliminate these

# make query id'ing top level of each CTD prof, and eliminate matches where 
#   depth diff>1 and saldiff > 1 and chl depth < match depth
# -> set ChlProfTBL.depthSalMatchCTD = -999 at these locations so that they can easily be removed from future matching
firstCTD=session.query(CTDProfTBL.CTDStationTBLID,func.min(CTDProfTBL.prSM).label('minPR')).group_by(CTDProfTBL.CTDStationTBLID).subquery()

profchlctd=session.query(ChlProfTBL.ID.label('ChlID'),CTDProfTBL.ID.label('CTDID'),CombStationTBL.StName,ChlProfTBL.depth, ChlProfTBL.corrdepth,
                         ChlProfTBL.depthSalMatchCTD, CTDProfTBL.convertedDepth, CTDProfTBL.prSM,
                        CTDProfTBL.sal00,ChlProfTBL.sal,firstCTD.c.minPR,CTDProfTBL.sourceFile,ChlProfTBL.sourceFile).\
    select_from(CombStationTBL).join(ChlProfTBL,ChlProfTBL.ChlStationTBLID==CombStationTBL.ID_ChlSta).\
    join(CTDProfTBL,and_(CTDProfTBL.CTDStationTBLID==CombStationTBL.ID_CTDSta,
        CTDProfTBL.convertedDepth==ChlProfTBL.depthSalMatchCTD)).outerjoin(firstCTD,
                                                            and_(
                                                            CTDProfTBL.CTDStationTBLID==firstCTD.c.CTDStationTBLID,
                                                            CTDProfTBL.prSM==firstCTD.c.minPR)).\
    filter(and_(func.abs(CTDProfTBL.sal00-ChlProfTBL.sal)>1.0,
              func.abs(ChlProfTBL.depth-ChlProfTBL.depthSalMatchCTD)>1.0,
              firstCTD.c.minPR!=None))
ChIds=[irow.ChlID for irow in profchlctd.all()]
print(ChIds)
checkprofs=pd.DataFrame(profchlctd.all())
sizeme(checkprofs,80,90)

[144, 148, 329, 818, 1616, 1867, 2065]


Unnamed: 0,ChlID,CTDID,StName,depth,corrdepth,depthSalMatchCTD,convertedDepth,prSM,sal00,sal,minPR,sourceFile,sourceFile.1
0,144,7769,S4-2,0.0,0.0,1.982983,1.982983,2.0,18.1635,13.167,2.0,SG0204007.cnv,bottle_0204.txt
1,148,8114,S4-3,0.0,0.0,1.982983,1.982983,2.0,13.9061,12.262,2.0,SG0204008.cnv,bottle_0204.txt
2,329,17730,S2-1,5.0,2.6,8.923273,8.923273,9.0,27.8002,26.628,9.0,SG0210002.cnv,bottle_0210.txt
3,818,46552,T3,0.0,,1.487239,1.487239,1.5,19.7578,15.978,1.5,SG0312002.cnv,bottle_0312.txt
4,1616,93926,S7-3,5.0,,0.991494,0.991494,1.0,24.5249,18.146,1.0,SG0411015.cnv,bottle_0411.txt
5,1867,110138,S2-3,5.0,,0.991494,0.991494,1.0,9.1969,27.723,1.0,SG0501000.cnv,bottle_0501.txt
6,2065,119716,S5,5.0,,0.991494,0.991494,1.0,27.8301,25.802,1.0,SG0505002.cnv,bottle_0505.txt


In [54]:
for idval in ChIds:
    rii=session.query(ChlProfTBL).filter(ChlProfTBL.ID==idval)
    rii.update({'depthSalMatchCTD': -999})
session.commit()

In [55]:
profchlctd=session.query(ChlProfTBL.ID.label('ID_ChlProf'),CTDProfTBL.ID.label('ID_CTDProf'),
                         ChlProfTBL.depthSalMatchCTD.label('Depth'),CTDProfTBL.t090C.label('Temperature'),
                         CTDProfTBL.sal00.label('Salinity'),CTDProfTBL.par.label('PAR_ctd'),CTDProfTBL.xmiss.label('Xmis_ctd'),
                         CTDProfTBL.wetStar.label('Fluor_ctd'),ChlProfTBL.chl002.label('chl002'),ChlProfTBL.chl020.label('chl020'),
                         ChlProfTBL.chl200.label('chl200'),ChlProfTBL.oxML.label('Ox_bot'),CTDProfTBL.oxMLL.label('Ox_ctd'),
                         ChlProfTBL.no3.label('NO3'),ChlProfTBL.po4.label('PO4'),ChlProfTBL.si.label('Si'),
                         CombStationTBL.ID.label('CombStationTBLID')).\
                select_from(ChlProfTBL).join(CombStationTBL,CombStationTBL.ID_ChlSta==ChlProfTBL.ChlStationTBLID).\
                join(CTDProfTBL,and_(CTDProfTBL.CTDStationTBLID==CombStationTBL.ID_CTDSta,
                                    CTDProfTBL.convertedDepth==ChlProfTBL.depthSalMatchCTD))
print(profchlctd.count())
print(session.query(ChlProfTBL).filter(ChlProfTBL.depthSalMatchCTD!=None).count())
checkprofs=pd.DataFrame(profchlctd.all())
sizeme(checkprofs.head(15),80,100)

1927
1927


Unnamed: 0,ID_ChlProf,ID_CTDProf,Depth,Temperature,Salinity,PAR_ctd,Xmis_ctd,Fluor_ctd,chl002,chl020,chl200,Ox_bot,Ox_ctd,NO3,PO4,Si,CombStationTBLID
0,1,1,0.991494,8.7769,24.8197,127.44,79.3714,14.038,12.5,9.9,9.1,,,4.1,0.5,22.1,1
1,4,142,0.991494,9.0391,25.6621,123.75,63.702,21.226,16.3,11.2,12.3,,,12.1,0.3,7.2,2
2,5,146,4.957422,8.4333,28.3877,21.224,80.2542,27.3503,15.7,,,,,2.0,0.6,18.4,2
3,6,157,15.863327,7.907,29.5599,2.074,92.4175,7.5571,6.3,,,,,21.8,1.7,46.0,2
4,7,280,0.991494,8.9074,27.2958,124.44,84.7162,8.4304,5.2,4.0,4.1,9.8,9.42227,0.0,0.1,4.5,3
5,8,284,4.957422,8.6006,28.0443,22.538,83.8724,10.3115,7.5,,,9.84,8.97628,0.0,0.2,6.1,3
6,9,297,17.846156,7.7376,29.2659,2.6962,89.87,9.8762,7.8,,,5.69,6.36316,24.7,2.1,51.2,3
7,10,392,1.982983,8.9209,26.7153,142.99,83.7169,12.9309,8.6,6.7,6.4,9.41,10.40733,0.4,0.1,2.5,4
8,11,394,3.965947,8.3412,28.3212,60.749,86.1171,15.948,13.3,,,10.02,9.79721,0.0,0.5,7.4,4
9,12,409,18.837564,7.7829,29.2381,1.9623,90.8153,4.1236,8.1,,,6.25,6.83668,20.5,1.9,42.8,4


In [56]:
print(str(profchlctd.statement.compile(engine)).replace('"',''))

SELECT ChlProfTBL.ID AS ID_ChlProf, CTDProfTBL.ID AS ID_CTDProf, ChlProfTBL.depthSalMatchCTD AS Depth, CTDProfTBL.t090C AS Temperature, CTDProfTBL.sal00 AS Salinity, CTDProfTBL.par AS PAR_ctd, CTDProfTBL.xmiss AS Xmis_ctd, CTDProfTBL.wetStar AS Fluor_ctd, ChlProfTBL.chl002 AS chl002, ChlProfTBL.chl020 AS chl020, ChlProfTBL.chl200 AS chl200, ChlProfTBL.oxML AS Ox_bot, CTDProfTBL.oxMLL AS Ox_ctd, ChlProfTBL.no3 AS NO3, ChlProfTBL.po4 AS PO4, ChlProfTBL.si AS Si, CombStationTBL.ID AS CombStationTBLID 
FROM ChlProfTBL JOIN CombStationTBL ON CombStationTBL.ID_ChlSta = ChlProfTBL.ChlStationTBLID JOIN CTDProfTBL ON CTDProfTBL.CTDStationTBLID = CombStationTBL.ID_CTDSta AND CTDProfTBL.convertedDepth = ChlProfTBL.depthSalMatchCTD


In [57]:
session.execute(" INSERT INTO CombProfTBL (ID_ChlProf, ID_CTDProf, Depth, Temperature, Salinity, PAR_ctd, Xmis_ctd,  \
Fluor_ctd, chl002, chl020, chl200, Ox_bot, Ox_ctd, NO3, PO4, Si, CombStationTBLID) SELECT ChlProfTBL.ID AS ID_ChlProf, \
CTDProfTBL.ID AS ID_CTDProf, ChlProfTBL.depthSalMatchCTD AS Depth, CTDProfTBL.t090C AS Temperature, \
CTDProfTBL.sal00 AS Salinity, CTDProfTBL.par AS PAR_ctd, CTDProfTBL.xmiss AS Xmis_ctd, CTDProfTBL.wetStar AS Fluor_ctd, \
ChlProfTBL.chl002 AS chl002, ChlProfTBL.chl020 AS chl020, ChlProfTBL.chl200 AS chl200, ChlProfTBL.oxML AS Ox_bot, \
CTDProfTBL.oxMLL AS Ox_ctd, ChlProfTBL.no3 AS NO3, ChlProfTBL.po4 AS PO4, ChlProfTBL.si AS Si, \
CombStationTBL.ID AS CombStationTBLID FROM ChlProfTBL JOIN CombStationTBL ON \
CombStationTBL.ID_ChlSta = ChlProfTBL.ChlStationTBLID JOIN CTDProfTBL ON \
CTDProfTBL.CTDStationTBLID = CombStationTBL.ID_CTDSta AND CTDProfTBL.convertedDepth = ChlProfTBL.depthSalMatchCTD ;")

<sqlalchemy.engine.result.ResultProxy at 0x7faa556e8810>

In [58]:
session.commit()

#### add chl profiles that were not matched with CTD profiles

In [59]:
profchl=session.query(ChlProfTBL.ID.label('ID_ChlProf'),ChlProfTBL.depth.label('Depth'),ChlProfTBL.sal.label('Salinity'),
                      ChlProfTBL.chl002.label('chl002'),ChlProfTBL.chl020.label('chl020'),
                      ChlProfTBL.chl200.label('chl200'),ChlProfTBL.oxML.label('Ox_bot'),ChlProfTBL.no3.label('NO3'),
                      ChlProfTBL.po4.label('PO4'),ChlProfTBL.si.label('Si'),CombStationTBL.ID.label('CombStationTBLID')).\
                select_from(ChlProfTBL).join(CombStationTBL,CombStationTBL.ID_ChlSta==ChlProfTBL.ChlStationTBLID).\
                filter(or_(ChlProfTBL.depthSalMatchCTD==None,ChlProfTBL.depthSalMatchCTD<0))
print(profchl.count())
print(session.query(ChlProfTBL).filter(or_(ChlProfTBL.depthSalMatchCTD==None,ChlProfTBL.depthSalMatchCTD<0)).count())
checkprofs=pd.DataFrame(profchl.all())
sizeme(checkprofs.head(15),80,100)

234
234


Unnamed: 0,ID_ChlProf,Depth,Salinity,chl002,chl020,chl200,Ox_bot,NO3,PO4,Si,CombStationTBLID
0,2,5.0,,12.0,9.6,2.6,,0.4,0.4,13.4,1
1,3,20.0,,1.9,3.0,9.7,,28.0,2.0,59.5,1
2,20,30.0,,,,,5.43,,,,6
3,28,20.0,,8.0,,,5.48,24.7,2.1,44.5,7
4,39,30.0,,,,,5.02,26.4,2.2,47.6,10
5,58,200.0,,,,,4.26,,,,14
6,134,30.0,,,,,4.77,19.9,1.5,35.7,32
7,139,390.0,31.143,,,,3.63,36.2,2.3,53.9,32
8,144,0.0,13.167,3.4,2.4,1.0,7.17,0.0,0.0,39.8,34
9,148,0.0,12.262,3.6,4.5,0.7,7.45,9.3,1.2,38.9,35


In [60]:
print(str(profchl.statement.compile(engine)).replace('"',''))

SELECT ChlProfTBL.ID AS ID_ChlProf, ChlProfTBL.depth AS Depth, ChlProfTBL.sal AS Salinity, ChlProfTBL.chl002 AS chl002, ChlProfTBL.chl020 AS chl020, ChlProfTBL.chl200 AS chl200, ChlProfTBL.oxML AS Ox_bot, ChlProfTBL.no3 AS NO3, ChlProfTBL.po4 AS PO4, ChlProfTBL.si AS Si, CombStationTBL.ID AS CombStationTBLID 
FROM ChlProfTBL JOIN CombStationTBL ON CombStationTBL.ID_ChlSta = ChlProfTBL.ChlStationTBLID 
WHERE ChlProfTBL.depthSalMatchCTD IS NULL OR ChlProfTBL.depthSalMatchCTD < ?


In [61]:
session.execute(" INSERT INTO CombProfTBL (ID_ChlProf, Depth, Salinity, \
chl002, chl020, chl200, Ox_bot, NO3, PO4, Si, CombStationTBLID) SELECT ChlProfTBL.ID AS ID_ChlProf, \
ChlProfTBL.depth AS Depth, ChlProfTBL.sal AS Salinity, ChlProfTBL.chl002 AS chl002, ChlProfTBL.chl020 AS chl020, \
ChlProfTBL.chl200 AS chl200, ChlProfTBL.oxML AS Ox_bot, ChlProfTBL.no3 AS NO3, ChlProfTBL.po4 AS PO4, \
ChlProfTBL.si AS Si, CombStationTBL.ID AS CombStationTBLID FROM ChlProfTBL JOIN CombStationTBL ON \
CombStationTBL.ID_ChlSta = ChlProfTBL.ChlStationTBLID WHERE ChlProfTBL.depthSalMatchCTD IS NULL OR \
ChlProfTBL.depthSalMatchCTD < 0.0 ;")
session.commit()

            ID           new 
            ID_ChlProf   ChlProfTBL.ID
            ID_CTDProf   CTDProfTBL.ID
            Depth        ChlProfTBL.corrdepth, ChlProfTBL.depth, CTDProfTBL.convertedDepth
            Temperature  CTDProfTBL.t090C
            Salinity     compare CTDProfTBL.sal00, ChlProfTBL.sal
            PAR_ctd      CTDProfTBL.par
            Xmis_ctd     CTDProfTBL.xmiss
            Fluor_ctd    CTDProfTBL.wetStar
            chl002       ChlProfTBL.chl002
            chl020       ChlProfTBL.chl020
            chl200       ChlProfTBL.chl200
            Ox_bot       ChlProfTBL.oxML
            Ox_ctd       CTDProfTBL.oxMLL
            NO3          ChlProfTBL.no3
            PO4          ChlProfTBL.po4
            Si           ChlProfTBL.si
            CombStationTBLID   key to CombStationTBL: get by join 

#### add CTD profiles that were not matched with Chl profiles

In [62]:
# id CTD profiles not already in table:
# difference in numbers makes sense becaues profctd should not include profs from stations marked as bad but not deleted
missingCTD=session.query(CTDProfTBL.ID).select_from(CTDProfTBL).outerjoin(CombProfTBL,
                        CombProfTBL.ID_CTDProf==CTDProfTBL.ID).filter(CombProfTBL.ID_CTDProf==None).subquery()
print(session.query(missingCTD.c.ID).count())
profctd=session.query(CTDProfTBL.ID.label('ID_CTDProf'),CTDProfTBL.convertedDepth.label('Depth'),
                      CTDProfTBL.t090C.label('Temperature'),CTDProfTBL.sal00.label('Salinity'),
                      CTDProfTBL.par.label('PAR_ctd'),CTDProfTBL.xmiss.label('Xmis_ctd'),
                      CTDProfTBL.wetStar.label('Fluor_ctd'),CTDProfTBL.oxMLL.label('Ox_ctd'),
                      CombStationTBL.ID.label('CombStationTBLID')).\
                select_from(CTDProfTBL).join(missingCTD,missingCTD.c.ID==CTDProfTBL.ID).\
                join(CombStationTBL,CombStationTBL.ID_CTDSta==CTDProfTBL.CTDStationTBLID)
print(profctd.count())
checkprofs=pd.DataFrame(profctd.all())
sizeme(checkprofs.head(15),80,100)

128545
122793


Unnamed: 0,ID_CTDProf,Depth,Temperature,Salinity,PAR_ctd,Xmis_ctd,Fluor_ctd,Ox_ctd,CombStationTBLID
0,2,1.982983,8.7149,26.6071,83.959,78.5644,19.5129,,1
1,3,2.974468,8.6198,27.9831,56.343,82.0421,22.186,,1
2,4,3.965947,8.5194,28.2416,37.711,83.442,17.0891,,1
3,5,4.957422,8.4183,28.3202,26.34,86.105,17.7157,,1
4,6,5.948892,8.1962,28.3649,18.228,85.1214,18.0643,,1
5,7,6.940357,8.2771,28.4058,12.893,84.3845,21.1819,,1
6,8,7.931818,8.3768,28.447,8.8522,80.745,27.7864,,1
7,9,8.923273,8.3723,28.4956,6.0987,82.2901,23.8123,,1
8,10,9.914724,8.2936,28.6061,4.4944,82.247,22.7579,,1
9,11,10.90617,7.9694,28.81,3.5565,76.2597,26.5406,,1


In [63]:
print(str(profctd.statement.compile(engine)).replace('"',''))

SELECT CTDProfTBL.ID AS ID_CTDProf, CTDProfTBL.convertedDepth AS Depth, CTDProfTBL.t090C AS Temperature, CTDProfTBL.sal00 AS Salinity, CTDProfTBL.par AS PAR_ctd, CTDProfTBL.xmiss AS Xmis_ctd, CTDProfTBL.wetStar AS Fluor_ctd, CTDProfTBL.oxMLL AS Ox_ctd, CombStationTBL.ID AS CombStationTBLID 
FROM CTDProfTBL JOIN (SELECT CTDProfTBL.ID AS ID 
FROM CTDProfTBL LEFT OUTER JOIN CombProfTBL ON CombProfTBL.ID_CTDProf = CTDProfTBL.ID 
WHERE CombProfTBL.ID_CTDProf IS NULL) AS anon_1 ON anon_1.ID = CTDProfTBL.ID JOIN CombStationTBL ON CombStationTBL.ID_CTDSta = CTDProfTBL.CTDStationTBLID


In [64]:
session.execute(" INSERT INTO CombProfTBL (ID_CTDProf, Depth, Temperature, Salinity, PAR_ctd, Xmis_ctd,  \
Fluor_ctd, Ox_ctd, CombStationTBLID) SELECT CTDProfTBL.ID AS ID_CTDProf, CTDProfTBL.convertedDepth AS Depth, \
CTDProfTBL.t090C AS Temperature, CTDProfTBL.sal00 AS Salinity, CTDProfTBL.par AS PAR_ctd, CTDProfTBL.xmiss AS Xmis_ctd, \
CTDProfTBL.wetStar AS Fluor_ctd, CTDProfTBL.oxMLL AS Ox_ctd, CombStationTBL.ID AS CombStationTBLID \
FROM CTDProfTBL JOIN (SELECT CTDProfTBL.ID AS ID \
FROM CTDProfTBL LEFT OUTER JOIN CombProfTBL ON CombProfTBL.ID_CTDProf = CTDProfTBL.ID \
WHERE CombProfTBL.ID_CTDProf IS NULL) AS anon_1 ON anon_1.ID = CTDProfTBL.ID JOIN CombStationTBL ON \
CombStationTBL.ID_CTDSta = CTDProfTBL.CTDStationTBLID ;")
session.commit()

In [65]:
session.close()
engine.dispose()
