In [16]:
import pandas as pd
import numpy as np
import glob
import gsw
import plotly.express as px
import plotly.graph_objects as go
from scipy import interpolate
from dataclasses import dataclass, field
import pysftp
import fnmatch

In [17]:
@dataclass
class PBOSdata :
    '''Class to get CTD and anc log files from PBOS over SFTP.

    Attributes:
        fdir:  local directory that downloaded files are written to.
        fname: glob pattern of the files most recently requested; every
               get* method overwrites it with its own pattern.
    '''
    fdir : str = "."
    fname : str = "ctd*.log"

    def _fetch(self, pattern, force_download) :
        '''Download every remote file matching `pattern` into `self.fdir`.

        Args:
            pattern: fnmatch/glob style pattern; it is also stored in self.fname.
            force_download: when false, the download is skipped if at least one
                local file in self.fdir already matches `pattern`.

        Returns:
            None
        '''
        import os  # local import so this class does not rely on a file-level `import os`

        self.fname = pattern

        # Skip the transfer when matching files are already present locally
        if glob.glob(os.path.join(self.fdir, self.fname)) and not force_download:
            return

        print(f'Getting files {self.fname} from PBOS')

        # NOTE(review): credentials should not live in source control. They can
        # be overridden through the environment; the literals are kept only as
        # a backward-compatible fallback and should be rotated and removed.
        host = os.environ.get('PBOS_SFTP_HOST', '107.190.208.42')
        # host = '10.123.123.28'
        port = int(os.environ.get('PBOS_SFTP_PORT', '10022'))
        username = os.environ.get('PBOS_SFTP_USER', 'rnd20220701')
        password = os.environ.get('PBOS_SFTP_PASSWORD', 'uvaf2022again')
        remotePath = './'

        cnopts = pysftp.CnOpts()
        # NOTE(review): hostkeys = None turns off host-key verification, so the
        # server identity is not checked. Kept for compatibility.
        cnopts.hostkeys = None
        with pysftp.Connection(host=host, port=port, username=username, password=password, cnopts=cnopts) as sftp:
            print("Connection succesfully established ... ")
            for filename in sftp.listdir(remotePath):
                if fnmatch.fnmatch(filename, self.fname):
                    print(filename)
                    # Join with a separator: plain concatenation wrote to
                    # "<fdir><filename>" whenever fdir had no trailing slash.
                    sftp.get(remotePath + filename, os.path.join(self.fdir, filename))
        return

    def getCTDs(self, force_download=True) :
        '''Retrieve the "ctd*.log" files via SFTP.

        With force_download false, nothing is transferred when a matching
        file already exists in self.fdir.
        '''
        return self._fetch("ctd*.log", force_download)

    def getANCs(self, force_download=True) :
        '''Retrieve the "anc_*.log.gz" files via SFTP.

        With force_download false, nothing is transferred when a matching
        file already exists in self.fdir.
        '''
        return self._fetch("anc_*.log.gz", force_download)


In [18]:
from hashlib import pbkdf2_hmac
from pyrsistent import pbag


# Local directory that receives the files fetched from PBOS
rootDir = '/Users/ericrehm/Danaher/SBS Technology Research - 2021 UVAF/Deliverables/Data/sftp/'

# Always re-download the anc logs; the CTD download is left disabled
pb = PBOSdata(fdir=rootDir)
# pb.getCTDs(True)
pb.getANCs(force_download=True)

Getting files anc_*.log.gz from PBOS
Connection succesfully established ... 
anc_cst190.log.gz
anc_eco.log.gz
anc_ecopar.log.gz
anc_satpar.log.gz
anc_seaowl.log.gz


In [19]:

# Local directory holding the downloaded anc logs
rootDir = '/Users/ericrehm/Danaher/SBS Technology Research - 2021 UVAF/Deliverables/Data/sftp/'

# Log files to ingest and, position for position, how many leading columns to read from each
anclogs = [
    'anc_cst190.log.gz',
    'anc_eco.log.gz',
    'anc_seaowl.log.gz',
    'anc_ecopar.log.gz',
    'anc_satpar.log.gz',
]
# anclogs = ['anc_ecopar.log.gz']
ncols = [
    10,
    12,
    18,
    7,
    11,
]

# Calibration data: one ScaleFactor / DarkCounts pair per channel
ecoCal = dict(
    Chl470=dict(ScaleFactor=0.0073, DarkCounts=48),
    Beta700=dict(ScaleFactor=1.591e-06, DarkCounts=49),
    Chl435=dict(ScaleFactor=0.0073, DarkCounts=47),
)

seaowlCal = dict(
    Chl470=dict(ScaleFactor=2.006E-03, DarkCounts=49),
    Beta700=dict(ScaleFactor=4.852E-07, DarkCounts=49),
    fDOM=dict(ScaleFactor=3.218E-03, DarkCounts=50),
)

# Coefficients used by the ECOPAR conversion
ecoparCal = dict(a0=4777, a1=2902, im=1.3589)

# Empty long-format ("pre-melted") frame that the per-sensor rows are appended to
dfall = pd.DataFrame(data=[])


# Assumes data processed to ctdn.log by exctds.sh, where n = 1..7

ix = 0
for anclog in anclogs:
    print(anclog)
    # dfanc = pd.read_csv(rootDir+anclog, header=None, delim_whitespace=True)
    dfanc = pd.read_csv(rootDir+anclog, compression='gzip', header=None, delimiter=' |\t|,', usecols=range(0, ncols[ix]), engine='python', encoding = "ISO-8859-1", skiprows=1)

    # rename know SYSLOG columns
    dfanc.rename(columns={0:'date', 1:'time', 2:'host',3:'app'}, inplace=True)

    print(dfanc.head(2))

    dfanc.date = dfanc.date.astype('string')
    dfanc.time = dfanc.time.astype('string')
    dfanc.host = dfanc.host.astype('string')
    dfanc.app  = dfanc.app.astype('string')

    # Filter for correct patterns
    # yyyy-mm-dd
    patternKeep = r'^\d{4}\-(0[1-9]|1[012])\-(0[1-9]|[12][0-9]|3[01])$'
    filter = dfanc.date.str.contains(patternKeep)
    dfanc = dfanc[filter]

    # hh:mm:ss
    patternKeep = r'^(\d{2}):(\d{2}):(\d{2})$'
    filter = dfanc.time.str.contains(patternKeep)
    dfanc = dfanc[filter]
    # print(dfanc.head(2))

    # Create datetime from date and time
    dfsensor = pd.DataFrame([])
    dfsensor['dt'] = pd.to_datetime(dfanc.date + ' ' + dfanc.time)

    match dfanc.app[0]:
        case 'Port#8:' : 
            print('Port#8')
            dfsensor['sensor'] = 'cst190'
            dfsensor['value'] = dfanc[8]  # beam-c
            dfall = pd.concat([dfall, dfsensor], axis=0)

        case 'Port#10:' : 
            print('Port#10')
            dfsensor['sensor'] = 'ecochl470'
            dfsensor['value'] = ecoCal['Chl470']['ScaleFactor']*(dfanc[6]-ecoCal['Chl470']['DarkCounts'])  # chl-470
            dfall = pd.concat([dfall, dfsensor], axis=0)
            dfsensor['sensor'] = 'ecobb700'
            dfsensor['value'] = ecoCal['Beta700']['ScaleFactor']*(dfanc[8]-ecoCal['Beta700']['DarkCounts'])  # bb700
            dfall = pd.concat([dfall, dfsensor], axis=0)
            dfsensor['sensor'] = 'ecochl435'
            dfsensor['value'] = ecoCal['Chl435']['ScaleFactor']*(dfanc[10]-ecoCal['Chl470']['DarkCounts'])  # chl-435
            dfall = pd.concat([dfall, dfsensor], axis=0)

        case 'Port#11:' : 
            print('Port#11:')
            dfsensor['sensor'] = 'seachl470'
            dfsensor['value'] = seaowlCal['Chl470']['ScaleFactor']*(dfanc[8]-seaowlCal['Chl470']['DarkCounts'])  # chl-470
            dfall = pd.concat([dfall, dfsensor], axis=0)
            dfsensor['sensor'] = 'seabb700'
            dfsensor['value'] = seaowlCal['Beta700']['ScaleFactor']*(dfanc[12]-seaowlCal['Chl470']['DarkCounts'])  # bb700
            dfall = pd.concat([dfall, dfsensor], axis=0)
            dfsensor['sensor'] = 'seafdom'
            dfsensor['value'] = seaowlCal['fDOM']['ScaleFactor']*(dfanc[17]-seaowlCal['fDOM']['DarkCounts'])  # fdom
            dfall = pd.concat([dfall, dfsensor], axis=0)
            
        case 'Port#12:' : 
            dfsensor['sensor'] = 'ecopar'
            dfsensor['value'] = ecoparCal['im']*(10**((dfanc[6]-ecoparCal['a0'])/ecoparCal['a1']))  # ECOPAR PAR
            dfall = pd.concat([dfall, dfsensor], axis=0)

        case 'Port#13:' : 
            dfsensor['sensor'] = 'satpar'
            dfsensor['value'] = dfanc[6]  # SATPAR value
            dfall = pd.concat([dfall, dfsensor], axis=0)

        case _:
            print(f'Unknown app (Port): {dfanc.app[0]}')

    print(dfsensor.tail(2))
    ix = ix+1


anc_cst190.log.gz
         date      time           host      app          4     5      6  \
0  2022-09-09  14:41:08  10.123.123.22  Port#8:  CST-190PB  9873  13658   
1  2022-09-09  14:41:09  10.123.123.22  Port#8:  CST-190PB  9874  13659   

       7      8    9  
0  14501  0.299  516  
1  14501  0.299  515  


  filter = dfanc.date.str.contains(patternKeep)
  filter = dfanc.time.str.contains(patternKeep)


Port#8
                         dt  sensor  value
3785839 2022-10-17 12:01:29  cst190  1.826
3785840 2022-10-17 12:01:30  cst190  1.831
anc_eco.log.gz
         date      time           host       app                4    5     6  \
0  2022-09-09  14:41:08  10.123.123.22  Port#10:  FLBBFLRT2K-7341  695  3154   
1  2022-09-09  14:41:09  10.123.123.22  Port#10:  FLBBFLRT2K-7341  695  3154   

     7     8      9      10     11  
0  700  4130  695.0  4130.0  525.0  
1  700  4130  695.0  4130.0  525.0  


  filter = dfanc.date.str.contains(patternKeep)
  filter = dfanc.time.str.contains(patternKeep)


Port#10
                         dt     sensor   value
2861133 2022-10-17 12:01:11  ecochl435  4.8107
2861134 2022-10-17 12:01:12  ecochl435  4.7012
anc_seaowl.log.gz
         date      time           host       app              4     5    6  \
0  2022-09-09  14:41:09  10.123.123.22  Port#11:  SEAOWLA2K-013  3169  623   
1  2022-09-09  14:41:10  10.123.123.22  Port#11:  SEAOWLA2K-013  3162  621   

      7     8     9     10     11      12    13    14  15  16  17  
0  5881  5881  1665  15105  15366  151050  2177  2177  50  53  53  
1  5854  5854  1661  15108  15366  151080  2175  2175  50  49  49  


  filter = dfanc.date.str.contains(patternKeep)
  filter = dfanc.time.str.contains(patternKeep)


Port#11:
                         dt   sensor     value
2883672 2022-10-17 12:01:17  seafdom  1.203532
2883673 2022-10-17 12:01:18  seafdom  1.222840
anc_ecopar.log.gz
         date      time           host       app         4         5        6
0  2022-09-09  14:41:09  10.123.123.22  Port#12:  09/09/22  14:41:05  10134.0
1  2022-09-09  14:41:10  10.123.123.22  Port#12:  09/09/22  14:41:06  10134.0


  filter = dfanc.date.str.contains(patternKeep)
  filter = dfanc.time.str.contains(patternKeep)


                        dt  sensor     value
265284 2022-10-17 12:01:28  ecopar  3.546474
265285 2022-10-17 12:01:29  ecopar  3.540850
anc_satpar.log.gz
         date      time           host       app           4      5        6  \
0  2022-09-09  14:41:09  10.123.123.22  Port#13:  SATPRS1057  3.464  130.254   
1  2022-09-09  14:41:10  10.123.123.22  Port#13:  SATPRS1057  4.464  130.295   

     7    8     9   10  
0 -1.6  2.2  30.5  216  
1 -1.1  2.0  30.5  217  


  filter = dfanc.date.str.contains(patternKeep)
  filter = dfanc.time.str.contains(patternKeep)


                         dt  sensor  value
3183424 2022-10-17 12:01:39  satpar  3.118
3183425 2022-10-17 12:01:40  satpar  3.108


In [20]:
# Show the last two rows of the combined long-format frame
print(dfall.iloc[-2:])

                         dt  sensor  value
3183424 2022-10-17 12:01:39  satpar  3.118
3183425 2022-10-17 12:01:40  satpar  3.108


In [26]:
%matplotlib osx

import seaborn as sns
import matplotlib.dates as mdates

# Time window to plot; `between` keeps both end points
start_day = pd.to_datetime('10.11.2022')
end_day = pd.to_datetime('10.18.2022')
dfsub = dfall[dfall['dt'].between(start_day, end_day)]

# One scatter panel per sensor, three panels per row, independent y axes
g = sns.FacetGrid(dfsub, col='sensor', col_wrap=3, sharey=False)
g.map_dataframe(sns.scatterplot, x='dt', y='value', s=2, edgecolor='none')
xformatter = mdates.DateFormatter("%m/%d")
g.axes[0].xaxis.set_major_formatter(xformatter)

# y-axis limits by panel position (panels beyond this table keep their automatic limits)
panel_ylims = (
    (0, 5),      # 0: m-1
    (0, 10),     # 1: mg m-3
    (0, 0.01),   # 2: m-1 sr-1
    (0, 10),     # 3: mg m-3
    (0, 10),     # 4: mg m-3
    (0, 0.01),   # 5: m-1 sr-1
    (0, 3),      # 6: ppb
    (0, 100),    # 7: umol m-2 s-1
    (0, 100),    # 8: umol m-2 s-1
)
for i, ax in enumerate(g.axes.flat):
    print(i)
    if i < len(panel_ylims):
        ax.set_ylim(*panel_ylims[i])


0
1
2
3
4
5
6
7
8


In [28]:
# Distinct sensor names present in the plotted window, in order of first appearance
dfsub['sensor'].unique()

array(['cst190', 'ecochl470', 'ecobb700', 'ecochl435', 'seachl470',
       'seabb700', 'seafdom', 'ecopar', 'satpar'], dtype=object)

In [29]:
# symbol_size = 10
# x1 = dfsub.dt
# y = dfsub[dfsub.sensor == 'satpar'].sensor / dfsub.sensor.ecopar
# fig = px.scatter(x=x, y=y, render_mode='webgl')

# fig.show()

AttributeError: 'Series' object has no attribute 'satpar'