In [1]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import datetime as dt
from salishsea_tools import evaltools as et

In [2]:
# read in stored station info (links station names to locations)
dir0 = '/ocean/eolson/MEOPAR/obs/WADE/ptools_data/ecology/'
sta_df = pd.read_pickle(dir0 + 'sta_df.p')

In [3]:
bottle_fn = dir0 + 'raw/ParkerMacCready2019CTDDataFeb2020.xlsx'
sheet_name = '2018-2019NutrientData'
sheet_chl = '2018-2019ChlaLabData'
bot = pd.read_excel(bottle_fn, sheet_name=sheet_name,engine='openpyxl')
chl = pd.read_excel(bottle_fn, sheet_name=sheet_chl, engine='openpyxl')

In [4]:
bot.dropna(how='all',inplace=True) # drop rows where all values are NaN
chl.dropna(how='all',inplace=True)
bot.dropna(subset=['Sampling Depth'],inplace=True) # drop rows with NaN sampling depths because nominal depths do not appear reliable
chl.dropna(subset=['Sampling Depth'],inplace=True)
# drop rows with no useful data:
bot.dropna(how='all',subset=['NH4_Lab','NO2_Lab','NO3_Lab','PO4_Lab','SiOH4_Lab'],inplace=True)
chl.dropna(how='all',subset=['Chla_Lab'],inplace=True)

In [5]:
# drop rows that appear to have misaligned Sampling depths
#Date	Station	Niskin	Nomdepth	Sampling Depth	CTD Cast Rep	Chla_Lab	Chla_QC	
    #2019-02-26	DNA001	12.0	30	1.136	1.0	1.9216	2.0	
chl.drop(chl.loc[(chl.Date==dt.datetime(2019,2,26))&(chl.Station=='DNA001')&\
                 (chl.Nomdepth==30)&(chl['Sampling Depth']==1.9216)].index,inplace=True)
# also remove from chl: 
#       2018-02-01  HCB010     9.0       30          10.545 sample is probably actually 30 m based on nuts
#       2018-03-09  CRR001     9.0       30         101.554 "
chl.drop(chl.loc[(chl.Date==dt.datetime(2018,2,1))&(chl.Station=='HCB010')&\
                 (chl.Nomdepth==30)&(chl['Sampling Depth']==10.545)].index,inplace=True)
chl.drop(chl.loc[(chl.Date==dt.datetime(2018,3,9))&(chl.Station=='CRR001')&\
                 (chl.Nomdepth==30)&(chl['Sampling Depth']==101.554)].index,inplace=True)
#remove from chl and nuts:
#       Date:2019-10-30 Station: ADM001  all 3 values (Nom depth 0,10,30 but sampling depth all ~126.7)
chl.drop(chl.loc[(chl.Date==dt.datetime(2019,10,30))&(chl.Station=='ADM001')].index,inplace=True)
bot.drop(chl.loc[(chl.Date==dt.datetime(2019,10,30))&(chl.Station=='ADM001')].index,inplace=True)

In [6]:
# average over rows having identical  (Date,Station,Niskin,Sampling Depth) [replicates]
# ignore Nomdepth because it seems to not always be accurate
chl2=pd.DataFrame(chl.groupby(['Date','Station','Niskin','Sampling Depth'],as_index=False).mean())

In [7]:
chl2

Unnamed: 0,Date,Station,Niskin,Sampling Depth,CTD Cast Rep,Chla_Lab,Chla_QC,Chla_QA,Chla_SampleFieldReplicateNumber
0,2018-01-10,PSS019,9.0,31.196,1.0,0.0710,2.0,3.0,1.0
1,2018-01-10,PSS019,11.0,11.404,1.0,0.1960,2.0,3.0,1.0
2,2018-01-10,PSS019,12.0,1.187,1.0,0.6673,2.0,3.0,1.0
3,2018-01-10,SAR003,9.0,31.057,1.0,0.0530,2.0,3.0,1.0
4,2018-01-10,SAR003,11.0,11.146,1.0,0.2097,2.0,3.0,1.0
...,...,...,...,...,...,...,...,...,...
1787,2019-12-19,NSQ002,9.0,30.676,1.0,0.3154,2.0,2.0,1.0
1788,2019-12-19,NSQ002,10.0,10.871,1.0,0.3640,2.0,2.0,1.0
1789,2019-12-19,NSQ002,12.0,1.214,1.0,0.4397,2.0,2.0,1.0
1790,2019-12-19,OAK004,11.0,11.106,1.0,0.5765,2.0,2.0,2.0


In [8]:
bot2=pd.DataFrame(bot.groupby(['Date','Station','Niskin','Sampling Depth'],as_index=False).mean())

In [9]:
allbot = pd.merge(left=bot2,right=chl2,how='outer',
                    left_on = ['Date','Station','Niskin','CTD Cast Rep','Sampling Depth'],
                    right_on = ['Date','Station','Niskin','CTD Cast Rep','Sampling Depth'])
print(f'len(allbot):{len(allbot)}, len(chl2):{len(chl2)}, len(bot2):{len(bot2)}')

len(allbot):2357, len(chl2):1792, len(bot2):2348


In [10]:
nutNoChl=len(allbot.loc[(~pd.isnull(allbot.NH4_Lab))&(pd.isnull(allbot.Chla_Lab))])
chlNoNut=len(allbot.loc[(pd.isnull(allbot.NH4_Lab))&(~pd.isnull(allbot.Chla_Lab))])
print(nutNoChl,chlNoNut)

565 9


In [11]:
# if these equal the total length, things are making sense:
print(len(bot2)+chlNoNut,len(chl2)+nutNoChl)

2357 2357


In [12]:
allbot.keys()

Index(['Date', 'Station', 'Niskin', 'Sampling Depth', 'CTD Cast Rep',
       'NH4_Lab', 'NH4_QC', 'NH4_QA', 'NH4_SampleFieldReplicateNumber',
       'NO2_Lab', 'NO2_QC', 'NO2_QA', 'NO2_SampleFieldReplicateNumber',
       'NO3_Lab', 'NO3_QC', 'NO3_QA', 'NO3_SampleFieldReplicateNumber',
       'PO4_Lab', 'PO4_QC', 'PO4_QA', 'PO4_SampleFieldReplicateNumber',
       'SiOH4_Lab', 'SiOH4_QC', 'SiOH4_QA', 'SiOH4_SampleFieldReplicateNumber',
       'Unnamed: 36', 'Chla_Lab', 'Chla_QC', 'Chla_QA',
       'Chla_SampleFieldReplicateNumber'],
      dtype='object')

#### Now add date/times

In [13]:
dfTime=pd.read_excel('/ocean/eolson/MEOPAR/obs/WADE/WDE_Data/OlsonSuchyAllen_UBC_PDR_P003790-010721.xlsx',
                    engine='openpyxl',sheet_name='EventDateTime')

In [14]:
## duplicate Station/Date entries with different times seem to be always within a couple of hours, 
# so just take the first (next cell)
test=dfTime.groupby(['FlightDate','SiteCode'])['TimeDown \n(Local - PST or PDT)'].count()
for date, loc in test[test>1].index:
    print(dfTime.loc[(dfTime.FlightDate==date)&(dfTime.SiteCode==loc),['FlightDate','SiteCode','TimeDown \n(Local - PST or PDT)']])

    FlightDate SiteCode TimeDown \n(Local - PST or PDT)
590 2001-07-09   DNA001                        17:09:00
591 2001-07-09   DNA001                        17:09:00
    FlightDate SiteCode TimeDown \n(Local - PST or PDT)
948 2003-06-25   BUD005                        17:00:00
949 2003-06-25   BUD005                        17:00:00
     FlightDate SiteCode TimeDown \n(Local - PST or PDT)
1362 2005-08-16   BUD005                        10:43:00
1363 2005-08-16   BUD005                        10:43:00
     FlightDate SiteCode TimeDown \n(Local - PST or PDT)
1429 2005-11-07   BUD005                        11:55:00
1430 2005-11-07   BUD005                        11:55:00
     FlightDate SiteCode TimeDown \n(Local - PST or PDT)
1446 2005-12-05   BUD005                        11:58:00
1447 2005-12-05   BUD005                        11:58:00
     FlightDate SiteCode TimeDown \n(Local - PST or PDT)
2104 2008-08-21   SJF000                        09:59:00
2105 2008-08-21   SJF000             

In [15]:
# drop duplicate rows
dfTime.drop_duplicates(subset=['FlightDate','SiteCode'],keep='first',inplace=True)
print(dfTime.keys())

Index(['FlightYear', 'FlightMonth', 'FlightDate', 'SiteCode', 'Sampled',
       'TimeDown \n(Local - PST or PDT)', 'FieldComment'],
      dtype='object')


In [16]:
dfTime['dtPac']=[dt.datetime.combine(idate, itime) for idate, itime \
         in zip(dfTime['FlightDate'],dfTime['TimeDown \n(Local - PST or PDT)'])]
dfTime['dtUTC']=[et.pac_to_utc(ii) for ii in dfTime['dtPac']]

In [17]:
allbot['dtUTC']=np.nan # create column and set all values to nan

In [18]:
# use loop to set dtUTC values, where available (where dates are unavailable, left as NaN for now)
for ind, row in allbot.iterrows():
    ix=(dfTime.FlightDate==row['Date'])&(dfTime.SiteCode==row['Station'])
    if np.sum(ix)==1:
        idate,itime=dfTime.loc[ix,['FlightDate','TimeDown \n(Local - PST or PDT)']].values[0]
        allbot.loc[ind,['UTCDateTime']]=et.pac_to_utc(dt.datetime.combine(idate,itime))

#### Now add lat/lons

In [19]:
# PROCESS STATION LOCATION INFO (based on Parker's code)
sta_fn='/ocean/eolson/MEOPAR/obs/WADE/WDE_Data/OlsonSuchyAllen_UBC_PDR_P003790-010721.xlsx'
sheetname='Site Info'
sta_df =pd.read_excel(sta_fn,engine='openpyxl',sheet_name=sheetname)
sta_df.dropna(how='any',subset=['Lat_NAD83 (deg / dec_min)','Long_NAD83 (deg / dec_min)','Station'],inplace=True)
sta_df = sta_df.set_index('Station')
# get locations in decimal degrees
for sta in sta_df.index:
    lat_str = sta_df.loc[sta, 'Lat_NAD83 (deg / dec_min)']
    lat_deg = float(lat_str.split()[0]) + float(lat_str.split()[1])/60
    sta_df.loc[sta,'Lat'] = lat_deg
    #
    lon_str = sta_df.loc[sta, 'Long_NAD83 (deg / dec_min)']
    lon_deg = float(lon_str.split()[0]) + float(lon_str.split()[1])/60
    sta_df.loc[sta,'Lon'] = -lon_deg    
sta_df.pop('Lat_NAD83 (deg / dec_min)');
sta_df.pop('Long_NAD83 (deg / dec_min)');

In [20]:
allbotFinal=pd.merge(left=sta_df,right=allbot,how='right',
                 left_on='Station',right_on='Station')

In [21]:
allbotFinal

Unnamed: 0,Station,Desig,Descrip,Basin,*Max_Depth,Lat,Lon,Date,Niskin,Sampling Depth,...,SiOH4_QC,SiOH4_QA,SiOH4_SampleFieldReplicateNumber,Unnamed: 36,Chla_Lab,Chla_QC,Chla_QA,Chla_SampleFieldReplicateNumber,dtUTC,UTCDateTime
0,HCB013,,,,,,,2018-01-10,3,26.820,...,2.0,3.0,1.0,,,,,,,
1,HCB013,,,,,,,2018-01-10,11,11.252,...,2.0,3.0,1.0,,,,,,,
2,HCB013,,,,,,,2018-01-10,12,1.336,...,2.0,3.0,1.0,,,,,,,
3,HCB013,,,,,,,2018-03-07,11,10.865,...,2.0,3.0,1.0,,,,,,,
4,HCB013,,,,,,,2018-03-07,12,1.170,...,2.0,3.0,1.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2352,WPA113,"C,M,T",Willapa Bay - Bay Center (red nun 2) - MT008,Willapa Bay,10.0,46.644,-123.993,2019-06-04,12,1.220,...,2.0,2.0,1.0,,6.2169,2.0,2.0,1.0,,
2353,WPA113,"C,M,T",Willapa Bay - Bay Center (red nun 2) - MT008,Willapa Bay,10.0,46.644,-123.993,2019-07-02,11,5.963,...,2.0,2.0,1.0,,6.7821,2.0,2.0,1.0,,
2354,WPA113,"C,M,T",Willapa Bay - Bay Center (red nun 2) - MT008,Willapa Bay,10.0,46.644,-123.993,2019-07-02,12,1.469,...,2.0,2.0,1.0,,,,,,,
2355,WPA113,"C,M,T",Willapa Bay - Bay Center (red nun 2) - MT008,Willapa Bay,10.0,46.644,-123.993,2019-09-11,11,6.032,...,2.0,2.0,1.0,,9.7210,2.0,2.0,1.0,,


In [22]:
np.unique(allbotFinal.loc[np.isnan(allbotFinal.Lat),['Station']])

array(['BLL040', 'HCB013'], dtype=object)

In [23]:
# There are two station designations that are not pressent in the station list. Drop them.
allbotFinal.dropna(how='any',subset=['Lat','Lon'],inplace=True)

In [24]:
# rename Sample Depth to Z
allbotFinal.rename(columns={'Sampling Depth':'Z'},inplace=True)

In [25]:
allbotFinal

Unnamed: 0,Station,Desig,Descrip,Basin,*Max_Depth,Lat,Lon,Date,Niskin,Z,...,SiOH4_QC,SiOH4_QA,SiOH4_SampleFieldReplicateNumber,Unnamed: 36,Chla_Lab,Chla_QC,Chla_QA,Chla_SampleFieldReplicateNumber,dtUTC,UTCDateTime
22,PSS019,C,Possession Sound - Gedney Island,Whidbey Basin,107.0,48.010927,-122.30125,2018-01-10,3,98.380,...,2.0,3.0,1.0,,,,,,,2018-01-10 20:34:00
23,PSS019,C,Possession Sound - Gedney Island,Whidbey Basin,107.0,48.010927,-122.30125,2018-01-10,9,31.196,...,2.0,3.0,1.0,,0.0710,2.0,3.0,1.0,,2018-01-10 20:34:00
24,PSS019,C,Possession Sound - Gedney Island,Whidbey Basin,107.0,48.010927,-122.30125,2018-01-10,11,11.404,...,2.0,3.0,1.0,,0.1960,2.0,3.0,1.0,,2018-01-10 20:34:00
25,PSS019,C,Possession Sound - Gedney Island,Whidbey Basin,107.0,48.010927,-122.30125,2018-01-10,12,1.187,...,2.0,3.0,1.0,,0.6673,2.0,3.0,1.0,,2018-01-10 20:34:00
26,PSS019,C,Possession Sound - Gedney Island,Whidbey Basin,107.0,48.010927,-122.30125,2018-03-07,3,86.010,...,2.0,3.0,1.0,,,,,,,2018-03-07 21:27:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2352,WPA113,"C,M,T",Willapa Bay - Bay Center (red nun 2) - MT008,Willapa Bay,10.0,46.644000,-123.99300,2019-06-04,12,1.220,...,2.0,2.0,1.0,,6.2169,2.0,2.0,1.0,,
2353,WPA113,"C,M,T",Willapa Bay - Bay Center (red nun 2) - MT008,Willapa Bay,10.0,46.644000,-123.99300,2019-07-02,11,5.963,...,2.0,2.0,1.0,,6.7821,2.0,2.0,1.0,,
2354,WPA113,"C,M,T",Willapa Bay - Bay Center (red nun 2) - MT008,Willapa Bay,10.0,46.644000,-123.99300,2019-07-02,12,1.469,...,2.0,2.0,1.0,,,,,,,
2355,WPA113,"C,M,T",Willapa Bay - Bay Center (red nun 2) - MT008,Willapa Bay,10.0,46.644000,-123.99300,2019-09-11,11,6.032,...,2.0,2.0,1.0,,9.7210,2.0,2.0,1.0,,


### here is an example loading the new chlorophyll data (includes all years)

In [26]:
fn='/ocean/eolson/MEOPAR/obs/WADE/WDE_Data/OlsonSuchyAllen_UBC_PDR_P003790-010721.xlsx'
sheetname='LabChlaPheo'
chlPheo =pd.read_excel(fn,engine='openpyxl',sheet_name=sheetname)

In [27]:
chlPheo.dropna(how='any',subset=['Date','Station','SamplingDepth'],inplace=True)

In [28]:
# average over replicates
chlPheo2=pd.DataFrame(chlPheo.groupby(['Date','Station','SamplingDepth'],as_index=False).mean())

In [29]:
# join to station info (lat/lon)
chlPheo3=pd.merge(left=sta_df,right=chlPheo2,how='right',
                 left_on='Station',right_on='Station')

In [30]:
# join to date/time
dfTime['dtUTC']=[et.pac_to_utc(dt.datetime.combine(idate,itime)) for idate,itime in \
                zip(dfTime['FlightDate'],dfTime['TimeDown \n(Local - PST or PDT)'])]
dfTime2=dfTime.loc[:,['FlightDate','SiteCode','dtUTC']]
chlPheoFinal=pd.merge(left=chlPheo3,right=dfTime2,how='left',
                      left_on=['Date','Station'],right_on=['FlightDate','SiteCode'])

In [31]:
len(chlPheoFinal),len(chlPheo3),len(dfTime2)

(11469, 11469, 5186)

### Stuff that helped id problems in matching chl and nuts:

In [32]:
def intna(x):
    try: 
        y=int(x)
    except:
        y=np.nan
    return y

In [33]:
np.abs(140-178)/178

0.21348314606741572

In [34]:
nd=[intna(ii) for ii in chl['Nomdepth']]
crit=[np.abs(isd-ind)>max(5,0.3*isd) for ind, isd in zip(nd,chl['Sampling Depth'])]

In [35]:
bot.keys()

Index(['Date', 'Station', 'Niskin', 'Nomdepth', 'Sampling Depth',
       'CTD Cast Rep', 'NH4_Lab', 'NH4_QC', 'NH4_QF', 'NH4_QA', 'NH4_Comment',
       'NH4_SampleFieldReplicateNumber', 'NO2_Lab', 'NO2_QC', 'NO2_QF',
       'NO2_QA', 'NO2_Comment', 'NO2_SampleFieldReplicateNumber', 'NO3_Lab',
       'NO3_QC', 'NO3_QF', 'NO3_QA', 'NO3_Comment',
       'NO3_SampleFieldReplicateNumber', 'PO4_Lab', 'PO4_QC', 'PO4_QF',
       'PO4_QA', 'PO4_Comment', 'PO4_SampleFieldReplicateNumber', 'SiOH4_Lab',
       'SiOH4_QC', 'SiOH4_QF', 'SiOH4_QA', 'SiOH4_Comment',
       'SiOH4_SampleFieldReplicateNumber', 'Unnamed: 36'],
      dtype='object')

In [36]:
for i, row in chl.loc[crit].iterrows():
    print(chl.loc[(chl.Date==row['Date'])&(chl.Station==row.Station),
                  ['Date','Station','Niskin','Nomdepth','Sampling Depth',
                  'CTD Cast Rep','Chla_Lab']])
    print(bot.loc[(bot.Date==row['Date'])&(bot.Station==row.Station),
                  ['Date','Station','Niskin','Nomdepth','Sampling Depth',
                  'CTD Cast Rep','NH4_Lab','NO3_Lab']])
    print('-------\n')

          Date Station  Niskin Nomdepth  Sampling Depth  CTD Cast Rep  \
119 2019-02-26  DNA001    12.0        0           1.136           1.0   
120 2019-02-26  DNA001    10.0       10           9.702           1.0   
121 2019-02-26  DNA001    12.0       30           1.136           1.0   

     Chla_Lab  
119    2.2268  
120    1.9555  
121    1.9216  
          Date Station  Niskin Nomdepth  Sampling Depth  CTD Cast Rep  \
163 2019-02-26  DNA001      12        0           1.136             1   
164 2019-02-26  DNA001      10       10           9.702             1   
165 2019-02-26  DNA001       4       30          29.815             1   

     NH4_Lab  NO3_Lab  
163   0.3302  27.5842  
164   0.3106  27.6943  
165   0.2201  27.8900  
-------

          Date Station  Niskin Nomdepth  Sampling Depth  CTD Cast Rep  \
425 2019-05-08  SJF002    12.0        0           0.713           1.0   
426 2019-05-08  SJF002     4.0      140          79.942           1.0   

     Chla_Lab  
425    1.

In [42]:
nomd=[ii if np.isfloat]

SyntaxError: invalid syntax (<ipython-input-42-ed44ba0b4660>, line 1)

In [38]:
allbot = pd.merge(left=bot,right=chl,how='outer',
                    left_on = ['Date','Station','Niskin','CTD Cast Rep','Sampling Depth','NO3_SampleFieldReplicateNumber'],
                    right_on = ['Date','Station','Niskin','CTD Cast Rep','Sampling Depth','Chla_SampleFieldReplicateNumber'])
print(f'len(allbot):{len(allbot)}, len(chl):{len(chl)}, len(bot):{len(bot)}')
#,'Nomdepth'

len(allbot):2649, len(chl):2024, len(bot):2587


In [39]:
bot.keys()

Index(['Date', 'Station', 'Niskin', 'Nomdepth', 'Sampling Depth',
       'CTD Cast Rep', 'NH4_Lab', 'NH4_QC', 'NH4_QF', 'NH4_QA', 'NH4_Comment',
       'NH4_SampleFieldReplicateNumber', 'NO2_Lab', 'NO2_QC', 'NO2_QF',
       'NO2_QA', 'NO2_Comment', 'NO2_SampleFieldReplicateNumber', 'NO3_Lab',
       'NO3_QC', 'NO3_QF', 'NO3_QA', 'NO3_Comment',
       'NO3_SampleFieldReplicateNumber', 'PO4_Lab', 'PO4_QC', 'PO4_QF',
       'PO4_QA', 'PO4_Comment', 'PO4_SampleFieldReplicateNumber', 'SiOH4_Lab',
       'SiOH4_QC', 'SiOH4_QF', 'SiOH4_QA', 'SiOH4_Comment',
       'SiOH4_SampleFieldReplicateNumber', 'Unnamed: 36'],
      dtype='object')

In [43]:
chl.keys()

Index(['Date', 'Station', 'Niskin', 'Nomdepth', 'Sampling Depth',
       'CTD Cast Rep', 'Chla_Lab', 'Chla_QC', 'Chla_QF', 'Chla_QA',
       'Chla_Comment', 'Chla_SampleFieldReplicateNumber'],
      dtype='object')

In [44]:
allbot.keys()

Index(['Date', 'Station', 'Niskin', 'Nomdepth_x', 'Sampling Depth',
       'CTD Cast Rep', 'NH4_Lab', 'NH4_QC', 'NH4_QF', 'NH4_QA', 'NH4_Comment',
       'NH4_SampleFieldReplicateNumber', 'NO2_Lab', 'NO2_QC', 'NO2_QF',
       'NO2_QA', 'NO2_Comment', 'NO2_SampleFieldReplicateNumber', 'NO3_Lab',
       'NO3_QC', 'NO3_QF', 'NO3_QA', 'NO3_Comment',
       'NO3_SampleFieldReplicateNumber', 'PO4_Lab', 'PO4_QC', 'PO4_QF',
       'PO4_QA', 'PO4_Comment', 'PO4_SampleFieldReplicateNumber', 'SiOH4_Lab',
       'SiOH4_QC', 'SiOH4_QF', 'SiOH4_QA', 'SiOH4_Comment',
       'SiOH4_SampleFieldReplicateNumber', 'Unnamed: 36', 'Nomdepth_y',
       'Chla_Lab', 'Chla_QC', 'Chla_QF', 'Chla_QA', 'Chla_Comment',
       'Chla_SampleFieldReplicateNumber'],
      dtype='object')

In [45]:
allbot['ones']=1

In [46]:
allbot.loc[(~pd.isnull(allbot.NH4_Lab))&(pd.isnull(allbot.Chla_Lab)),['Date','Station','Niskin','CTD Cast Rep','Sampling Depth','NH4_Lab','Chla_Lab']]

Unnamed: 0,Date,Station,Niskin,CTD Cast Rep,Sampling Depth,NH4_Lab,Chla_Lab
2,2019-01-07,BUD005,2,1,18.832,0.8230,
6,2019-01-07,CRR001,2,1,101.239,0.1599,
10,2019-01-07,CSE001,2,1,50.123,1.0020,
17,2019-01-07,GOR001,2,1,165.513,0.3369,
21,2019-01-07,NSQ002,2,1,91.115,1.9562,
...,...,...,...,...,...,...,...
2581,2018-12-10,GOR001,2,1,164.924,0.1996,
2585,2018-12-10,NSQ002,2,1,94.978,0.2414,
2586,2018-12-10,OAK004,12,1,0.628,4.4378,
2587,2018-12-10,OAK004,12,1,0.628,4.4997,


In [47]:
allbot.loc[(pd.isnull(allbot.NH4_Lab))&(~pd.isnull(allbot.Chla_Lab)),['Date','Station','Niskin','CTD Cast Rep','Sampling Depth','NH4_Lab','Chla_Lab']]

Unnamed: 0,Date,Station,Niskin,CTD Cast Rep,Sampling Depth,NH4_Lab,Chla_Lab
2590,2019-02-07,SJF001,12,1,0.87,,0.4194
2591,2019-02-07,SJF001,12,1,0.87,,0.3798
2592,2019-03-14,SJF001,12,1,0.677,,0.8428
2593,2019-03-14,SJF001,12,1,0.677,,0.8752
2594,2019-03-15,ELB015,12,1,0.897,,0.85
2595,2019-04-04,SJF001,10,1,0.665,,1.6855
2596,2019-04-04,SJF001,10,1,0.665,,1.6747
2597,2019-05-08,SJF001,10,1,0.628,,1.9449
2598,2019-05-08,SJF001,10,1,0.628,,1.8908
2599,2019-06-05,SJF001,10,1,0.604,,3.2846


In [48]:
bot[['Date','Station','Niskin','Sampling Depth']].head(5)

Unnamed: 0,Date,Station,Niskin,Sampling Depth
0,2019-01-07,BUD005,12,0.836
1,2019-01-07,BUD005,10,9.508
2,2019-01-07,BUD005,2,18.832
3,2019-01-07,CRR001,12,0.932
4,2019-01-07,CRR001,10,9.534


In [49]:
chl[['Date','Station','Niskin','Sampling Depth']].head(5)

Unnamed: 0,Date,Station,Niskin,Sampling Depth
0,2019-01-07,BUD005,12.0,0.836
1,2019-01-07,BUD005,10.0,9.508
2,2019-01-07,CRR001,12.0,0.932
3,2019-01-07,CRR001,10.0,9.534
4,2019-01-07,CRR001,4.0,29.781


In [50]:
allbot.groupby(['Date','Station','Niskin','Sampling Depth'])['ones'].count()

Date        Station  Niskin  Sampling Depth
2018-01-10  HCB013   3       26.820            1
                     11      11.252            1
                     12      1.336             1
            PSS019   3       98.380            1
                     9       31.196            1
                                              ..
2019-12-19  NSQ002   9       30.676            1
                     10      10.871            1
                     12      1.214             1
            OAK004   11      11.106            3
                     12      1.189             1
Name: ones, Length: 2357, dtype: int64

In [51]:
temp=allbot.groupby(['Date','Station','Niskin','Sampling Depth']).agg({'ones':['count']})
temp.columns = ['icount']
np.unique(temp.icount)

array([1, 2, 3])

In [52]:
temp.loc[temp.icount>1]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,icount
Date,Station,Niskin,Sampling Depth,Unnamed: 4_level_1
2018-01-12,OAK004,12,0.936,3
2018-01-17,HCB004,11,11.109,3
2018-01-23,CMB003,12,1.057,3
2018-02-01,HCB004,11,10.282,3
2018-02-02,OAK004,12,1.437,3
...,...,...,...,...
2019-11-20,WPA004,11,10.080,3
2019-11-21,OAK004,4,10.135,3
2019-12-05,HCB004,11,10.602,3
2019-12-09,CMB003,12,1.309,3


In [53]:
bot.loc[(bot.Date==dt.datetime(2018,1,12))&(bot.Station=='OAK004')]

Unnamed: 0,Date,Station,Niskin,Nomdepth,Sampling Depth,CTD Cast Rep,NH4_Lab,NH4_QC,NH4_QF,NH4_QA,...,PO4_QA,PO4_Comment,PO4_SampleFieldReplicateNumber,SiOH4_Lab,SiOH4_QC,SiOH4_QF,SiOH4_QA,SiOH4_Comment,SiOH4_SampleFieldReplicateNumber,Unnamed: 36
1328,2018-01-12,OAK004,12,0,0.936,1,2.0465,2,0,3,...,3,FRP1,1,106.1614,2,0,3,FRP1,1,
1329,2018-01-12,OAK004,12,0,0.936,1,1.9974,2,0,3,...,3,FRP2,2,106.5756,2,0,3,FRP2,2,
1330,2018-01-12,OAK004,12,0,0.936,1,2.0239,2,0,3,...,3,FRP3,3,105.8203,2,0,3,FRP3,3,
1331,2018-01-12,OAK004,11,10,8.823,1,1.906,2,0,3,...,3,,1,99.4781,2,0,3,,1,


In [54]:
chl.loc[(chl.Date==dt.datetime(2018,1,12))&(chl.Station=='OAK004')]

Unnamed: 0,Date,Station,Niskin,Nomdepth,Sampling Depth,CTD Cast Rep,Chla_Lab,Chla_QC,Chla_QF,Chla_QA,Chla_Comment,Chla_SampleFieldReplicateNumber
1070,2018-01-12,OAK004,12.0,0,0.936,1.0,0.519,2.0,0,3.0,FRP1,1.0
1071,2018-01-12,OAK004,12.0,0,0.936,1.0,0.3707,2.0,0,3.0,FRP2,2.0
1072,2018-01-12,OAK004,12.0,0,0.936,1.0,0.4449,2.0,0,3.0,FRP3,3.0
1073,2018-01-12,OAK004,11.0,10,8.823,1.0,0.4343,2.0,0,3.0,,1.0


In [55]:
temp.loc[temp.icount==2]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,icount
Date,Station,Niskin,Sampling Depth,Unnamed: 4_level_1
2018-02-15,EAP001,9,197.968,2
2018-02-22,WPA003,11,3.891,2
2018-03-07,SAR003,9,143.572,2
2018-03-08,HCB003,9,142.451,2
2018-05-23,PSS019,9,89.882,2
2018-06-07,HCB003,9,153.77,2
2018-08-07,SAR003,9,138.64,2
2019-02-26,DNA001,12,1.136,2


In [56]:
bot.loc[(bot.Date==dt.datetime(2019,2,26))&(bot.Station=='DNA001'),['Date','Station','Niskin','Nomdepth','Sampling Depth','NH4_Lab','NO3_Lab']]

Unnamed: 0,Date,Station,Niskin,Nomdepth,Sampling Depth,NH4_Lab,NO3_Lab
163,2019-02-26,DNA001,12,0,1.136,0.3302,27.5842
164,2019-02-26,DNA001,10,10,9.702,0.3106,27.6943
165,2019-02-26,DNA001,4,30,29.815,0.2201,27.89


In [57]:
chl.loc[(chl.Date==dt.datetime(2019,2,26))&(chl.Station=='DNA001')]

Unnamed: 0,Date,Station,Niskin,Nomdepth,Sampling Depth,CTD Cast Rep,Chla_Lab,Chla_QC,Chla_QF,Chla_QA,Chla_Comment,Chla_SampleFieldReplicateNumber
119,2019-02-26,DNA001,12.0,0,1.136,1.0,2.2268,2.0,0,2.0,,1.0
120,2019-02-26,DNA001,10.0,10,9.702,1.0,1.9555,2.0,0,2.0,,1.0
121,2019-02-26,DNA001,12.0,30,1.136,1.0,1.9216,2.0,0,2.0,,1.0


In [58]:
allbot.loc[(allbot.Date==dt.datetime(2019,2,26))&(allbot.Station=='DNA001'),['Date','Station','Niskin','Sampling Depth','Chla_Lab']]

Unnamed: 0,Date,Station,Niskin,Sampling Depth,Chla_Lab
158,2019-02-26,DNA001,12,1.136,2.2268
159,2019-02-26,DNA001,12,1.136,1.9216
160,2019-02-26,DNA001,10,9.702,1.9555
161,2019-02-26,DNA001,4,29.815,


In [59]:
#remove this entry in chl:
    #Date	Station	Niskin	Nomdepth	Sampling Depth	CTD Cast Rep	Chla_Lab	Chla_QC	
    #2019-02-26	DNA001	12.0	30	1.136	1.0	1.9216	2.0	

In [60]:
allbot.loc[(allbot.Nomdepth_x!=allbot.Nomdepth_y)]

Unnamed: 0,Date,Station,Niskin,Nomdepth_x,Sampling Depth,CTD Cast Rep,NH4_Lab,NH4_QC,NH4_QF,NH4_QA,...,SiOH4_SampleFieldReplicateNumber,Unnamed: 36,Nomdepth_y,Chla_Lab,Chla_QC,Chla_QF,Chla_QA,Chla_Comment,Chla_SampleFieldReplicateNumber,ones
2,2019-01-07,BUD005,2,NB,18.832,1,0.8230,2.0,0,2.0,...,1.0,,,,,,,,,1
6,2019-01-07,CRR001,2,NB,101.239,1,0.1599,2.0,0,2.0,...,1.0,,,,,,,,,1
10,2019-01-07,CSE001,2,NB,50.123,1,1.0020,2.0,0,2.0,...,1.0,,,,,,,,,1
17,2019-01-07,GOR001,2,NB,165.513,1,0.3369,2.0,0,2.0,...,1.0,,,,,,,,,1
21,2019-01-07,NSQ002,2,NB,91.115,1,1.9562,2.0,0,2.0,...,1.0,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2644,2018-11-07,SJF001,10,,0.781,1,,,,,...,,,0,0.3654,2.0,0,3.0,FRP2,2.0,1
2645,2018-11-07,SJF001,10,,0.781,1,,,,,...,,,0,0.3728,2.0,0,3.0,FRP3,3.0,1
2646,2018-12-10,OAK004,10,,0.880,1,,,,,...,,,0,1.1228,2.0,0,3.0,FRP1,1.0,1
2647,2018-12-10,OAK004,10,,0.880,1,,,,,...,,,0,1.0168,2.0,0,3.0,FRP2,2.0,1
