The binning method developed in LabBook-v2 doesn't work for some of the Grampus missions. I'm using this notebook to debug what's going on there.

# The binning script: `binMatFile.py`

Below is a copy of the binning script which is failing on Grampus (and came out of LabBook-v2)

In [5]:
import numpy as np
import pandas as pd
import xarray as xr
import re
import datetime as dt
import scipy.io
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def binMatFile(mat,glider):
    # finding the variables as named by Bruce
    variablefile = 'V:/Catherine/Glider/DataFiles/Glider Yo Columns.xlsx'
    variables = pd.read_excel(variablefile,sheet_name='new',header=None,usecols=[4],squeeze=True)
    variables = list(variables.drop(variables.index[0]))

    #finding the required variables based on their default names
    if re.search('h',glider): 
        searchKeys = ['sci_ocr*','sci_bbfl2s*','m_present_time','x_corrected_lat','x_corrected_lon','x_measured_depth',
                  'x_sci_sigmat','m_pitch','m_roll','sci_water_temp','x_sci_salinity']
    else:
        if 'sci_suna_nitrate_mg' in mat.keys():
            searchKeys = ['sci_ocr*','sci_bbfl2s*','m_present_time','x_corrected_lat','x_corrected_lon','x_measured_depth',
                  'x_sci_sigmat','m_pitch','m_roll','sci_water_temp','x_sci_salinity','sci_oxy4*',
                          'sci_suna_nitrate_mg']
        else:
            searchKeys = ['sci_ocr*','sci_bbfl2s*','m_present_time','x_corrected_lat','x_corrected_lon','x_measured_depth',
                  'x_sci_sigmat','m_pitch','m_roll','sci_water_temp','x_sci_salinity','sci_oxy4*']

    matchedVariables = []
    for key in searchKeys:
        for matkey in mat.keys():
            M = re.match(key,matkey)
            if M:
                matchedVariables += [matkey]

    #dealing with east and west bound data separately
    missionList = []
    directions = ['east','west']
    for direction in directions:

        dirBound = direction + 'bound'

        #selecting required variables from the <direction>bound matrices
        Data = {}
        for vv in matchedVariables:
            ix = mat[vv][0][0]
            Data[vv] = mat[dirBound][:,ix-1]

        Data = pd.DataFrame(Data)

        #doing some QC on the data
        subData = Data.loc[np.isfinite(Data['x_corrected_lat'])].copy()
        idx = ~((subData['sci_water_temp'] == 0) & (subData['x_sci_salinity'] < 0) & (subData['x_sci_salinity'] > -1))
        subData = subData.loc[idx].copy()
        subData.dropna(thresh=19,inplace = True)

        #reformatting the datetime string from unix time to year and decimal day of year
        year = []
        jdays = []
        for unixTime in subData['m_present_time']:
            pythonDT = dt.datetime.utcfromtimestamp(unixTime)
            year += [pythonDT.year]
            jday = pythonDT.timetuple().tm_yday
            fractionalDay = (pythonDT.hour/24.) + (pythonDT.minute/60/24) + (pythonDT.second/60/60/24)
            jdays += [jday+fractionalDay]
        subData['year_'+direction] = year
        subData['day_'+direction] = jdays
        subData.drop(['m_present_time'],axis = 1, inplace = True)

        #specifying the depth and longitude bins
        zbins = np.linspace(0,200,num=201)
        lonbins = np.linspace(-69.78,-66.8,num=int((-66.8+69.78)/0.01))

        zlabels = [str(z) for z in range(len(zbins)-1)]
        lonlabels = [str(i) for i in range(len(lonbins)-1)]

        #doing the binning
        subData['zbin'] = pd.cut(subData['x_measured_depth'].values,zbins,labels = zbins[1:])
        subData['lonbin'] = pd.cut(subData['x_corrected_lon'].values,lonbins,labels = lonbins[1:])
        binned = subData.groupby(['zbin','lonbin']).mean()

        #reformatting into xarray
        binned.drop(['x_measured_depth','x_corrected_lon'],axis = 1, inplace=True)
        if 'sci_oxy4_temp' in mat.keys():
            binned.drop(['sci_oxy4_temp'],axis=1,inplace=True)
            
        data = xr.Dataset.from_dataframe(binned)

        #renaming the variables
        if re.search('h',glider):
            #print('henry')
            if 'sci_ocr507I_irrad1' in mat.keys():
                varnames = {'sci_ocr507R_rad1' : 'Lu412_'+direction,'sci_ocr507R_rad2' : 'Lu443_'+direction,
                        'sci_ocr507R_rad3' : 'Lu490_'+direction,'sci_ocr507R_rad4' : 'Lu510_'+direction,
                        'sci_ocr507R_rad5' : 'Lu532_'+direction,'sci_ocr507R_rad6' : 'Lu555_'+direction,
                        'sci_ocr507R_rad7' : 'Lu670_'+direction,
                        'sci_ocr507I_irrad1' : 'Ed412_'+direction,'sci_ocr507I_irrad2' : 'Ed443_'+direction,
                        'sci_ocr507I_irrad3' : 'Ed490_'+direction,'sci_ocr507I_irrad4' : 'Ed510_'+direction,
                        'sci_ocr507I_irrad5' : 'Ed532_'+direction,'sci_ocr507I_irrad6' : 'Ed555_'+direction,
                        'sci_ocr507I_irrad7' : 'Ed670_'+direction,'sci_bbfl2s_bb_scaled' : 'bb532_'+direction,
                        'sci_bbfl2s_chlor_scaled' : 'chlfl_'+direction,'sci_bbfl2s_cdom_scaled' : 'cdomfl_'+direction,
                        'x_corrected_lat' : 'lat_'+direction,
                        'sci_water_temp' : 'temp_'+direction, 'x_sci_salinity' : 'sal_'+direction,
                        'x_sci_sigmat' : 'sigmat_'+direction,
                        'm_pitch' : 'pitch_'+direction, 'm_roll' : 'roll_'+direction}
            else:
                varnames = {'sci_ocr507r_rad1' : 'Lu412_'+direction,'sci_ocr507r_rad2' : 'Lu443_'+direction,
                        'sci_ocr507r_rad3' : 'Lu490_'+direction,'sci_ocr507r_rad4' : 'Lu510_'+direction,
                        'sci_ocr507r_rad5' : 'Lu532_'+direction,'sci_ocr507r_rad6' : 'Lu555_'+direction,
                        'sci_ocr507r_rad7' : 'Lu670_'+direction,
                        'sci_ocr507i_irrad1' : 'Ed412_'+direction,'sci_ocr507i_irrad2' : 'Ed443_'+direction,
                        'sci_ocr507i_irrad3' : 'Ed490_'+direction,'sci_ocr507i_irrad4' : 'Ed510_'+direction,
                        'sci_ocr507i_irrad5' : 'Ed532_'+direction,'sci_ocr507i_irrad6' : 'Ed555_'+direction,
                        'sci_ocr507i_irrad7' : 'Ed670_'+direction,'sci_bbfl2s_bb_scaled' : 'bb532_'+direction,
                        'sci_bbfl2s_chlor_scaled' : 'chlfl_'+direction,'sci_bbfl2s_cdom_scaled' : 'cdomfl_'+direction,
                        'x_corrected_lat' : 'lat_'+direction,
                        'sci_water_temp' : 'temp_'+direction, 'x_sci_salinity' : 'sal_'+direction,
                        'x_sci_sigmat' : 'sigmat_'+direction,
                        'm_pitch' : 'pitch_'+direction, 'm_roll' : 'roll_'+direction}
        else:
            #print('grampus')
            if 'sci_suna_nitrate_mg' in mat.keys():
                varnames = {'sci_ocr504r_rad1' : 'L380_'+direction,'sci_ocr504r_rad2' : 'Lu443_'+direction,
                    'sci_ocr504r_rad3' : 'Lu490_'+direction,'sci_ocr504r_rad4' : 'Lu532_'+direction,
                    'sci_ocr504i_irrad1' : 'Ed380_'+direction,'sci_ocr504i_irrad2' : 'Ed443_'+direction,
                    'sci_ocr504i_irrad3' : 'Ed490_'+direction,'sci_ocr504i_irrad4' : 'Ed532_'+direction,
                    'sci_bbfl2s_bb_scaled' : 'bb532_'+direction,
                    'sci_bbfl2s_chlor_scaled' : 'chlfl_'+direction,'sci_bbfl2s_cdom_scaled' : 'cdomfl_'+direction,
                    'x_corrected_lat' : 'lat_'+direction,
                    'sci_water_temp' : 'temp_'+direction, 'x_sci_salinity' : 'sal_'+direction,
                    'x_sci_sigmat' : 'sigmat_'+direction,
                    'm_pitch' : 'pitch_'+direction, 'm_roll' : 'roll_'+direction,
                    'sci_suna_nitrate_mg' : 'nitrate_'+direction,
                    'sci_oxy4_oxygen' : 'o2 conc_'+direction, 'sci_oxy4_saturation' : 'o2 saturation_'+direction}
            else:
                varnames = {'sci_ocr504r_rad1' : 'L380_'+direction,'sci_ocr504r_rad2' : 'Lu443_'+direction,
                    'sci_ocr504r_rad3' : 'Lu490_'+direction,'sci_ocr504r_rad4' : 'Lu532_'+direction,
                    'sci_ocr504i_irrad1' : 'Ed380_'+direction,'sci_ocr504i_irrad2' : 'Ed443_'+direction,
                    'sci_ocr504i_irrad3' : 'Ed490_'+direction,'sci_ocr504i_irrad4' : 'Ed532_'+direction,
                    'sci_bbfl2s_bb_scaled' : 'bb532_'+direction,
                    'sci_bbfl2s_chlor_scaled' : 'chlfl_'+direction,'sci_bbfl2s_cdom_scaled' : 'cdomfl_'+direction,
                    'x_corrected_lat' : 'lat_'+direction,
                    'sci_water_temp' : 'temp_'+direction, 'x_sci_salinity' : 'sal_'+direction,
                    'x_sci_sigmat' : 'sigmat_'+direction,
                    'm_pitch' : 'pitch_'+direction, 'm_roll' : 'roll_'+direction,
                    'sci_oxy4_oxygen' : 'o2 conc_'+direction, 'sci_oxy4_saturation' : 'o2 saturation_'+direction}
        

        data = data.rename(varnames)

        missionList += [data]

    fullMission = xr.merge(missionList)

    return fullMission

# Attempt on Grampus-mission3

In [33]:
missionDir = 'V:/glider/grampus/mission3/matlabdata/mission3.mat'

mat = scipy.io.loadmat(missionDir)

In [8]:
binnedData = binMatFile(mat,'grampus')

TypeError: can't convert complex to int

# Debugging the binning script

In [9]:
glider = 'grampus'

## variable names:

In [10]:
# finding the variables as named by Bruce
variablefile = 'V:/Catherine/Glider/DataFiles/Glider Yo Columns.xlsx'
variables = pd.read_excel(variablefile,sheet_name='new',header=None,usecols=[4],squeeze=True)
variables = list(variables.drop(variables.index[0]))

#finding the required variables based on their default names
if re.search('h',glider): 
    searchKeys = ['sci_ocr*','sci_bbfl2s*','m_present_time','x_corrected_lat','x_corrected_lon','x_measured_depth',
              'x_sci_sigmat','m_pitch','m_roll','sci_water_temp','x_sci_salinity']
else:
    if 'sci_suna_nitrate_mg' in mat.keys():
        searchKeys = ['sci_ocr*','sci_bbfl2s*','m_present_time','x_corrected_lat','x_corrected_lon','x_measured_depth',
              'x_sci_sigmat','m_pitch','m_roll','sci_water_temp','x_sci_salinity','sci_oxy4*',
                      'sci_suna_nitrate_mg']
    else:
        searchKeys = ['sci_ocr*','sci_bbfl2s*','m_present_time','x_corrected_lat','x_corrected_lon','x_measured_depth',
              'x_sci_sigmat','m_pitch','m_roll','sci_water_temp','x_sci_salinity','sci_oxy4*']

matchedVariables = []
for key in searchKeys:
    for matkey in mat.keys():
        M = re.match(key,matkey)
        if M:
            matchedVariables += [matkey]

## eastbound direction only

### selecting variables and doing QC

In [11]:
direction = 'east'
dirBound = direction + 'bound'

#selecting required variables from the <direction>bound matrices
Data = {}
for vv in matchedVariables:
    ix = mat[vv][0][0]
    Data[vv] = mat[dirBound][:,ix-1]

Data = pd.DataFrame(Data)

#doing some QC on the data
subData = Data.loc[np.isfinite(Data['x_corrected_lat'])].copy()
idx = ~((subData['sci_water_temp'] == 0) & (subData['x_sci_salinity'] < 0) & (subData['x_sci_salinity'] > -1))
subData = subData.loc[idx].copy()
subData.dropna(thresh=19,inplace = True)

### Date time formatting

In [12]:
#reformatting the datetime string from unix time to year and decimal day of year
year = []
jdays = []
for unixTime in subData['m_present_time']:
    pythonDT = dt.datetime.utcfromtimestamp(unixTime)
    year += [pythonDT.year]
    jday = pythonDT.timetuple().tm_yday
    fractionalDay = (pythonDT.hour/24.) + (pythonDT.minute/60/24) + (pythonDT.second/60/60/24)
    jdays += [jday+fractionalDay]
subData['year_'+direction] = year
subData['day_'+direction] = jdays
subData.drop(['m_present_time'],axis = 1, inplace = True)

TypeError: can't convert complex to int

In [13]:
unixTime

(1470679144.4628+0j)

In [14]:
subData['m_present_time']

174       1.470679e+09+0.000000e+00j
250       1.470679e+09+0.000000e+00j
347       1.470679e+09+0.000000e+00j
513       1.470679e+09+0.000000e+00j
546       1.470679e+09+0.000000e+00j
                     ...            
497936    1.471264e+09+0.000000e+00j
497983    1.471264e+09+0.000000e+00j
498080    1.471264e+09+0.000000e+00j
498115    1.471264e+09+0.000000e+00j
498203    1.471264e+09+0.000000e+00j
Name: m_present_time, Length: 49227, dtype: complex128

In [15]:
subData

Unnamed: 0,sci_ocr504i_irrad1,sci_ocr504i_irrad2,sci_ocr504i_irrad3,sci_ocr504i_irrad4,sci_ocr504r_rad1,sci_ocr504r_rad2,sci_ocr504r_rad3,sci_ocr504r_rad4,sci_bbfl2s_cdom_scaled,sci_bbfl2s_bb_scaled,...,x_corrected_lon,x_measured_depth,x_sci_sigmat,m_pitch,m_roll,sci_water_temp,x_sci_salinity,sci_oxy4_oxygen,sci_oxy4_saturation,sci_oxy4_temp
174,-0.002788+0.000000j,0.008786+0.000000j,0.013164+0.000000j,0.007997+0.000000j,-1.628750e-05+0.000000e+00j,0.000243+0.000000j,0.000122+0.000000j,-0.000012+0.000000j,2.896000+0.000000j,0.023893+0.000000j,...,-69.788649+0.000000j,93.853750+0.000000j,25.809682+0.000000j,,,8.012000+0.000000j,32.619737+0.000000j,209.029000+0.000000j,71.079000+0.000000j,8.122000+0.000000j
250,-0.000993+0.000000j,0.008160+0.000000j,0.012241+0.000000j,0.007780+0.000000j,-7.765880e-05+0.000000e+00j,0.000252+0.000000j,0.000132+0.000000j,-0.000048+0.000000j,2.896000+0.000000j,0.021975+0.000000j,...,-69.788537+0.000000j,86.284833+0.000000j,25.728122+0.000000j,,,8.178500+0.000000j,32.588033+0.000000j,208.232000+0.000000j,70.847000+0.000000j,8.147000+0.000000j
347,-0.000847+0.000000j,0.008160+0.000000j,0.014086+0.000000j,0.009498+0.000000j,5.565760e-06+0.000000e+00j,0.000249+0.000000j,0.000153+0.000000j,-0.000004+0.000000j,2.715000+0.000000j,0.014994+0.000000j,...,-69.788406+0.000000j,77.065960+0.000000j,25.564097+0.000000j,,,8.708000+0.000000j,32.532124+0.000000j,209.258000+0.000000j,71.911000+0.000000j,8.591000+0.000000j
513,0.000463+0.000000j,0.006356+0.000000j,0.020260+0.000000j,0.027848+0.000000j,-8.676430e-05+0.000000e+00j,0.000251+0.000000j,0.000209+0.000000j,0.000143+0.000000j,2.986500+0.000000j,0.012094+0.000000j,...,-69.788187+0.000000j,61.652767+0.000000j,25.376577+0.000000j,,,9.256500+0.000000j,32.486071+0.000000j,213.630000+0.000000j,74.397000+0.000000j,9.188000+0.000000j
546,0.001543+0.000000j,0.007497+0.000000j,0.026057+0.000000j,0.037014+0.000000j,-1.118840e-05+0.000000e+00j,0.000237+0.000000j,0.000227+0.000000j,0.000220+0.000000j,2.805500+0.000000j,0.011111+0.000000j,...,-69.788150+0.000000j,58.823767+0.000000j,25.350741+0.000000j,,,9.312800+0.000000j,32.480102+0.000000j,214.421000+0.000000j,74.780000+0.000000j,9.253000+0.000000j
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497936,-0.000483+0.000000j,0.092745+0.000000j,0.435680+0.000000j,0.460223+0.000000j,-4.814550e-06+0.000000e+00j,0.000854+0.000000j,0.004998+0.000000j,0.004616+0.000000j,2.443500+0.000000j,0.012340+0.000000j,...,-68.833702+0.000000j,26.518667+0.000000j,25.248969+0.000000j,,,10.452100+0.000000j,32.767264+0.000000j,212.552000+0.000000j,76.089000+0.000000j,10.446000+0.000000j
497983,0.004856+0.000000j,0.227922+0.000000j,0.836626+0.000000j,0.893068+0.000000j,-6.260050e-07+0.000000e+00j,0.001792+0.000000j,0.009607+0.000000j,0.009110+0.000000j,2.715000+0.000000j,0.013864+0.000000j,...,-68.833663+0.000000j,22.069080+0.000000j,25.182519+0.000000j,,,10.496500+0.000000j,32.717630+0.000000j,212.923000+0.000000j,76.283000+0.000000j,10.484000+0.000000j
498080,0.093064+0.000000j,1.154840+0.000000j,2.697860+0.000000j,2.980060+0.000000j,2.124430e-04+0.000000e+00j,0.007604+0.000000j,0.028244+0.000000j,0.027289+0.000000j,2.534000+0.000000j,0.009095+0.000000j,...,-68.833617+0.000000j,13.766160+0.000000j,25.072827+0.000000j,,,10.864900+0.000000j,32.704312+0.000000j,216.214000+0.000000j,77.871000+0.000000j,10.727000+0.000000j
498115,0.258876+0.000000j,2.068750+0.000000j,4.205600+0.000000j,4.751460+0.000000j,5.941470e-04+0.000000e+00j,0.011681+0.000000j,0.038191+0.000000j,0.037595+0.000000j,2.262500+0.000000j,0.006833+0.000000j,...,-68.833603+0.000000j,10.882000+0.000000j,24.940453+0.000000j,,,11.410500+0.000000j,32.674605+0.000000j,219.730000+0.000000j,79.600000+0.000000j,10.999000+0.000000j


OK so all the subdata are complex... what about the raw data from the mat file:

In [16]:
Data

Unnamed: 0,sci_ocr504i_irrad1,sci_ocr504i_irrad2,sci_ocr504i_irrad3,sci_ocr504i_irrad4,sci_ocr504r_rad1,sci_ocr504r_rad2,sci_ocr504r_rad3,sci_ocr504r_rad4,sci_bbfl2s_cdom_scaled,sci_bbfl2s_bb_scaled,...,x_corrected_lon,x_measured_depth,x_sci_sigmat,m_pitch,m_roll,sci_water_temp,x_sci_salinity,sci_oxy4_oxygen,sci_oxy4_saturation,sci_oxy4_temp
0,0.000000+0.000000j,0.000000+0.000000j,0.000000+0.000000j,0.000000+0.000000j,0.000000+0.000000j,0.000000+0.000000j,0.000000+0.000000j,0.000000+0.000000j,0.000000+0.000000j,0.000000+0.000000j,...,-69.788236+0.000000j,0.010368+0.000000j,,,,0.000000+0.000000j,-0.001908+0.000000j,261.050000+0.000000j,108.417000+0.000000j,17.757000+0.000000j
1,,,,,,,,,,,...,-69.789248+0.000000j,0.020737+0.000000j,,-0.155334+0.000000j,-0.162316+0.000000j,,,,,
2,,,,,,,,,,,...,-69.789502+0.000000j,0.020737+0.000000j,,,,,,,,
3,,,,,,,,,,,...,-69.789517+0.000000j,0.020737+0.000000j,,,,,,,,
4,,,,,,,,,,,...,-69.789545+0.000000j,0.000000+0.000000j,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
498267,,,,,,,,,,,...,-68.833605+0.000000j,0.026650+0.000000j,,,,,,,,
498268,,,,,,,,,,,...,-68.833605+0.000000j,0.056261+0.000000j,,,,,,,,
498269,,,,,,,,,,,...,-68.833605+0.000000j,0.026650+0.000000j,,,,,,,,
498270,,,,,,,,,,,...,-68.833605+0.000000j,0.026650+0.000000j,,,,,,,,


Also complex.

Do any of the data actually have an imaginary part? (In the view above the all have `0j`)

In [18]:
imagPart = np.imag(Data)
np.any(imagPart)

False

No, so why are the data being read in as complex??

In [27]:
mat = scipy.io.loadmat(mfile,mat_dtype = True)
#selecting required variables from the <direction>bound matrices
Data = {}
for vv in matchedVariables:
    ix = int(mat[vv][0][0])  ## NOTE I HAD TO MAKE THIS AN INT
    Data[vv] = mat[dirBound][:,ix-1]

In [29]:
pd.DataFrame(Data)

Unnamed: 0,sci_ocr504i_irrad1,sci_ocr504i_irrad2,sci_ocr504i_irrad3,sci_ocr504i_irrad4,sci_ocr504r_rad1,sci_ocr504r_rad2,sci_ocr504r_rad3,sci_ocr504r_rad4,sci_bbfl2s_cdom_scaled,sci_bbfl2s_bb_scaled,...,x_corrected_lon,x_measured_depth,x_sci_sigmat,m_pitch,m_roll,sci_water_temp,x_sci_salinity,sci_oxy4_oxygen,sci_oxy4_saturation,sci_oxy4_temp
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-69.788236,0.010368,,,,0.0,-0.001908,261.05,108.417,17.757
1,,,,,,,,,,,...,-69.789248,0.020737,,-0.155334,-0.162316,,,,,
2,,,,,,,,,,,...,-69.789502,0.020737,,,,,,,,
3,,,,,,,,,,,...,-69.789517,0.020737,,,,,,,,
4,,,,,,,,,,,...,-69.789545,0.000000,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
498267,,,,,,,,,,,...,-68.833605,0.026650,,,,,,,,
498268,,,,,,,,,,,...,-68.833605,0.056261,,,,,,,,
498269,,,,,,,,,,,...,-68.833605,0.026650,,,,,,,,
498270,,,,,,,,,,,...,-68.833605,0.026650,,,,,,,,


This now works! I just want to try these small changes on one of the files that did work to check the best way to integrate this change in the `binMatFile.py` script

# Test on old glider mission

In [34]:
missionDir = 'V:/glider/henry/mission4/matlabdata/mission4.mat'

mat = scipy.io.loadmat(missionDir,mat_dtype=True)

glider = 'henry'

# finding the variables as named by Bruce
variablefile = 'V:/Catherine/Glider/DataFiles/Glider Yo Columns.xlsx'
variables = pd.read_excel(variablefile,sheet_name='new',header=None,usecols=[4],squeeze=True)
variables = list(variables.drop(variables.index[0]))

#finding the required variables based on their default names
if re.search('h',glider): 
    searchKeys = ['sci_ocr*','sci_bbfl2s*','m_present_time','x_corrected_lat','x_corrected_lon','x_measured_depth',
              'x_sci_sigmat','m_pitch','m_roll','sci_water_temp','x_sci_salinity']
else:
    if 'sci_suna_nitrate_mg' in mat.keys():
        searchKeys = ['sci_ocr*','sci_bbfl2s*','m_present_time','x_corrected_lat','x_corrected_lon','x_measured_depth',
              'x_sci_sigmat','m_pitch','m_roll','sci_water_temp','x_sci_salinity','sci_oxy4*',
                      'sci_suna_nitrate_mg']
    else:
        searchKeys = ['sci_ocr*','sci_bbfl2s*','m_present_time','x_corrected_lat','x_corrected_lon','x_measured_depth',
              'x_sci_sigmat','m_pitch','m_roll','sci_water_temp','x_sci_salinity','sci_oxy4*']

matchedVariables = []
for key in searchKeys:
    for matkey in mat.keys():
        M = re.match(key,matkey)
        if M:
            matchedVariables += [matkey]
            
direction = 'east'
dirBound = direction + 'bound'

#selecting required variables from the <direction>bound matrices
Data = {}
for vv in matchedVariables:
    ix = int(mat[vv][0][0])
    Data[vv] = mat[dirBound][:,ix-1]

Data = pd.DataFrame(Data)

In [37]:
Data

Unnamed: 0,sci_ocr507R_rad1,sci_ocr507I_irrad1,sci_ocr507R_rad2,sci_ocr507R_rad3,sci_ocr507R_rad4,sci_ocr507R_rad5,sci_ocr507R_rad6,sci_ocr507R_rad7,sci_ocr507I_irrad2,sci_ocr507I_irrad3,...,sci_bbfl2s_cdom_scaled,m_present_time,x_corrected_lat,x_corrected_lon,x_measured_depth,x_sci_sigmat,m_pitch,m_roll,sci_water_temp,x_sci_salinity
0,,,,,,,,,,,...,,1.240851e+09,,,0.000000,,,,,
1,,,,,,,,,,,...,,1.240851e+09,,,,,,,,
2,,,,,,,,,,,...,,1.240851e+09,,,0.000000,,,,,
3,,,,,,,,,,,...,,1.240851e+09,,,,,,,,
4,,,,,,,,,,,...,,1.240851e+09,,,0.000000,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200622,,,,,,,,,,,...,,1.241954e+09,43.590215,-66.760788,0.000000,,0.298579,-0.116444,,
200623,,,,,,,,,,,...,,1.241954e+09,43.590295,-66.760786,0.000000,,-0.246720,0.025579,,
200624,,,,,,,,,,,...,,1.241954e+09,43.590360,-66.760775,0.000000,,-0.377160,-0.040153,,
200625,,,,,,,,,,,...,,1.241954e+09,43.590443,-66.760764,0.377914,,-0.209353,0.064001,,


Looks good! I'm just going to add the following two changes into the `binMatFile.py` directly:
1. include the `mat_dtype=True` argument in `loadmat` function
2. make the index an int where the data is selected from the `<direction>bound` matrices

and run on the Grampus missions from mission3 - mission8