# Profile processing Step 1: Pre-processing CTD, identification of data groups, and image tags

This notebook works with the raw CTD time series, the Castaway-processed CTD profiles, and all of the images in the folder to sync their time stamps and create the files that are used in the next step to associate a time, depth, salinity, and temperature with every particle identified by the image processing output. It is also in this notebook that you select specific intervals within the cast to process (if desired) and identify the total depth (which is needed later on when processing the profile data). Three inputs are required to complete this step of the processing. They are:

1) raw CTD time series
2) Castaway processed CTD profiles
3) The images to process or the images that were processed 

This notebook can be run before or after the image processing. 

The user input needed in the notebook includes:
- specifying the cast to process through the file ```0_CastPath.csv``` (this file needs to be in the same directory as the step 1 notebook)
- any time corrections to match the images (computer clock) and CTD (CTD clock)
- the image file type (e.g., jpg, bmp, etc.) and the file type of the CTD data (should be .csv)

During the execution of the notebook, you will need to identify whether or not there are breakpoints you want to associate with the cast to group the data and how you want to define the depth. Completing these should be self-explanatory as you work through the file.

Files generated through the execution of this notebook include:

- CTD-profile.csv (a renamed copy for easy read in later on)
- CTD-timeseries.csv (a renamed copy for easy read in later on)
- Data-Breakpionts.csv (a list of the unique data groupings for the cast)
- Depth.csv (depth to be used in later profiling)
- ImageTime.csv (list of image day/times)

## Python loads and functions

In [None]:
%config InlineBackend.figure_format='retina' # hig-res plots for a Retina display 
import numpy as np
import pandas as pd
import os
import glob
from datetime import datetime, timedelta
import shutil

# %matplotlib inline
import matplotlib.pyplot as plt
%matplotlib widget

In [None]:
def SpC(C,T):
    """Convert conductivity to specific conductance.

    Parameters
    ----------
    C : conductivity in microsiemens/cm
    T : temperature in Celsius

    Returns
    -------
    C divided by the linear temperature factor ``1 + 0.02*(T - 25)``, so the
    value is unchanged at T = 25.
    """
    temperature_factor = 1 + 0.02*(T - 25)
    return C/temperature_factor
    
    
def rho_cw(T,S):
    """Return the clear-water density in kg/m^3.

    Parameters
    ----------
    T : temperature [deg C]
    S : salinity [ppt]

    Notes
    -----
    The density is the fresh-water density (a function of T only) plus a
    salinity correction of the form ``A(T)*S + B(T)*S**1.5 + C*S**2``.
    The A, B and C coefficients are those of the one-atmosphere international
    equation of state of seawater (Millero & Poisson 1981 / UNESCO 1983).
    """
    # fresh-water density; equals 1000 kg/m^3 at T = 3.9863 deg C
    rho_fresh = 1000*(1 - (T + 288.9414)/(508929.2*(T + 68.12963))*(T - 3.9863)**2)

    # temperature-dependent coefficients of the salinity correction
    Acoef = (0.824493 - 0.0040899*T + 0.000076438*T**2
             - 0.00000082467*T**3 + 0.0000000053875*T**4)
    Bcoef = -0.00572466 + 0.00010227*T - 0.0000016546*T**2
    Ccoef = 0.00048314

    return rho_fresh + Acoef*S + Bcoef*S**(3/2) + Ccoef*S**2

## Find files and enter time corrections

In [None]:
""" --- User input ----------------------------- """
# glob patterns for the CTD data files and for the images
filetype = '*.csv'
imagetype = '*jpg'

# offsets added to the CTD clock so that it matches the camera time stamp
# (use a negative number to subtract)
hrC = 0   # hours
minC = 0  # minutes
scC = 4   # seconds (Barataria Bay correction)
# scC = 0   # seconds (Seine correction)
"""  ------------------------------------------- """

# The cast folder is read from the first row of the profile_path column
# of 0_CastPath.csv.
# castpath = '/Users/strom-adm/Documents/Floc-Processing/Code/1_Profile_Processing/0_CastPath.csv'
castpath = '0_CastPath.csv'
cast_table = pd.read_csv(castpath)
Path = cast_table.profile_path[0] + '/'

# remember where the code lives, then work from inside the cast folder
CodePath = os.getcwd()
os.chdir(Path)

# list the candidate data files together with the index used to select them
files = sorted(glob.glob(filetype))

for idx, fname in enumerate(files):
    print('file: ',fname,'(index: ',idx,')')

## Pick files for CTD processing

In [None]:
""" --- User input ----------------------------- """
# m and n are positions in the sorted `files` list printed with its indices
# by the file-listing loop.
m = 0 # pick the processed profile data by choosing the file index
n = 1 # pick the raw CTD time series by choosing the file index

## Process CTD

In [None]:
def _centered_mean5(values):
    """Return the 5-point centered moving average of *values*.

    The first two and last two entries have no complete window and are left
    at 0; inputs shorter than five samples give all zeros.
    """
    x = np.asarray(values, dtype=float)
    smoothed = np.zeros(len(x))
    if len(x) >= 5:
        smoothed[2:-2] = (x[:-4] + x[1:-3] + x[2:-2] + x[3:-1] + x[4:])/5
    return smoothed


# --- processed profile -------------------------------------------------------
# Save the profile under a fixed name so it is easily read in when creating the
# super data. A file whose first row has two columns is re-read with its first
# 28 lines skipped; any other file is copied unchanged.
if pd.read_csv(files[m],  nrows=1).shape[1] == 2:
    ctd_profile = pd.read_csv(files[m], skiprows = 28)
    ctd_profile.to_csv('CTD-profile.csv',index = False)
else:
    shutil.copyfile(files[m], 'CTD-profile.csv')

# --- raw time series ---------------------------------------------------------
# Read the measurements and the leading header block separately, then place
# them side by side in one dataframe (data columns first, header columns last).
ctd_data = pd.read_csv(files[n], skiprows = 28)
ctd_header = pd.read_csv(files[n], nrows = 27)
ctd = pd.concat([ctd_data, ctd_header], axis=1)

# Associate a full corrected day and time stamp with each measurement.
# Row 2 / column 5 holds the reference date-time string taken from the header.
cor_time = datetime.strptime(ctd.iloc[2,5],'%Y-%m-%d %H:%M:%S')
# apply the user-specified clock correction (original note: "Change time to CST")
cor_time = cor_time + timedelta(hours=hrC,minutes=minC,seconds=scC)

N = len(ctd)
cor_timess = np.empty(N, dtype=object)
cor_timess[:] = [
    (cor_time + timedelta(seconds=float(t))).strftime("%m%d%Y%H%M%S")
    for t in ctd.iloc[:,0]
]

# Smooth P, T and C with a 5-point centered average, compute specific
# conductance from the smoothed C and T, and convert P to depth.
avg_ctd_P = _centered_mean5(ctd.iloc[:,1])
avg_ctd_T = _centered_mean5(ctd.iloc[:,2])
avg_ctd_C = _centered_mean5(ctd.iloc[:,3])

avg_SpC = SpC(avg_ctd_C,avg_ctd_T)      #microsiemens/cm
depth = avg_ctd_P*10/9.81               #meters

# Insert the derived columns at the front of the dataframe and save as csv.
ctd.insert(0,'Time',cor_timess)
ctd.insert(1,'Depth [m]',depth)
ctd.insert(2,'P [Decibars]',avg_ctd_P)
ctd.insert(3,'T [Celsius]',avg_ctd_T)
# store the temperature-compensated specific conductance that the column
# name promises, not the raw smoothed conductivity
ctd.insert(4,'SpC [MicroSiemens/cm]',avg_SpC)

ctd.to_csv('CTD-timeseries.csv',index = False)

display(ctd)

# auto set the max depth
maxdepth = ctd['Depth [m]'].max()

# plot specific conductance (left axis) and depth (right axis) against time
fig, ax1 = plt.subplots()
ax1.plot(ctd['Time (Seconds)'], ctd['SpC [MicroSiemens/cm]'], color='C03',alpha=1,label='SpC')
# the x label goes on ax1 because the x-axis of a twinx axes is not drawn
ax1.set_xlabel('Time [s]')
ax1.set_ylabel('SpC [MicroSiemens/cm]')
# ax1.set_ylim(33000,37000)
ax2 = ax1.twinx()
ax2.plot(ctd['Time (Seconds)'], ctd['Depth [m]'], alpha=1,label='depth')
ax2.set_ylabel('Depth [m]')
ax2.axhline(y = maxdepth, color = 'k', linestyle = '--', label='max depth')
# one combined legend for the lines of both axes
h1, l1 = ax1.get_legend_handles_labels()
h2, l2 = ax2.get_legend_handles_labels()
ax2.legend(h1+h2, l1+l2, loc=2);

## Identify the break points for specific subsets of the data and the max depth

In [None]:
""" --- User input ----------------------------- """
subsets = 1 # enter "1" to have subsets, zero for none
automax = 1 # use "1" to just take the max of the measurement. Set automax = 0 to use a user-specified value (then must set manual max below)
# manualmax = 12.8 # Set max depth manually

# Time breakpoints... enter the start and end of each period or subset for which you want the data grouped (use nearest second)

# Seine - 4/17/2022 --------------------------------

names = np.array(['profile','surface','mid-depth','bottom'])

startS = np.array([62,235,358,458]) # profile 4
endS = np.array([234,322,415,528]) # profile 4

# # Seine - 6/16/2022 --------------------------------

# names = np.array(['profile','surface','mid-depth','bottom'])

# startS = np.array([180,338,553,714]) # profile 4
# endS = np.array([338,518,679,832]) # profile 4

"""  ------------------------------------------- """

# Replace the automatically detected depth with the user value when requested
# (manualmax must be uncommented above in that case).
if automax == 0:
    maxdepth = manualmax

# Depth.csv is written for both the automatic and the manual depth so that the
# value is always available to the later profile processing.
waterdepth = pd.DataFrame({'Depth [m]':np.array([maxdepth])})
waterdepth.to_csv('Depth.csv',index=False)

if subsets == 1:

    deltaT = endS - startS
    elapsed = ctd['Time (Seconds)']

    def _stamp_at(seconds):
        """Return the corrected time stamp of the CTD sample nearest to *seconds*.

        An exact match is returned when one exists; otherwise the closest
        sample is used instead of failing. The stamp is converted to a float,
        which is how it has always been stored in the breakpoint table.
        """
        nearest = (elapsed - seconds).abs().idxmin()
        return float(ctd.loc[nearest, 'Time'])

    start_time = np.array([_stamp_at(s) for s in startS], dtype=float)
    end_time = np.array([_stamp_at(s) for s in endS], dtype=float)

    # column and file names are kept exactly as spelled because they are part of the output format
    breaktimes = pd.DataFrame({'Type/Location':names,'Start [DateTime]':start_time, 'End [DateTime]':end_time, 'Durration [sec]':deltaT})
    breaktimes.to_csv('Data-Breakpionts.csv',index = False)
    display(breaktimes)



## Save the depth to be used later on

In [None]:
# Build a one-row table holding maxdepth; as the last expression of the cell it
# is only shown as cell output -- this line does not write any file.
pd.DataFrame({'Depth [m]':np.array([maxdepth])})

## Extract time stamps from images and save as a csv file

In [None]:
# Read the image file names, pull the time stamp out of each name, and save
# the result as ImageTime.csv.

# source of the names: the file_Name column of filemod_list.csv ---------------

images = pd.read_csv('filemod_list.csv', usecols=['file_Name'])

N = len(images)
datetime_raw = np.empty(N, dtype=object)
# The time stamp is taken as the second '-'-separated field of the file name.
# Adjust the number in brackets until datetime_raw is the time stamp; this
# depends on how the images were named in the field.
datetime_raw[:] = [str(file_name.split('-')[1]) for file_name in images.iloc[:, 0]]

images['Image Time'] = datetime_raw
images.to_csv('ImageTime.csv', index=False)
display(images)

# uses the actual image files -----------------
# Ifiles = sorted(glob.glob(imagetype))
# images=pd.DataFrame(Ifiles)

# N = len(images)
# datetime_raw = np.empty(N, dtype=object)
# for i in np.arange(0,N):
#     datetime_raw[i] = str(images.iloc[i,0].split('-')[1]) #adjust the number in brackets until the datetime_raw is the timestamp. This will depend on how you named the images in the field. 
# images['Image Time']=datetime_raw
# images.to_csv('ImageTime.csv', index=False)
# display(images)

# imagehold = images[(images['Image Time'] >= breaktime)].copy()
# imagehold.to_csv('ImageTime-hold.csv',index = False)
# print("Only during the hold ------------")
# display(imagehold)

In [None]:
# Change the working directory back to the original code path (CodePath was
# captured with os.getcwd() before the notebook moved into the cast folder).
os.chdir(CodePath)