### User input

In [None]:
decimal = '.'                                                            # decimal mark of the CSV files
sep = ';'                                                                # column separator of the CSV files (used for reading and exporting)

from config import *                                                     # personal settings of the local user; expected to define input_directory and output_directory
input_dir = input_directory + 'results/df_pathcor_sw.csv'                # full path of the input CSV file (a file path, despite the name)
output = output_directory + 'results/'                                   # output folder; file names are appended by string concatenation, so keep the trailing separator

dateheadername = 'Timestamp'                                             # header of the date column
header = 0                                                               # header row number (passed to pd.read_csv)
dateparsingformat = '%Y-%m-%d %H:%M:%S'                                  # format of the date (strftime/strptime syntax)
sample_name = 'sw'                                                       # name of the sample; used in the names of the exported files

nperiods = 10                                                            # number of dates to display in the absorbance spectra plot
fig_format = '.tiff'                                                     # format of the exported figure
dpi = 300                                                                # resolution of the exported figure

startwv = '700 nm'                                                       # column label of the starting wavelength for the baseline correction; the window runs from this column to the last one

dates_start = ['2018-11-13 04:32:00']                                    # starting dates of the periods to correct with a specific factor and/or wavelength range; one entry per period (append ", 'date'" to add a period)
dates_end = ['2018-12-04 09:12:00']                                      # ending dates of the same periods; must have one entry per entry of dates_start

startwvs = ['700 nm']                                                    # starting wavelength of the baseline window for each period (append ", 'wv'" to add a period); only used by the per-period baseline correction
endwvs = ['737.5 nm']                                                    # ending wavelength of the baseline window for each period (append ", 'wv'" to add a period); only used by the per-period baseline correction

cal_factors = [2, 4]                                                     # calibration factors, one per period; the operation applied is selected by cal_symbols. The numbers were chosen only to test the algorithm. NOTE(review): two factors are listed but only one period is defined above, so the calibration loop would use only the first one
cal_symbols = [0, 1]                                                     # calibration operation for each period: 0 (multiplication), 1 (sum), 2 (difference)

### Start environment and import data

In [None]:
import abspectroscopy_functions as abspy # Functions from the AbspectroscoPY toolbox
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter, AutoMinorLocator)
from datetime import datetime
from datetime import timedelta

# Read the input table; the first column holds the timestamps and becomes the index.
# The user-defined decimal mark is passed to the parser so that files written with
# another mark (e.g. ',') are still read as numbers, consistently with the export step.
df = pd.read_csv(input_dir, sep = sep, decimal = decimal, header = header, index_col = 0)
df.index = pd.to_datetime(df.index)               # make sure the time column (here the index) uses a datetime format
df

### Set a wavelength range to use for the baseline correction

In [None]:
### Plot the absorbance spectra for different dates covering the temporal variability of the data and choose a wavelength range to correct for the baseline drift. 

In [None]:
%matplotlib inline # necessary if the notebook is not configured to use the inline backend by default
#%matplotlib notebook
plt.ion()
abspy.makeabsplot(df, output, dateparsingformat, nperiods, sample_name)

### abs_basecor

In [None]:
### Compute the median of the attenuation data for the selected wavelength region and subtract it from the absorbance data to account for the instrumental baseline drift.

In [None]:
def abs_basecor(df_in, 
                startwv):
    '''
    function to account for the instrumental baseline drift
    :argument df_in: dataframe in input (one row per measurement, one column per wavelength); it is left unmodified
    :argument startwv: label of the first column of the baseline window, as it reads after 'nm' has been appended
                       to every column name; the window extends from this column to the last column
    :return: a tuple (df_out, med, std) where df_out is the baseline corrected attenuation dataframe restricted to
             the columns before startwv, med is the median of the baseline window per each measurement and std is
             the standard deviation per each measurement of the chosen range of wavelength
    '''
    # Work on a copy: renaming the columns of df_in itself would change the caller's
    # dataframe, and a second call would then append 'nm' twice and fail to find startwv.
    df_out = df_in.copy()
    df_out.columns = [str(col) + 'nm' for col in df_out.columns]
    start = df_out.columns.get_loc(startwv)           # position of the first column of the baseline window
    window = df_out.iloc[:, start:]                   # baseline window: from startwv to the last column
    med = window.median(axis = 1)                     # median of the attenuation values of the window, per row
    std = window.std(axis = 1)                        # standard deviation of the window, per row
    df_out = df_out.iloc[:, :start]                   # keep only the columns before the baseline window
    df_out = df_out.subtract(med, axis = 0)           # perform the baseline correction
    return(df_out, med, std)

In [None]:
# Run the baseline correction; df_bc is the tuple returned by abs_basecor
# (corrected dataframe, median, standard deviation).
df_bc = abs_basecor(df, startwv)
# Export the corrected dataframe to the output folder.
df_bc[0].to_csv(
    ''.join([output, 'df_baselinecor_', str(sample_name), '.csv']),
    sep=sep,
    decimal=decimal,
    index=True,
)
df_bc[0]

### In case of baseline shift across periods use this function instead and the following cell:

In [None]:
# Disabled alternative to abs_basecor: the whole definition is wrapped in a string
# literal, so nothing below is executed until the surrounding quotes are removed.
# It applies a separate baseline window (startwvs[j]..endwvs[j], inclusive) to the
# rows whose timestamp falls within dates_start[j]..dates_end[j].
# NOTE(review): points to check before enabling it:
#   - df_in is immediately overwritten with a copy of the global df, so the
#     argument passed by the caller is ignored;
#   - std is computed with DataFrame.std() over all rows of the window columns,
#     whereas med is computed for row i only;
#   - med and std returned at the end are those of the last matching row only,
#     and are undefined if no row falls inside any period;
#   - the ':argument dates_end:' line says "starting dates" but describes the
#     ending dates;
#   - the assignment writes columns 1:startwv into the slice 1: of the row;
#     presumably the columns from startwv onwards are meant to be dropped - confirm.
'''
def abs_basecor(df_in,
                dates_start,
                dates_end,
                startwvs,
                endwvs):
    
    #function to account for the instrumental baseline drift
    #:argument df_in: dataframe in input
    #:argument dates_start: starting dates of specific periods to correct for specific factors or/and different wavelength range
    #:argument dates_end: starting dates of specific periods to correct for specific factors or/and different wavelength range
    #:argument startwvs: starting wavelengths for the different periods for which perform a different baseline correction
    #:argument endwvs: ending wavelengths for the different periods for which perform a different baseline correction
    #:return: the baseline corrected attenuation dataframe and the standard deviation per each measurement of the chosen range of wavelength   
        
    df_in = df.copy()
    df_out = df_in.copy()
    df_out.index = pd.to_datetime(df_out.index, format= dateparsingformat)
    df_out = df_out.reset_index()
    for j in range(0, len(dates_start)):
        start = pd.to_datetime(dates_start[j], format= dateparsingformat)
        end = pd.to_datetime(dates_end[j], format= dateparsingformat)
        for i in df_out.index:
            t = df_out.iloc[i,0]
            if (t >= start) & (t <= end):
                startwv = df_out.columns.get_loc(startwvs[j])                # get the starting and ending column position of the two wavelengths 
                endwv = df_out.columns.get_loc(endwvs[j]) + 1   
                med = df_out.iloc[i, startwv:endwv].median()                 # compute the median of the attenuation values for the columns between start and end 
                std = df_out.iloc[:, startwv:endwv].std()
                df_out.iloc[i, 1:] = df_out.iloc[i, 1:startwv].subtract(med) # obtain a subdataset which excludes columns from 700 nm (or another starting wv) onwards and perform the baseline correction    
    return(df_out, med, std)
'''

In [None]:
# Disabled companion cell for the per-period abs_basecor (five-argument version):
# the code is wrapped in a string literal and only runs once the quotes are removed.
# It restores the timestamp column as index after the correction; the export line
# inside is additionally commented out.
# NOTE(review): the column name 'Timestamp' is hard-coded here; the user-input cell
# defines dateheadername with the same value - confirm which one should be used.
'''
df_bc = abs_basecor(df, dates_start, dates_end, startwvs, endwvs)
df_bc[0].set_index('Timestamp', inplace=True)
#df_bc[0].to_csv(output + 'df_baselinecor_' + str(sample_name) + '.csv', sep = sep, decimal = decimal, index=True) # export the dataframe
df_bc[0]
'''

### Compute and plot the median and the noise level as function of time

In [None]:
#%matplotlib inline 
%matplotlib notebook 
plt.ion()

abspy.makeaplot_nocol(df_bc[1], output, '2018-11-13 04:32:00','2018-12-04 09:12:00','median 700-737.5 nm', 'median_baseline_correction')

In [None]:
# Noise level: three times the standard deviation returned by abs_basecor (third element of df_bc).
sigma3 = 3 * df_bc[2]
sigma3

In [None]:
%matplotlib inline
#%matplotlib notebook 
plt.ion()

abspy.makeaplot_nocol(sigma3, output, '2018-11-13 04:32:00','2018-12-04 09:12:00', '3 \u03C3', 'noise_level_3sigma_zoom')

### Calibration: multiply/sum/subtract a factor

In [None]:
# Disabled calibration cell: the code is wrapped in a string literal and only runs
# once the surrounding quotes are removed.
# For every period j (dates_start[j]..dates_end[j], both inclusive) each row whose
# timestamp falls inside the period is multiplied by, increased by or decreased by
# cal_factors[j], depending on cal_symbols[j] (0, 1 or 2). The result is exported as
# 'df_calibration_<sample_name>.csv' in the output folder.
# NOTE(review): the calibration starts from a copy of df, not from the baseline
# corrected dataframe df_bc[0] - confirm this is intended.
# NOTE(review): the loop runs over len(dates_start) periods, so entries of
# cal_factors / cal_symbols beyond that length are never used.
'''
dfp = df.copy()
dfp.index = pd.to_datetime(dfp.index, format= dateparsingformat)
dfp = dfp.reset_index()
for j in range(0, len(dates_start)):
    start = pd.to_datetime(dates_start[j], format= dateparsingformat)
    end = pd.to_datetime(dates_end[j], format= dateparsingformat)
    for i in dfp.index:
        t = dfp.iloc[i,0]
        if (t >= start) & (t <= end):
            if cal_symbols[j] == 0:
                dfp.iloc[i, 1:] = dfp.iloc[i, 1:] * cal_factors[j]
            if cal_symbols[j] == 1:
                dfp.iloc[i, 1:] = dfp.iloc[i, 1:] + cal_factors[j]
            if cal_symbols[j] == 2:
                dfp.iloc[i, 1:] = dfp.iloc[i, 1:] - cal_factors[j]             
dfp.to_csv(output + 'df_calibration_' + str(sample_name) + '.csv', sep = sep, decimal = decimal, index=True) # export the dataframe
dfp
'''