### User input

In [None]:
# Before starting: fill out the Excel file "events_table" and export it as a CSV file.

# Personal settings of the local user (input and output directories).
# Imported explicitly so it is clear which names config.py must provide.
from config import input_directory, output_directory

# --- Input file format ---
decimal = '.'                                                                # decimal mark of the input file
sep = ';'                                                                    # column separator of the input file
sep2 =','                                                                    # column separator of the input events file

# --- Paths (built by string concatenation, so the configured directories must end with a path separator) ---
input_dir = input_directory + 'results/df_baselinecor_sw.csv'                # input data file
output = output_directory + 'results/'                                       # output directory (keep the trailing slash)
indata_events = input_directory + 'other_data/events_table.csv'              # input events table

# --- Layout of the input data ---
dateheadername = 'Timestamp'                                                 # header of the date column
header = 0                                                                   # header row number
dateparsingformat = '%Y-%m-%d %H:%M:%S'                                      # format of the dates
sample_name = 'sw'                                                           # name of the sample

# --- Period to plot ---
timestart = '2018-11-13 05:02:00'                                            # starting date
timeend = '2018-12-04 08:44:00'                                              # ending date

# --- Events ---
nevents = 2                                                                  # number of event typologies
evdrop = [1, 2]                                                              # event codes to drop (to drop no events, set to None)


# --- Plot settings for the absorbance time series and the events ---
col_sel = '255 nm'                                                           # wavelength column to plot
title = 'absorbance_data_baseline_corrected_'                                # title of the exported figure 1
title1 = 'absorbance_data_baseline_corrected_with_events'                    # title of the exported figure 2
title2 = 'absorbance_data_baseline_corrected_with_no_events'                 # title of the exported figure 3
fig_format = '.tiff'                                                         # format of the exported figure
dpi = 300                                                                    # resolution of the exported figure

### Start environment and import data

In [None]:
import abspectroscopy_functions as abspy # Functions from the AbspectroscoPY toolbox
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter, AutoMinorLocator)
import statistics 
from statistics import median

# Read the input data file; its first column is used as the index.
# The user-defined decimal mark is passed explicitly so that the "decimal" setting is honoured.
df = pd.read_csv(input_dir, sep = sep, decimal = decimal, header = header, index_col = 0)
df.index = pd.to_datetime(df.index)     # make sure the time column (here the index) uses a datetime dtype
df

In [None]:
%matplotlib inline # necessary if the notebook is not configured to use the inline backend by default
#%matplotlib notebook
plt.ion()
abspy.makeaplot(df, output, col_sel, timestart, timeend, sample_name, title)

### outliers_id_drop

In [None]:
def outliers_id_drop(df_in1,
                     df_in2,
                     output_dir,
                     col_sel):
    '''
    function to label outliers and events based on user knowledge and remove the known outliers

    The events table is read with the notebook-level settings "sep2" (separator) and "header"
    (header row); the event codes to remove are taken from the notebook-level "evdrop"
    (a single code, a list of codes, or None to remove nothing).

    :argument df_in1: dataframe in input (df_bc from the function "abs_basecor"), indexed by datetime
    :argument df_in2: path of the events table (csv) with the columns start_date, start_time,
                      end_date, end_time and event_code
    :argument output_dir: directory where storing the results (the file name is appended to it as is)
    :argument col_sel: selected wavelength column
    return: the dataframe of events (df_out1), indexed by the mid-point of each event, and the
            baseline corrected dataframe after removing the specified events (df_out2);
            df_out2 is also written to "df_noselectedevents.csv" in output_dir
    '''
    ### LOAD THE EVENTS TABLE
    df_ev = pd.read_csv(df_in2, sep = sep2, header = header)
    # Remove the rows without an event code and renumber the rows, so that the
    # positions used below always match the index labels.
    df_ev = df_ev.dropna(subset=['event_code']).reset_index(drop=True)

    # Combine date and time explicitly (instead of the deprecated nested "parse_dates" lists).
    df_ev['start_date_start_time'] = pd.to_datetime(df_ev['start_date'].astype(str) + ' ' + df_ev['start_time'].astype(str))
    df_ev['end_date_end_time'] = pd.to_datetime(df_ev['end_date'].astype(str) + ' ' + df_ev['end_time'].astype(str))
    df_ev = df_ev.drop(columns=['start_date', 'start_time', 'end_date', 'end_time'])

    ### VISUALISE SPECIFIC EVENT TYPOLOGIES
    # x coordinate of the symbol: mid-point between start and end of each event
    x_ev = df_ev['start_date_start_time'] + ((df_ev['end_date_end_time'] - df_ev['start_date_start_time'])/2)

    # y coordinate of the symbol: median absorbance during the event, shifted by one unit
    absorbance = df_in1[col_sel]
    y_ev = []
    for ev_start, ev_end in zip(df_ev['start_date_start_time'], df_ev['end_date_end_time']):
        in_event = (absorbance.index >= ev_start) & (absorbance.index <= ev_end)
        window = absorbance[in_event].dropna()
        if window.empty:
            # no measurement falls inside this event: there is no sensible y coordinate
            y_ev.append(np.nan)
            continue
        ym = window.median()
        # absorbance value at the timestamp closest to the event starting date
        nearest_pos = absorbance.index.get_indexer([ev_start], method='nearest')[0]
        ystart = absorbance.iloc[nearest_pos]
        # shift the symbol away from the curve: up if the event median lies above the
        # starting value, down otherwise (for visualisation purposes only)
        y_ev.append(ym + 1 if ym > ystart else ym - 1)

    df_ev['middle_date_time'] = x_ev
    df_ev['median_abs_middle_date_time'] = y_ev

    # Events whose code is listed in "evdrop"; isin() matches every listed code,
    # whatever the number of rows in the events table.
    codes_to_drop = [] if evdrop is None else np.atleast_1d(evdrop).tolist()
    ev = df_ev[df_ev['event_code'].isin(codes_to_drop)]

    df_out1 = df_ev[['middle_date_time', 'median_abs_middle_date_time', 'event_code']].set_index('middle_date_time')

    ### DROP SPECIFIC EVENT TYPOLOGIES
    # Flag every measurement that falls inside one of the selected events (bounds included).
    drop_mask = np.zeros(len(df_in1), dtype=bool)
    for ev_start, ev_end in zip(ev['start_date_start_time'], ev['end_date_end_time']):
        drop_mask |= (df_in1.index >= ev_start) & (df_in1.index <= ev_end)
    df_out2 = df_in1.loc[~drop_mask].copy()

    df_out2.to_csv(output_dir + 'df_noselectedevents.csv', index = True)
    return(df_out1, df_out2)

In [None]:
# Run the toolbox implementation (abspy.outliers_id_drop) with the user settings defined above.
df_ev, df_drop = abspy.outliers_id_drop(
    df,               # absorbance data loaded above
    indata_events,    # path of the events table
    output,           # output directory
    col_sel,          # selected wavelength column
)

### Visualise specific event typologies

In [None]:
#%matplotlib inline
%matplotlib notebook
plt.ion()
abspy.makeaplotev(df, df_ev, output, col_sel, timestart, timeend, dateparsingformat, sample_name, title1)

### Visualise the time series after dropping specific event typologies

In [None]:
#%matplotlib inline 
%matplotlib notebook
plt.ion()
abspy.makeaplotev(df_drop, df_ev, output, col_sel, timestart, timeend, dateparsingformat, sample_name, title2)