# Low-frequency DAS Edge Processing Using [DASDAE](https://github.com/DASDAE)

This Jupyter Notebook performs low-frequency processing of a [spool](https://dascore.org/tutorial/concepts.html#:~:text=read%20the%20docs!-,Data%20structures,-DASCore%20has%20two) of distributed acoustic sensing (DAS) data in real time. It uses the [DASCore](https://dascore.org/) package and the ```lf_das.py``` script.


<svg width="100%" height="1">
  <line x1="0" y1="0" x2="100%" y2="0" style="stroke:rgb(0,0,0);stroke-width:2" />
</svg>


#### Notes: 
1. Before using this notebook, make sure the ```lf_das.py``` script is in the same directory as this notebook and that DASCore has been successfully installed using ```pip``` or ```conda```:
    ```bash
    pip install dascore
    ```
    or
    ```bash
    conda install dascore -c conda-forge
    ```   
2. A list of all supported I/O formats can be found [here](https://dascore.quarto.pub/dascore/).

Current DASCore version: 0.0.13 (tested)

Date: 09/07/2023


Contact: [Ahmad Tourei](https://github.com/ahmadtourei/)

ahmadtourei@gmail.com

In [None]:
# import libraries
import warnings
warnings.simplefilter('ignore')

import dascore as dc
import matplotlib.pyplot as plt
import numpy as np
import time 

from datetime import datetime
from lf_das import LFProc, get_edge_effect_time, get_patch_time


### Get a spool of data to work on

In [None]:
# define the data path (directory holding the spool of data) and the output folders
data_path = '/mnt/h/data'
output_data_folder =  '/mnt/h/results'
output_figure_folder = '/mnt/h/figures'

# get the spool of data from the defined data path. The index is updated first (on the first run
# this indexes the patches; on later runs it refreshes the index file), and the result is then
# sorted by time so that the ordering is applied to the freshly indexed contents.
sp = dc.spool(data_path).update().sort("time")

# show the contents of the first 5 patches
content_df = sp.get_contents()
content_df.head()

### Get some metadata and define a sub spool (if needed)

In [None]:
# read gauge length, channel spacing, and sampling information from the first patch
patch_0 = sp[0]
attrs_0 = patch_0.attrs

gauge_length = attrs_0['gauge_length']
print("Gauge length = ", gauge_length)

channel_spacing = attrs_0['step_distance']
print("Channel spacing = ", channel_spacing)

sampling_interval = attrs_0['step_time']
print("Sampling interval = ", sampling_interval)

# express the time step in seconds, then invert it to get the sampling rate
interval_in_sec = sampling_interval / np.timedelta64(1, 's')
sampling_rate = 1/interval_in_sec
print("Sampling rate = ", sampling_rate)

# duration of one patch = number of time samples divided by the sampling rate
n_time_samples = len(patch_0.coords["time"])
num_sec = n_time_samples/sampling_rate
print("Number of seconds in each patch= ", num_sec)

# select a sub-spool: convert the first/last channel indices into distances
ch_start = 400
ch_end = 1400
distance_axis = patch_0.coords['distance']
d_1 = distance_axis[ch_start]  # in meter
d_2 = distance_axis[ch_end]  # in meter
# alternatively, the distance limits can be set directly:
# d_1 = -115 # in meter
# d_2 = 2000 # in meter
sub_sp = sp.select(distance=(d_1, d_2))


### Get low-pass filter parameters

In [None]:
# memory size (in MB) that you'd like to dedicate to low-frequency processing
memory_size = 10000

# channel count handed to get_patch_time, taken from the selected channel index range
n_channels = ch_end - ch_start
patch_length = get_patch_time(
    memory_size=memory_size,
    sampling_rate=sampling_rate,
    num_ch=n_channels,
)
print('patch_length = ', patch_length, ' sec.')

# target sampling interval in seconds
d_t = 10.0  # so, cutoff_freq = Nyq_new = 1/(2*d_t) = 0.05 hz

# tolerance used to get the edge time (a smaller tolerance eliminates longer edges in each
# patch and gives higher accuracy; 1e-3 is recommended)
tolerance = 1e-3
edge_buffer = get_edge_effect_time(
    sampling_interval=1/sampling_rate,
    total_T=patch_length,
    tol=tolerance,
    freq=1/d_t,
)
print('edge_buffer = ', edge_buffer, ' sec.')


### Set real-time processing parameters

In [None]:
# set the desired wait time after each run
time_step_for_processing = 125 # in sec.

# make sure that the wait time is not smaller than number of seconds of each data file
if time_step_for_processing<num_sec:
    time_step_for_processing = num_sec

# make sure that the wait time is larger than the patch length multiplied by a buffer factor
if time_step_for_processing<(2*edge_buffer)*1.5:
    time_step_for_processing = (2*edge_buffer)*1.5
    if patch_length > time_step_for_processing:
        patch_length = time_step_for_processing

print("time_step_for_processing: ", time_step_for_processing)
print("patch length: ", patch_length)

# set the starting time from which low-freq. processing applys (it can be the time_min for the first patch of the spool)
start_processing_time = np.datetime64('2023-03-22T06:00:00') # in UTC, or any other time zone that original data are stored 


### Do low-frequency processing in real-time

In [None]:
# Real-time processing loop: re-index the data directory, low-pass process whatever is new,
# wait, and repeat. The loop ends when a run finds the same number of patches as the previous run.

# processing parameters that do not change between runs are computed once, outside the loop
process_patch_size = int(patch_length/d_t)
edge_buff_size = int(np.ceil(edge_buffer/d_t))
# length (in sec.) subtracted from the last processed time to get the start of follow-up runs
buffer = int((np.ceil(edge_buffer/d_t)-1)*d_t)

initial_run = True
i = 0  # run counter
while True:
    # select an updated sub-spool; it is sorted by time so that sub_sp[-1] below is the most
    # recent patch
    sp = dc.spool(data_path).update().sort("time")
    sub_sp = sp.select(distance=(d_1, d_2))
    len_updated_sp = len(sub_sp)

    # stop once a run (other than the first one) sees no change in the number of patches
    if not initial_run and len_last_sp == len_updated_sp:
        print("No new data was detected. Real-time processing ended successfully.")
        break

    # pass the spool to the LFProc class
    lfp = LFProc(sub_sp)
    lfp.update_processing_parameter(
        output_sample_interval=d_t,
        process_patch_size=process_patch_size,
        edge_buff_size=edge_buff_size,
    )
    # set the output folder - Caution: If you set delete_existing=True, you will remove all contents in the output_data_folder directory. If you set delete_existing=False, you need to have an empty output_data_folder directory to proceed.
    lfp.set_output_folder(output_data_folder, delete_existing=False)

    i += 1
    print("run number: ", i)

    if initial_run:
        # the first run starts from the user-defined start time
        t_1 = start_processing_time
    else:
        # follow-up runs start `buffer` seconds before the last processed time
        t_1 = lfp.get_last_processed_time() - np.timedelta64(buffer, 's')
    # every run ends at the end time of the last patch in the updated sub-spool
    t_2 = np.datetime64(sub_sp[-1].attrs['time_max'])

    # do lowpass processing on (t_1,t_2) time range
    lfp.process_time_range(t_1, t_2)
    initial_run = False

    # remember the spool size for the next run's "no new data" check, then wait
    len_last_sp = len_updated_sp
    time.sleep(time_step_for_processing)
