# INGV ScatCluster
A workflow for clustering continuous time series with a deep scattering network. 

## Set Experiment Parameters

In [16]:
# ---------------------------------------------------------------------------
# Output location
# ---------------------------------------------------------------------------
# Directory in which the results of the experiment are stored. The directory must
# already exist and the path must end with a "/".
data_savepath = "/home/jovyan/shared/users/zerafa/data/sds.chris/scatcluster_workflow/"

# ---------------------------------------------------------------------------
# Data selection
# ---------------------------------------------------------------------------
# Data source path followed by the network / station / location / channel selectors.
# "*" presumably acts as a match-all wildcard -- TODO confirm against the data client.
# NOTE(review): the archive directory is named "Sardinia50Hz" while
# network_sampling_rate below is 100 -- confirm the two are consistent.
data_client_path = "/home/jovyan/data/Sardinia50Hz/"
data_network = "MN"
data_station = "AQU"
data_location = "*"
data_channel = "*"

# Short sample window. It is used to plot a sample of the transform so that the
# parameterization of the scattering network can be checked (two hours here).
data_sample_starttime = "2022-01-01T00:00"
data_sample_endtime = "2022-01-01T02:00"

# Full duration covered by the analysis.
data_starttime = "2022-01-01T00:00"
data_endtime = "2022-01-03T00:00"

# Days to leave out of the analysis; empty means nothing is excluded.
# Example: data_exclude_days = ["2021-01-05T00:00", "2021-01-15T00:00"]
data_exclude_days = []

# ---------------------------------------------------------------------------
# Scattering network: windowing
# ---------------------------------------------------------------------------
# Window length in SECONDS used to chunk the data before it is passed to the
# scattering network. 3600 (one hour) is the recommended value; finer windowing is
# allowed at an increased computational cost.
network_segment = 3600

# Sliding duration between two consecutive windows. It is tied to the segment length
# here, i.e. the windows do not overlap, which is the recommended setting.
network_step = network_segment

# Sampling frequency of the data (presumably in Hz -- TODO confirm).
network_sampling_rate = 100

# ---------------------------------------------------------------------------
# Scattering network: filter banks
# ---------------------------------------------------------------------------
# One dict per layer (two layers here), each with the keys:
#   "octaves"    -- number of octaves of the layer
#   "resolution" -- number of wavelets per octave
#   "quality"    -- quality factor of the mother wavelet
#
# Hints:
#   * The first-layer filters should be very dense, with an overlap of at least 50%
#     between frequencies.
#   * The second layer should be sparse, with no overlap between frequencies.
network_banks = (
    {"octaves": 4, "resolution": 4, "quality": 2},  # first layer
    {"octaves": 7, "resolution": 1, "quality": 1},  # second layer
)

# How the scattering coefficients are summarized.
network_pooling = "avg"

# ---------------------------------------------------------------------------
# ICA reduction
# ---------------------------------------------------------------------------
# NOTE(review): ica_ev_limit looks like an explained-variance limit and the min/max
# values like bounds on the number of independent components -- confirm against the
# ScatCluster documentation.
ica_overwrite_previous_models = False
ica_ev_limit = 0.99
ica_min_ICAs = 9
ica_max_ICAs = 10

# ---------------------------------------------------------------------------
# Clustering of ICA contributions
# ---------------------------------------------------------------------------
# Method used to build the dendrogram. Options listed for this setting:
# "single", "centroid", "median", "ward".
# See the fastcluster documentation (http://danifold.net/fastcluster.html).
dendrogram_method = "ward"

# Time zone for the local-time histogram.
# NOTE(review): this value is not forwarded in the ScatCluster(...) call in this
# notebook -- confirm whether that is intended.
dendrogram_time_zone = "Europe/Rome"

# ---------------------------------------------------------------------------
# Waveform plotting
# ---------------------------------------------------------------------------
# Number of waveforms to show.
waveforms_n_samples = 5

# Initiate ScatCluster

In [17]:
from scatcluster import ScatCluster

In [18]:
# Build the ScatCluster workflow object from the experiment parameters defined in the
# parameter cell. Every setting is forwarded by keyword under its own name.
# NOTE(review): dendrogram_time_zone is defined in the parameter cell but is not
# forwarded here -- confirm whether ScatCluster expects it.
SC = ScatCluster(
    # Output location
    data_savepath=data_savepath,
    # Data selection
    data_client_path=data_client_path,
    data_network=data_network,
    data_station=data_station,
    data_location=data_location,
    data_channel=data_channel,
    data_sample_starttime=data_sample_starttime,
    data_sample_endtime=data_sample_endtime,
    data_starttime=data_starttime,
    data_endtime=data_endtime,
    data_exclude_days=data_exclude_days,
    # Scattering network
    network_segment=network_segment,
    network_step=network_step,
    network_sampling_rate=network_sampling_rate,
    network_banks=network_banks,
    network_pooling=network_pooling,
    # ICA reduction
    ica_ev_limit=ica_ev_limit,
    ica_min_ICAs=ica_min_ICAs,
    ica_max_ICAs=ica_max_ICAs,
    ica_overwrite_previous_models=ica_overwrite_previous_models,
    # Clustering and waveform plotting
    dendrogram_method=dendrogram_method,
    waveforms_n_samples=waveforms_n_samples,
)

In [19]:
# Prepare the results directory tree for this run. The recorded output reports a main
# directory plus scatterings/, figures/, networks/, ICA/, clustering/, data/ and config/
# subdirectories.
# NOTE(review): the recorded output shows paths under
# /Users/christopher.zerafa/Downloads/scatcluster_run/, which does not match
# data_savepath in the parameter cell -- the output appears to come from an earlier
# run; re-execute the notebook to refresh it.
SC.prepare_directory_structure()

Main directory /Users/christopher.zerafa/Downloads/scatcluster_run/ already exists. 

Directory /Users/christopher.zerafa/Downloads/scatcluster_run/scatterings already exists.
Directory /Users/christopher.zerafa/Downloads/scatcluster_run/figures already exists.
Directory /Users/christopher.zerafa/Downloads/scatcluster_run/networks already exists.
Directory /Users/christopher.zerafa/Downloads/scatcluster_run/ICA already exists.
Directory /Users/christopher.zerafa/Downloads/scatcluster_run/clustering already exists.
Directory /Users/christopher.zerafa/Downloads/scatcluster_run/data already exists.
Directory /Users/christopher.zerafa/Downloads/scatcluster_run/config already exists.


In [20]:
# Store the experiment configuration. According to the recorded output it is written as
# a JSON file in the config/ subdirectory, with a file name that appears to be built from
# the network, station, location, segment, step, sampling rate, bank settings and pooling.
# NOTE(review): the "*" location selector ends up verbatim in the file name -- confirm
# this is acceptable on the target filesystem.
SC.config_store()

SSN config stored at "/Users/christopher.zerafa/Downloads/scatcluster_run/config/MN_AQU_*_3600_3600_100_4_4_2_7_1_1_avg.json"
