# Group the spectral peaks of geophone data into regular time-frequency bins

In [None]:
# Imports
from os.path import join

from numpy import linspace
from pandas import read_csv, date_range, Timestamp, Timedelta

from utils_basic import SPECTROGRAM_DIR as indir, GEO_STATIONS as stations
from utils_spec import (
    assemble_spec_filename,
    bin_counts_to_df,
    find_geo_station_spectral_peaks,
    group_spectral_peaks_regular_bins,
    read_geo_spectrograms,
    read_spectral_peaks,
    save_spectral_peak_bin_counts,
)
from utils_plot import plot_array_spec_peak_bin_counts, save_figure

In [None]:
# Inputs
# Data
# These settings are formatted into the file-name suffix of the per-station
# spectral-peak files, so they select which precomputed peak files are read.
# Appears in the suffix as "window{...}s" (presumably the window length in seconds)
window_length = 60.0
# Appears in the suffix as "overlap{...}"
overlap = 0.0
# When True, the suffix also contains "downsample{downsample_factor}"
downsample = True
downsample_factor = 60
# Appears in the suffix as "prom{...}db" (presumably a prominence threshold in dB)
prom_threshold = 5
# Appears in the suffix as "rbw{...}"
rbw_threshold = 0.2
# Passed to read_spectral_peaks as the format of the peak files
file_format = "h5"

# Grouping
# Time range handed to group_spectral_peaks_regular_bins
starttime_bins = "2020-01-10T00:00:00"
endtime_bins = "2020-02-02T00:00:00"

# Bin sizes and frequency range handed to group_spectral_peaks_regular_bins
# NOTE(review): "1s" looks like a pandas-style offset string -- confirm against the helper
time_bin_width = "1s"
freq_bin_width = 1.0 # in Hz
# min_freq and max_freq are also used as the frequency limits of the plot
min_freq = 0.0
max_freq = 200.0

# Passed to bin_counts_to_df as its count_threshold
count_threshold = 4

# Plotting
# Time range shown in the example plot
starttime_plot = "2020-01-13T00:00:00"
endtime_plot = "2020-01-14T00:00:00"

# Passed to plot_array_spec_peak_bin_counts as its size_scale
size_scale = 30

In [None]:
# Process the detections of each station
# The suffix encodes the peak-detection settings and therefore selects which
# set of per-station peak files is read. The downsampling tag is only present
# when the peaks were found on downsampled spectrograms.
downsample_tag = f"_downsample{downsample_factor:d}" if downsample else ""
suffix = f"window{window_length:.0f}s_overlap{overlap:.1f}{downsample_tag}_prom{prom_threshold:.0f}db_rbw{rbw_threshold:.1f}"

for i, station in enumerate(stations):
    print(f"Processing {station}...")

    # Read the spectral peaks
    # The file extension is the configured file format (the previous code
    # referenced an undefined name here).
    filename = f"geo_spectral_peaks_{station}_{suffix}.{file_format}"
    inpath = join(indir, filename)
    peak_df = read_spectral_peaks(inpath, file_format)

    # Group the spectral peaks into regular time-frequency bins
    time_bin_centers, freq_bin_centers, bin_counts = group_spectral_peaks_regular_bins(
        peak_df,
        starttime_bins,
        endtime_bins,
        time_bin_width = time_bin_width,
        min_freq = min_freq,
        max_freq = max_freq,
        freq_bin_width = freq_bin_width,
    )

    # Accumulate the counts over all stations; the first station starts the sum
    if i == 0:
        bin_counts_sum = bin_counts
    else:
        bin_counts_sum += bin_counts
    

In [None]:
# Turn the station-summed bin counts into a DataFrame, handing the configured
# count threshold to the conversion helper
bin_count_df = bin_counts_to_df(
    time_bin_centers,
    freq_bin_centers,
    bin_counts_sum,
    count_threshold = count_threshold,
)

In [None]:
# Write the bin-count DataFrame to disk; the file stem carries the same
# settings suffix as the input peak files
file_stem = f"geo_spectral_peak_bin_counts_{suffix}"
save_spectral_peak_bin_counts(
    bin_count_df,
    file_stem,
    "hdf",
)

In [None]:
# Plot the bin counts in the example time range
# The station-summed bin counts are plotted here. The previous code passed
# peak_df, which only holds the raw peaks of the last station processed.
fig, ax = plot_array_spec_peak_bin_counts(bin_count_df,
                                        size_scale = size_scale,
                                        starttime = starttime_plot, endtime = endtime_plot, freq_lim = (min_freq, max_freq),
                                        major_time_spacing = "6h", minor_time_spacing = "1h")