# Detect spectral peaks in geophone spectrograms

In [4]:
# Imports
from os.path import join
from pandas import concat
from time import time
from multiprocessing import Pool

from utils_basic import SPECTROGRAM_DIR as indir, GEO_STATIONS as stations
from utils_spec import assemble_spec_filename, find_geo_station_spectral_peaks, save_spectral_peaks, read_geo_spectrograms, read_geo_spec_headers
from utils_plot import plot_geo_total_psd_and_peaks, save_figure

In [2]:
# Inputs

# --- Spectrogram data selection ---
# These values are forwarded to assemble_spec_filename to pick the input file.
# window_length is presumably in seconds (the output file name appends "s")
# -- confirm against utils_spec.
window_length = 1.0
overlap = 0.0
# downsample_factor is only consulted when downsample is True
downsample_factor = 60
downsample = False

# --- Peak detection ---
# All of these are handed to find_geo_station_spectral_peaks.
# prom_threshold is presumably in dB (the output file name tags it "db");
# the meaning of "rbw" is defined in utils_spec -- TODO confirm.
prom_threshold = 5
rbw_threshold = 0.2
# None is passed through unchanged; presumably it means "no frequency bound"
# -- verify against find_geo_station_spectral_peaks.
freqmin = None
freqmax = None
# Second positional argument of find_geo_station_spectral_peaks
num_process = 32

In [3]:
# Walk through every station and every time label of its spectrogram file,
# detect the spectral peaks, and collect one dataframe per time label in peak_dfs

# The detection settings are the same for every call, so gather them once
peak_kwargs = {
    "rbw_threshold": rbw_threshold,
    "prom_threshold": prom_threshold,
    "freqmin": freqmin,
    "freqmax": freqmax,
}

peak_dfs = []
for station in stations:
    print(f"### Working on {station}... ###")

    # Build the path of this station's spectrogram file; the down-sampling
    # factor is only passed along when down-sampling is switched on
    print("Reading the list of time labels...")
    factor_kwarg = {"downsample_factor": downsample_factor} if downsample else {}
    filename_in = assemble_spec_filename("whole_deployment", "daily", "geo", station, window_length, overlap, downsample, **factor_kwarg)
    inpath = join(indir, filename_in)

    # The time labels to iterate over come from the file header
    header_dict = read_geo_spec_headers(inpath)
    time_labels = header_dict["time_labels"]

    for time_label in time_labels:
        t_start = time()

        # Load only the spectrograms belonging to the current time label
        print(f"Reading the spectrograms of {time_label}...")
        stream_spec = read_geo_spectrograms(inpath, time_labels = [time_label])

        # Detect the peaks; the second return value is not needed here
        print("Detecting the peaks...")
        peak_df, _ = find_geo_station_spectral_peaks(stream_spec, num_process, **peak_kwargs)
        print(f"In total, {len(peak_df)} spectral peaks found.")

        # Tag every row with its station, then store the table for the final merge
        peak_df["station"] = station
        peak_dfs.append(peak_df)

        # Report the wall-clock time spent on this time label
        print(f"Elapsed time: {time() - t_start}")

### Working on A01... ###
Reading the list of time labels...
Reading the spectrograms of 20200110000000000000...
Detecting the peaks...
In total, 2556918 spectral peaks found.
Elapsed time: 19.67817258834839
Reading the spectrograms of 20200111000000000000...
Detecting the peaks...
In total, 3662454 spectral peaks found.
Elapsed time: 27.4687716960907
Reading the spectrograms of 20200112000000000000...
Detecting the peaks...


KeyboardInterrupt: 

In [None]:
# Merge the per-time-label tables into one, drop rows that repeat the same
# (station, time, frequency) combination, and renumber the index from zero
peak_df = (
    concat(peak_dfs)
    .drop_duplicates(subset = ["station", "time", "frequency"])
    .reset_index(drop = True)
)

print("Saving the results...")

# The output file name encodes the processing parameters; the down-sampling
# factor is part of the name only when down-sampling was used
if not downsample:
    filename = f"geo_spectral_peaks_window{window_length:.0f}s_overlap{overlap:.1f}_prom{prom_threshold:.0f}db_rbw{rbw_threshold:.1f}.csv"
else:
    filename = f"geo_spectral_peaks_window{window_length:.0f}s_overlap{overlap:.1f}_downsample{downsample_factor:d}_prom{prom_threshold:.0f}db_rbw{rbw_threshold:.1f}.csv"

# The results are written next to the input spectrograms
outdir = indir
outpath = join(outdir, filename)

save_spectral_peaks(peak_df, outpath)
print(f"Results saved to {outpath}")