# Stack the geophone spectrograms of all geophone stations and find the spectral peaks in the stacked spectrogram

In [1]:
# Imports
from os.path import join
from pandas import concat, Timestamp
from time import time
from multiprocessing import Pool

from utils_basic import SPECTROGRAM_DIR as indir, GEO_STATIONS as stations
from utils_basic import get_geophone_days
from utils_spec import find_trace_spectral_peaks, get_spectrogram_file_suffix, get_spec_peak_file_suffix 
from utils_spec import read_geo_spectrograms, read_geo_spec_headers, save_spectral_peaks
from utils_plot import plot_geo_total_psd_and_peaks, save_figure

In [2]:
# Inputs
# All tunable parameters of this notebook live in this cell.

# --- Data: identify which spectrogram files are read ---
# These four values are passed to get_spectrogram_file_suffix() to build the
# input file names.
window_length = 1.0        # presumably seconds (saved file names contain "window1s")
overlap = 0.0              # appears as "overlap0.0" in saved file names
downsample = False         # whether the downsampled spectrograms are selected
downsample_factor = 60     # forwarded together with `downsample`

# --- Finding peaks: forwarded to find_trace_spectral_peaks() ---
num_process = 32           # number of processes used for the peak search
rbw_threshold = 0.2        # appears as "rbw0.2" in saved file names
prom_threshold = 10        # presumably dB (saved file names contain "prom10db")
min_freq = None            # lower frequency bound; None leaves it unset
max_freq = 200.0           # upper frequency bound, presumably Hz ("freq0to200hz")

# --- Writing: output formats produced by the saving cell ---
to_csv = False
to_hdf = True

In [3]:
# Get the time labels to process
# One label is produced per entry returned by get_geophone_days().
days = get_geophone_days()


def _day_to_time_label(day):
    """Format one day as a "%Y%m%d%H%M%S%f" string, e.g. "20200110000000000000"."""
    return Timestamp(day).strftime("%Y%m%d%H%M%S%f")


time_labels = [_day_to_time_label(day) for day in days]

In [4]:
# Compute the stack and find the peaks for each time label
#
# For every time label, the total PSD of each station that has data is summed
# and then divided by the number of contributing stations. Spectral peaks are
# detected on that stacked trace, and the per-label peak tables are finally
# concatenated into `peak_df`, which the saving cell reads.
suffix_spec = get_spectrogram_file_suffix(window_length, overlap, downsample, downsample_factor = downsample_factor)
peak_dfs = []
for time_label in time_labels:
    print(f"Processing {time_label}...")
    clock1 = time()

    # Loop over stations to compute the stack.
    # The stack is reset for every time label so that nothing carries over
    # from the previous one.
    num_sta = 0
    trace_spec_stack = None
    for station in stations:
        print(f"Processing {station}...")

        # Read the spectrogram blocks
        filename = f"whole_deployment_daily_geo_spectrograms_{station}_{suffix_spec}.h5"
        inpath = join(indir, filename)
        stream_spec = read_geo_spectrograms(inpath, time_labels = [time_label])

        # A station without data for this time label does not contribute
        if stream_spec is None:
            print(f"No data for {station} at {time_label}.")
            continue

        num_sta += 1

        # Compute the total PSD
        trace_spec_total = stream_spec.get_total_power()

        # Stack the PSDs: the first contributing station seeds the stack
        # (copied so the in-place sum never touches the source trace)
        if trace_spec_stack is None:
            trace_spec_stack = trace_spec_total.copy()
        else:
            trace_spec_stack.data += trace_spec_total.data

    if num_sta == 0:
        print(f"No data for any station at {time_label}.")
        continue

    # Divide by the number of stations that actually contributed
    trace_spec_stack.data /= num_sta

    # Find the spectral peaks
    # A separate name is used here so that `peak_df` only ever refers to the
    # concatenated result built after the loop.
    peak_df_label = find_trace_spectral_peaks(trace_spec_stack, num_process, prom_threshold = prom_threshold, rbw_threshold = rbw_threshold, min_freq = min_freq, max_freq = max_freq)
    num_peaks = len(peak_df_label)
    print(f"Found {num_peaks} peaks.")

    clock2 = time()
    print(f"Elapsed time: {clock2 - clock1} s.")

    # Append to the list
    peak_dfs.append(peak_df_label)

print("Done.")
print("")

# Concatenate the peak dataframes
print("Concatenating the peak dataframes...")

# Without this guard, an empty list would leave `peak_df` undefined (NameError)
# or, on a reused kernel, silently pick up a stale `peak_df` from an earlier
# run, which the saving cell would then write to disk.
if len(peak_dfs) == 0:
    raise RuntimeError("No spectral peaks were computed: no station had data for any time label.")

peak_df = concat(peak_dfs, axis = 0)

num_peaks = len(peak_df)
print(f"Total number of peaks: {num_peaks}.")

Processing 20200110000000000000...
Processing A01...
Processing A02...
Processing A03...
Processing A04...
Processing A05...
Processing A06...
Processing A07...
Processing A08...
Processing A09...
Processing A10...
Processing A11...
Processing A13...
Processing A14...
Processing A15...
Processing A16...
Processing A17...
Processing A19...
Processing B01...
Processing B02...
Processing B03...
Processing B04...
Processing B06...
Processing B07...
Processing B08...
Processing B09...
Processing B10...
Processing B11...
Processing B12...
Processing B13...
Processing B14...
Processing B15...
Processing B16...
Processing B17...
Processing B18...
Processing B19...
Processing B20...
Finding the spectral peaks in 32 processes...
Found 66258 peaks.
Elapsed time: 30.54726505279541 s.
Processing 20200111000000000000...
Processing A01...
Processing A02...
Processing A03...
Processing A04...
Processing A05...
Processing A06...
Processing A07...
Processing A08...
Processing A09...
Processing A10...
Pr

In [5]:
# Save the peaks
# The output file stem combines the spectrogram suffix with a suffix that
# encodes the peak-detection parameters.
suffix_peak = get_spec_peak_file_suffix(
    prom_threshold,
    rbw_threshold,
    min_freq = min_freq,
    max_freq = max_freq,
)
file_stem = f"geo_stack_spectral_peaks_{suffix_spec}_{suffix_peak}"

# Write one file per enabled output format (CSV first, then HDF5)
for enabled, file_format in ((to_csv, "csv"), (to_hdf, "h5")):
    if enabled:
        save_spectral_peaks(peak_df, file_stem, file_format)

Results saved to /fp/projects01/ec332/data/spectrograms/geo_stack_spectral_peaks_window1s_overlap0.0_prom10db_rbw0.2_freq0to200hz.h5
