In [None]:
import numpy as np
import glob2
import datetime
from pathlib import Path
from tqdm.notebook import tqdm
import pickle
from matplotlib import pyplot as plt
from utils.detection.association_geodesic import squarize
import matplotlib as mpl
import matplotlib.dates as mdates
# --- Global matplotlib configuration -----------------------------------------
# These calls modify matplotlib's global rcParams, so they affect every figure
# created after this cell runs. Order matters: the style sheet is applied
# first, then individual settings are overridden.
plt.style.use('classic')
mpl.rcParams.update({
    # Use one uniform 10 pt size for every text element of a figure.
    "font.size": 10,
    "axes.titlesize": 10,
    "axes.labelsize": 10,
    "xtick.labelsize": 10,
    "ytick.labelsize": 10,
    "legend.fontsize": 10,
    "figure.titlesize": 10,
    "font.family": "serif",
    # Font type 42 for PDF/PS output (TrueType embedding per the matplotlib
    # rcParams documentation).
    "pdf.fonttype": 42,
    "ps.fonttype": 42
})
from matplotlib import rc
# Sets font.family again (same value as above) and selects Computer Modern as
# the serif face.
rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
# Render all text through LaTeX.
# NOTE(review): this needs a working LaTeX installation on the machine running
# the notebook - confirm it is available before running.
rc('text', usetex=True)
import math
from numpy.linalg import LinAlgError
import pandas as pd

from utils.transformation.signal import butter_bandpass_filter
from utils.physics.signal.make_spectrogram import make_spectrogram
from utils.data_reading.sound_data.station import StationsCatalog
from utils.physics.sound_model.spherical_sound_model import GridSphericalSoundModel as GridSoundModel, MonthlyHomogeneousSphericalSoundModel as HomogeneousSoundModel
from utils.detection.association_geodesic import compute_candidates, update_valid_grid, update_results, load_detections, compute_grids

In [None]:
# --- Paths --------------------------------------------------------------------
# CATALOG_PATH   : root directory handed to StationsCatalog.
# DETECTIONS_DIR : directory holding the raw detection pickles, one
#                  sub-directory level deep (matched by "*/*.pkl").
# OUT_PATH       : directory where the re-picked detections are written.
CATALOG_PATH = "/media/plerolland/akoustik/MAHY"
DETECTIONS_DIR = "../../../../data/detection/TiSSNet_Pn_raw"
OUT_PATH = "../../../../data/detection/TiSSNet_Pn_raw_repicked"

# sorted() makes the order in which stations are loaded (and later processed)
# reproducible from one run to the next; glob order is otherwise arbitrary.
det_files = sorted(f for f in glob2.glob(f"{DETECTIONS_DIR}/*/*.pkl") if Path(f).is_file())
STATIONS = StationsCatalog(CATALOG_PATH).filter_out_undated().filter_out_unlocated()

# Maps each station object to a numpy array holding every record unpickled
# from its detection file.
detections = {}
for det_file in det_files:
    # File names are expected to look like "<dataset>_<station>.pkl".
    # Path.stem drops the directory and the extension regardless of the OS
    # path separator.
    name_parts = Path(det_file).stem.split("_")
    if len(name_parts) != 2:
        raise ValueError(
            f"Unexpected detection file name {det_file!r}: expected '<dataset>_<station>.pkl'"
        )
    station_dataset, station_name = name_parts
    s = STATIONS.by_dataset(station_dataset).by_name(station_name)[0]

    # Each file is a stream of consecutively pickled records, so keep
    # unpickling until the end of the file is reached.
    # SECURITY NOTE: pickle.load can execute arbitrary code; only run this on
    # detection files that you produced yourself or otherwise trust.
    records = []
    with open(det_file, "rb") as f:
        while True:
            try:
                records.append(pickle.load(f))
            except EOFError:
                break
    detections[s] = np.array(records)

    print(f"Found {len(detections[s])} detections for station {s}")

In [None]:
from utils.data_reading.sound_data.sound_file_manager import DatFilesManager

# Half-length of the waveform segment read around each detection: the segment
# passed to find_onset spans [date - DELTA, date + DELTA].
DELTA = datetime.timedelta(seconds=3)
# Half-width, in seconds, of the window centred on the segment in which the
# onset is searched.
DELTA_SMALL_S = 1.5


def find_onset(data, sf, smooth_len=24, threshold_ratio=5):
    """Locate the onset of a signal from its smoothed, noise-normalised energy.

    The squared signal is smoothed with a moving average of ``smooth_len``
    samples and divided by its mean level over the part of the segment that
    precedes the search window. The onset is the first sample of the search
    window (``DELTA_SMALL_S`` seconds either side of the segment centre) whose
    energy exceeds ``1 / threshold_ratio`` of the maximum energy in that window.

    Parameters
    ----------
    data : array-like, 1-D
        Waveform segment. Its first sample is taken to be ``DELTA`` before the
        reference time (this is how the returned offset is computed).
    sf : int or float
        Sampling frequency of ``data`` in Hz.
    smooth_len : int, optional
        Length, in samples, of the moving-average window (default 24).
    threshold_ratio : float, optional
        The detection threshold is the window maximum divided by this value
        (default 5).

    Returns
    -------
    peak_time : float
        Onset time in seconds relative to ``DELTA`` after the start of the
        segment (negative means the onset is earlier than that instant).
    d : numpy.ndarray
        Smoothed energy normalised by the noise level; it is
        ``smooth_len - 1`` samples shorter than ``data``.

    Raises
    ------
    IndexError
        If no onset can be picked: the segment is too short to contain both a
        noise window and the search window, the noise level is zero or not
        finite, or no sample exceeds the threshold. A too-short segment used
        to make the negative slice bounds wrap around and yield a meaningless
        offset; it is now reported explicitly.
    """
    # Work in floating point so that squaring integer samples cannot overflow.
    data = np.asarray(data, dtype=float)
    if data.size < smooth_len:
        raise IndexError("segment is shorter than the smoothing window")

    # Moving average of the instantaneous energy.
    # NOTE(review): with mode='valid', sample j of d averages data[j:j+smooth_len],
    # so the returned time refers to the start of the averaging window -
    # confirm this is the intended alignment.
    d = np.convolve(data**2, np.ones(smooth_len) / smooth_len, mode='valid')

    center = len(d) // 2
    noise_end = center - round(DELTA_SMALL_S * sf)
    bound_min = round(center - DELTA_SMALL_S * sf)
    bound_max = round(center + DELTA_SMALL_S * sf)
    if noise_end < 1 or bound_min < 0 or bound_max <= bound_min:
        # Negative bounds would silently wrap around to the end of the array.
        raise IndexError("segment is too short for the noise and search windows")

    # Normalise by the mean energy preceding the search window.
    noise_level = np.mean(d[:noise_end])
    if not np.isfinite(noise_level) or noise_level <= 0:
        raise IndexError("noise level is zero or not finite")
    d /= noise_level

    search = d[bound_min:bound_max]
    above = np.nonzero(search > np.max(search) / threshold_ratio)[0]
    if above.size == 0:
        # Happens when the search window contains NaN or no positive energy.
        raise IndexError("no sample exceeds the onset threshold")

    peak_time = (bound_min + above[0]) / sf - DELTA.total_seconds()
    return peak_time, d

# Re-pick the onset time of every detection, station by station, and store the
# result as one pickle per station under OUT_PATH. An existing output file is
# treated as a cache: the station is skipped.
for s, station_detections in detections.items():
    dataset_path = f"{OUT_PATH}/{s.dataset}"
    Path(dataset_path).mkdir(exist_ok=True, parents=True)
    path = f"{dataset_path}/{s.dataset}_{s.name}.pkl"
    if Path(path).exists():
        continue

    if "43" in s.name:
        # Stations whose name contains "43" are read from a dedicated
        # directory below the catalog root rather than through the catalog.
        manager = DatFilesManager(f"{CATALOG_PATH}/MAHY4_fixed/MAHY43", kwargs={"raw":True})
    else:
        s.other_kwargs["raw"] = True
        manager = s.get_manager()

    # Work on a copy so that an interruption halfway through a station leaves
    # the in-memory detections untouched; re-running the cell then cannot
    # shift the same dates twice.
    repicked = station_detections.copy()
    for di in tqdm(range(len(station_detections)), desc=s.name):
        date, _ = station_detections[di]

        # The segment [date - DELTA, date + DELTA] must lie inside the
        # station's recording period; otherwise the original date is kept.
        if date-DELTA < s.date_start or date+DELTA > s.date_end:
            continue

        sf = round(manager.sampling_f)
        data = manager.get_segment(date-DELTA,date+DELTA)
        data = butter_bandpass_filter(data, 4, 60, sf)

        try:
            peak_time, _ = find_onset(data, sf)
        except (IndexError, ValueError):
            # No onset could be picked (e.g. segment too short or empty, or
            # no sample above the threshold): keep the original date.
            continue

        repicked[di][0] = date + datetime.timedelta(seconds=peak_time)

    # Write to a temporary file and rename it afterwards, so that an
    # interrupted write never leaves a truncated pickle that later runs would
    # mistake for a finished station.
    tmp_path = Path(f"{path}.tmp")
    with open(tmp_path, "wb") as f:
        pickle.dump(repicked, f)
    tmp_path.replace(path)

    # Publish the re-picked dates in memory only once they are safely on disk.
    detections[s] = repicked