In [2]:
import math
import numpy as np
import glob2
import torch
import datetime
from pathlib import Path
import pickle
from tqdm.notebook import tqdm
from torchvision.transforms import Resize
from scipy.signal import find_peaks
from matplotlib import pyplot as plt
import itertools
from numpy.linalg import LinAlgError

from utils.data_reading.sound_data.station import StationsCatalog
from utils.physics.sound_model.spherical_sound_model import HomogeneousSphericalSoundModel as SoundModel
from utils.physics.signal.make_spectrogram import make_spectrogram

In [26]:
# --- Input / output locations -------------------------------------------------
# Directory globbed for the per-station detection files (one pickle stream per station).
det_dir = "../../../data/detection/TiSSNet/GEODAMS_res/"
# Despite its name, this is the path of the CSV *file* the association cell writes to.
out_dir = "../../../data/detection/association/GEODAMS_apriori_min0.05.csv"
# Dataset description handed to StationsCatalog.
catalog_path = "/home/plerolland/Bureau/dataset.yaml"

# --- Detection filtering ------------------------------------------------------
# Detections whose score (column 1 of a detection row) is not strictly above MIN_P are dropped.
MIN_P = 0.05
# Time threshold used when removing duplicated and regularly spaced detections.
SMALL_DELTA = datetime.timedelta(seconds=5)
# Not referenced by the cells visible here.
ALLOWED_DELTA = datetime.timedelta(seconds=5)

# --- A priori -----------------------------------------------------------------
# Assumed source position used to compute the expected travel time to each station
# (presumably (lat, lon) in degrees - TODO confirm against the sound model).
CENTER = (-37.1130600118062, 78.29794527139633)
# Tolerance applied around the expected inter-station arrival-time differences
# (presumably 100 km travelled at ~1.5 km/s - TODO confirm).
TOL = datetime.timedelta(seconds=100/1.5)

# --- Shared objects -----------------------------------------------------------
stations = StationsCatalog(catalog_path)
sound_model = SoundModel(sound_speed=1485.5)

# dets[station name] -> array of detection rows, sorted by time (column 0 = time, column 1 = score)
# pos[station name]  -> station position as returned by station.get_pos()
dets = {}
pos = {}
# sorted(): glob results come in no guaranteed order; sorting makes the station order
# (and therefore s_names) reproducible from one run / machine to the next.
for det_file in tqdm(sorted(glob2.glob(det_dir + "*"))):
    # The file name (without directories) is used as the station name.
    s_name = Path(det_file).name

    # Each file is a stream of consecutively pickled detection records; read until EOF.
    # NOTE(security): pickle.load can execute arbitrary code - only read trusted files.
    d = []
    with open(det_file, "rb") as f:
        while True:
            try:
                d.append(pickle.load(f))
            except EOFError:
                break
    d = np.array(d)

    # Keep only detections scoring strictly above MIN_P.
    if len(d) > 0:
        d = d[d[:,1] > MIN_P]
    if len(d) == 0:
        # Nothing usable for this station: skip it instead of crashing on d[0] below.
        print(f"No detection above {MIN_P} for station {s_name}, skipping")
        continue
    d = d[np.argsort(d[:,0])]

    # Remove duplicates and regularly spaced signals.
    new_d = [d[0]]
    for i in range(1, len(d)):
        gap = d[i,0] - d[i-1,0]
        if gap <= SMALL_DELTA:
            # too close to the previous detection: treated as a duplicate
            continue
        # Keep the detection unless its spacing repeats the previous spacing, either
        # one step back or two steps back, to within SMALL_DELTA. The first three
        # detections cannot be tested (not enough history) and are always kept.
        if i < 3 or (abs(gap - (d[i-1,0]-d[i-2,0])) > SMALL_DELTA
                     and abs((d[i,0]-d[i-2,0]) - (d[i-1,0]-d[i-3,0])) > SMALL_DELTA):
            new_d.append(d[i])
    d = np.array(new_d)

    dets[s_name] = d

    # Station entry matching the year of the earliest kept detection.
    station = stations.by_name(s_name).by_starting_year(np.min(d[:,0]).year)[0]
    pos[s_name] = station.get_pos()

    print(f"Found {len(d)} detections for station {s_name}")

s_names = list(dets.keys())

  0%|          | 0/4 [00:00<?, ?it/s]

Found 94335 detections for station HAMS-Centre
Found 74768 detections for station HAMS-East
Found 113107 detections for station HAMS-North
Found 87942 detections for station HAMS-South


In [27]:
# Sound travel time (passed below as seconds) from the a priori source position CENTER
# to each station. Computed once per station instead of once per (i, j) pair.
travel_times = {s_name: sound_model.get_sound_travel_time(CENTER, pos[s_name]) for s_name in s_names}

# allowed_delta_mat[i][j]: expected arrival-time difference (arrival at j minus arrival at i)
# for a signal emitted at CENTER.
allowed_delta_mat = {
    s_i: {s_j: datetime.timedelta(seconds=travel_times[s_j] - travel_times[s_i]) for s_j in s_names}
    for s_i in s_names
}

# Every detection of every station as a row (station name, time, score), in chronological order.
dets_merged = np.concatenate([[(s_name, det[0], det[1]) for det in dets[s_name]] for s_name in s_names])
# Stable sort: detections sharing the same timestamp keep the station order of s_names,
# so the processing order (and hence the output) is reproducible.
dets_merged = dets_merged[np.argsort(dets_merged[:,1], kind="stable")]

In [28]:
possible_associations = []  # not populated in this cell
# (station name, detection time) pairs already consumed by an accepted association.
# Keyed by station as well as time so that a used detection does not mask a detection
# that happens to carry the same timestamp at another station.
done = set()

# (Re)create the output CSV with its header; rows are appended as associations are accepted.
with open(out_dir, "w") as f:
    f.write("lat,lon,date_human,date,p_mean,loc_cost\n")

for s_name, det_date, det_p in tqdm(dets_merged):
    if (s_name, det_date) in done:
        continue

    # Get possibly matching detections for each other station: those falling strictly
    # within TOL of the arrival time expected from the a priori source position.
    candidates = {}
    for s_name_2 in s_names:
        if s_name_2 == s_name:
            continue
        station_dets = dets[s_name_2]
        times = station_dets[:,0]  # sorted in the loading cell, as searchsorted requires
        expected = det_date + allowed_delta_mat[s_name][s_name_2]
        # open interval (expected - TOL, expected + TOL)
        first = np.searchsorted(times, expected - TOL, side="right")
        last = np.searchsorted(times, expected + TOL, side="left")
        candidates[s_name_2] = [
            (s_name_2, row[0], row[1])
            for row in station_dets[first:last]
            if (s_name_2, row[0]) not in done
        ]

    # We have our candidates: an association picks one candidate per other station.
    # Each candidate is consistent with det_date by construction; keep only the
    # associations whose members are also pairwise consistent with each other.
    # The product is filtered lazily so that it is never fully held in memory.
    associations = [
        association
        for association in itertools.product(*candidates.values())
        if all(
            abs((date_b - date_a) - allowed_delta_mat[name_a][name_b]) <= TOL
            for (name_a, date_a, _), (name_b, date_b, _) in itertools.combinations(association, 2)
        )
    ]

    if len(associations) == 0:
        continue

    best_a, best_loc = None, None
    # At this point we know we have consistent association(s). We check if the location
    # inversion works and keep the association with the lowest localization cost.
    for association in associations:
        association = [(s_name, det_date, det_p)] + list(association)  # we add the main detection to the association
        det_pos = [pos[s_name_2] for (s_name_2, _, _) in association]
        det_dates = [det_date_2 for (_, det_date_2, _) in association]

        try:
            loc = sound_model.localize_common_source(det_pos, det_dates)
        except LinAlgError:
            # inversion failed for this combination: try the next one
            continue
        if best_loc is None or loc.cost < best_loc.cost:
            best_a = association
            best_loc = loc

    if best_a and best_loc.cost < 1:
        det_dates = [det_date_2 for (_, det_date_2, _) in best_a]
        # best_loc.x[0] is applied as an offset in seconds to the earliest detection;
        # best_loc.x[1] and best_loc.x[2] are written to the lat and lon columns.
        date = np.min(det_dates) + datetime.timedelta(seconds=best_loc.x[0])

        # Mark every detection of the accepted association as used.
        done.update((name, when) for (name, when, _) in best_a)

        # NOTE(review): datetime.timestamp() interprets a naive datetime as local time;
        # confirm the detection times are timezone-aware or that local time is intended.
        with open(out_dir, "a") as f:
            f.write(f'{best_loc.x[1]},{best_loc.x[2]},{date.strftime("%Y%m%d_%H%M%S")},{date.timestamp()},{np.mean(np.array(best_a)[:,2])},{best_loc.cost}\n')

  0%|          | 0/370152 [00:00<?, ?it/s]