In [None]:
import copy
import pickle
from pathlib import Path

import glob2
import datetime

import numpy as np
from scipy.signal import find_peaks
from tqdm.notebook import tqdm
import tensorflow as tf
from skimage.transform import resize
from matplotlib import pyplot as plt

from detection.clustering import get_valid, get_clusters, get_delta
from utils.data_reading.catalogs.ISC import ISC_file
from utils.data_reading.sound_data.sound_file_manager import NpyFilesManager
from utils.data_reading.sound_data.station import StationsCatalog
from utils.physics.bathymetry.bathymetry_grid import BathymetryGrid
from utils.physics.sound.sound_model import HomogeneousSoundModel
from utils.physics.sound.sound_velocity_grid import MonthlySoundVelocityGridOptimized
from utils.transformations.features_extractor import STFTFeaturesExtractor

In [None]:
# Catalog year and input files.
# NOTE(review): the ISC path is absolute and machine-specific; the bathymetry path is relative to the notebook.
year = 2018
isc_file = f"/home/plerolland/Bureau/catalogs/ISC/eqk_isc_{year}.txt"
bathy_model = BathymetryGrid.create_from_NetCDF(
    "../../data/geo/GEBCO_2023_sub_ice_topo.nc",
    lat_bounds=[-75, 35],
    lon_bounds=[-20, 180],
)

isc = ISC_file(isc_file)

# (lat_min, lon_min) pairs: an event is discarded when its latitude AND longitude
# both exceed one of these pairs.
# NOTE(review): presumably these boxes mask regions outside the area of interest — confirm.
excluded_corners = ((-5, 115), (-30, 130), (-45, 170), (-20, 85))

# Collect the IDs first, then delete, so the dict is not modified while iterating over it.
to_del = set()
for ID, event in isc.items.items():
    # A positive value returned by the bathymetry grid marks the event for removal.
    positive_elevation = bathy_model.get_nearest_values(event.get_pos()) > 0
    lat, lon = tuple(isc[ID].get_pos())
    in_excluded_box = any(lat > lat_min and lon > lon_min for lat_min, lon_min in excluded_corners)
    if positive_elevation or in_excluded_box:
        to_del.add(ID)

for ID in to_del:
    del isc.items[ID]

# The count below also includes events dropped by the lat/lon boxes, not only the bathymetry test.
print(f"{len(to_del)} terrestrial events removed from catalog ({len(isc.items)} remain)")

# IDs of the events kept in the catalog, reused by the following cells.
IDs = list(isc.items.keys())

In [None]:
# Threshold handed to get_delta; also the (exclusive) upper bound of the thresholds tried below.
# NOTE(review): a later cell rebuilds missing deltas as sqrt(km**2 + days**2), so this value
# presumably mixes kilometres and days in the same way — confirm in get_delta.
max_allowed_delta = 50

# Pairwise deltas between catalog events; later cells index the result as delta[ID][ID_].
delta = get_delta(isc, max_allowed_delta)

In [None]:
# expected run time of 30 min for above conditions
# Thresholds to evaluate: 1 .. max_allowed_delta - 1 (range() excludes its upper bound).
allowed_deltas_to_try = list(range(1, max_allowed_delta))

# One get_valid() result per threshold. The result must be stored under its own key:
# rebinding `valid` itself on each iteration would keep only the last threshold's result,
# while the clustering cell looks up valid[allowed_delta] for every threshold.
valid = {}
for allowed_delta in tqdm(allowed_deltas_to_try):
    valid[allowed_delta] = get_valid(allowed_delta, delta, IDs)

In [None]:
# Build the clusters for every threshold: clusters[threshold] is whatever get_clusters
# returns for the corresponding entry of `valid`.
clusters = {
    threshold: get_clusters(IDs, valid[threshold])
    for threshold in tqdm(allowed_deltas_to_try)
}

In [None]:
# Per-threshold summary statistics, keyed by allowed delta:
#   *_nb       -> number of events per cluster
#   *_delta    -> largest pairwise delta inside a cluster
#   *_delta_km -> largest pairwise distance (km) inside a cluster
#   *_delta_d  -> largest pairwise time gap (days) inside a cluster
med_nb, med_delta, med_delta_km, med_delta_d = {}, {}, {}, {}
max_nb, max_delta, max_delta_km, max_delta_d = {}, {}, {}, {}

# Local cache of (delta, km, days) per unordered pair of events. Clusters obtained with
# different thresholds share many pairs, so each pair is computed only once. Keeping the
# cache local means this cell neither depends on `delta_km` / `delta_d` being defined
# elsewhere nor mutates the shared `delta` structure.
_pair_cache = {}


def _pair_separation(ID, ID_):
    """Return ``(delta, km, days)`` separating the two catalog events ``ID`` and ``ID_``.

    The distance uses a flat approximation (111 km per degree on both axes, without any
    latitude correction on longitude) and the time gap is expressed in days. The combined
    delta is read from ``delta`` when available; otherwise the events were too distant
    (in time) to have one, and it is rebuilt as ``sqrt(km**2 + days**2)``.
    NOTE(review): assumes get_delta uses the same km / days definitions — confirm.
    """
    key = frozenset((ID, ID_))
    if key not in _pair_cache:
        pos1, pos2 = isc[ID].get_pos(), isc[ID_].get_pos()
        km = np.sqrt((pos1[0] - pos2[0]) ** 2 + (pos1[1] - pos2[1]) ** 2) * 111
        days = abs(isc[ID].date - isc[ID_].date).total_seconds() / 86400
        if ID_ in delta[ID]:
            combined = delta[ID][ID_]
        else:
            combined = np.sqrt(km ** 2 + days ** 2)
        _pair_cache[key] = (combined, km, days)
    return _pair_cache[key]


def _median_or_nan(values):
    """Median of ``values``, or NaN when ``values`` is empty."""
    return np.median(values) if len(values) > 0 else np.nan


def _max_or_nan(values):
    """Maximum of ``values``, or NaN when ``values`` is empty (np.max would raise)."""
    return np.max(values) if len(values) > 0 else np.nan


for a_d, current_clusters in tqdm(clusters.items()):
    nb = []
    d, d_km, d_d = [], [], []
    for cluster in tqdm(current_clusters.values(), leave=False):
        nb.append(len(cluster))
        if len(cluster) < 2:
            # a single-event cluster has no pairwise separation to report
            continue
        members = list(cluster)
        # Visit each unordered pair once: the separations are symmetric and a self-pair
        # is 0, so the per-cluster maxima are the same as with the full double loop.
        separations = [
            _pair_separation(ID, ID_)
            for i, ID in enumerate(members)
            for ID_ in members[i + 1:]
        ]
        pair_delta, pair_km, pair_days = zip(*separations)
        d.append(np.max(pair_delta))
        d_km.append(np.max(pair_km))
        d_d.append(np.max(pair_days))

    med_nb[a_d] = _median_or_nan(nb)
    med_delta[a_d] = _median_or_nan(d)
    med_delta_km[a_d] = _median_or_nan(d_km)
    med_delta_d[a_d] = _median_or_nan(d_d)
    max_nb[a_d] = _max_or_nan(nb)
    max_delta[a_d] = _max_or_nan(d)
    max_delta_km[a_d] = _max_or_nan(d_km)
    max_delta_d[a_d] = _max_or_nan(d_d)

In [None]:
# Cluster size (median and maximum) as a function of the allowed delta.
for series, label in (
    (med_nb, "med nb of events per cluster"),
    (max_nb, "max nb of events per cluster"),
):
    plt.scatter(list(series.keys()), list(series.values()), label=label)
plt.legend()
plt.xlabel("max allowed delta")
plt.ylabel("nb of events per cluster")

In [None]:
# Median, over clusters, of the largest intra-cluster separation, per allowed delta.
for series, label in (
    (med_delta, "med delta"),
    (med_delta_km, "med delta km"),
    (med_delta_d, "med delta d"),
):
    plt.scatter(list(series.keys()), list(series.values()), label=label)
plt.legend()
plt.xlabel("max allowed delta")
plt.ylabel("km or days")

In [None]:
# Maximum, over clusters, of the largest intra-cluster separation, per allowed delta.
for series, label in (
    (max_delta, "max delta"),
    (max_delta_km, "max delta km"),
    (max_delta_d, "max delta d"),
):
    plt.scatter(list(series.keys()), list(series.values()), label=label)
plt.legend()
plt.xlabel("max allowed delta")
plt.ylabel("km or days")