In [None]:
import numpy as np
import glob2
import datetime
from pathlib import Path
from tqdm.notebook import tqdm
import pickle
from matplotlib import pyplot as plt
from utils.detection.association_geodesic import squarize
import matplotlib as mpl
import matplotlib.dates as mdates
from scipy import stats
from utils.physics.geodesic.distance import distance_point_point

# Global figure styling: start from matplotlib's 'classic' sheet, then switch
# all text to LaTeX-rendered serif and set the font sizes used by the figures.
plt.style.use('classic')

# LaTeX back-end and font family.
plt.rcParams.update({"pgf.texsystem": "pdflatex", "text.usetex": True, "font.family": "serif"})
# Body text, titles and axis labels share one size...
plt.rcParams.update({key: 10 for key in ("font.size", "axes.titlesize", "axes.labelsize")})
# ...tick labels and legend entries are slightly smaller.
plt.rcParams.update({key: 8 for key in ("xtick.labelsize", "ytick.labelsize", "legend.fontsize")})

from matplotlib import rc
# Same serif/usetex choice expressed through rc(), additionally naming the serif face.
rc('font', family='serif', serif=['Computer Modern'])
rc('text', usetex=True)
import matplotlib.ticker as ticker
import math
from numpy.linalg import LinAlgError
import pandas as pd
from scipy.interpolate import RegularGridInterpolator
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

from utils.data_reading.sound_data.station import StationsCatalog
from utils.physics.sound_model.spherical_sound_model import GridSphericalSoundModel as GridSoundModel, MonthlyHomogeneousSphericalSoundModel as HomogeneousSoundModel
from utils.detection.association_geodesic import compute_candidates, update_valid_grid, update_results, load_detections, compute_grids

In [None]:
# Station catalogue, passed through its filter_out_undated / filter_out_unlocated filters.
STATIONS = (
    StationsCatalog("/media/plerolland/akoustik/MAHY")
    .filter_out_undated()
    .filter_out_unlocated()
)

# One "<dataset>_raw_OBS-fixed.csv" file per dataset.
match_files = glob2.glob("../../../../data/MAHY/loc_3D/twin-cat/*_raw_OBS-fixed.csv")

# Reference catalogue; the CSV has no header row, so column names are given here.
# `cat` and `df_S` are two names bound to the same DataFrame.
df_S = pd.read_csv(
    "../../../../data/MAHY/lavayssiere_and_public.csv",
    header=None,
    names=["date", "lat", "lon", "depth", "mb"],
    parse_dates=["date"],
)
cat = df_S

# Matched events, keyed by dataset name (the file-name prefix before the first "_").
# Besides 'date', every column named after a station of that dataset is parsed as a date.
match = {}
for csv_path in match_files:
    dataset_name = csv_path.rsplit("/", 1)[-1].partition("_")[0]
    date_columns = ['date'] + [station.name for station in STATIONS.by_dataset(dataset_name)]
    match[dataset_name] = pd.read_csv(csv_path, parse_dates=date_columns)
match_merged = pd.concat([*match.values()])

# Per-station clock-correction parameters (headerless CSV), indexed by the "s" column.
clock_corrections = pd.read_csv(
    "../../../../data/detection/TiSSNet_Pn_raw_repicked/corrections_fixed-intercept.csv",
    names=["s", "intercept", "slope", "u"],
    header=None,
).set_index('s')

# Association catalogue, ordered by increasing mb.
asso_catalog = pd.read_csv("../../../../data/MAHY/loc_3D/P_association_catalog.csv")
asso_catalog = asso_catalog.sort_values("mb")

In [None]:
# One entry is appended to each list per (event, station) pair that has a pick,
# so the five lists always stay aligned index-by-index.
d_v_seismo, d_v_hydro, d_h_seismo, RL, mb = [], [], [], [], []

# Length of the waveform window, starting at the corrected pick time, over which
# the RMS level is computed.
DELTA = datetime.timedelta(seconds=30)

for dataset in match.keys():
    for idx in tqdm(match[dataset].index):
        a = match[dataset].loc[idx]
        for s in STATIONS.by_dataset(dataset):
            # Stations whose name contains "43" are read from the "fixed" directory.
            if "43" in s.name:
                s.path = "/media/plerolland/akoustik/MAHY/MAHY4_fixed/MAHY43"
            s.other_kwargs["raw"] = True

            pick = a[s.name]
            # Skip stations without a pick for this event. pd.isna is the public
            # missing-value test (NaT, NaN and None), replacing a comparison
            # against pandas' private NaTType class.
            if pd.isna(pick):
                continue

            mb.append(a["mb"])

            # Linear clock correction: intercept + slope * elapsed time since deployment.
            # NOTE(review): the 10**-6 factor suggests the slope is expressed per
            # million seconds (ppm-like) - confirm against the corrections file.
            correction = clock_corrections.loc[s.name]
            elapsed_s = (pick - s.date_start).total_seconds()
            c = correction["intercept"] + correction["slope"] * elapsed_s * 10**-6
            c = datetime.timedelta(seconds=c)

            hydro_dep, local_bathy = s.other_kwargs["depth"], s.other_kwargs["bathy"]
            d_h_seismo.append(distance_point_point([a["lat"], a["lon"]], s.get_pos()))
            d_v_hydro.append(local_bathy - hydro_dep)
            # NOTE(review): presumably event depth is in km and bathymetry in m,
            # hence the factor 1000 - confirm units.
            d_v_seismo.append(a["depth"]*1000 - local_bathy)

            # RMS amplitude of the DELTA-long segment starting at the corrected pick.
            window_start = pick + c
            data = s.get_manager().get_segment(window_start, window_start + DELTA)
            RL.append(np.sqrt(np.mean(data**2)))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pearsonr

# Turn the per-sample lists into arrays (a no-op copy if this cell is re-run).
d_v_seismo, d_v_hydro, d_h_seismo, RL, mb = np.array(d_v_seismo), np.array(d_v_hydro), np.array(d_h_seismo), np.array(RL), np.array(mb)

# Straight-line distance built from the vertical and horizontal components.
d_seismo = np.sqrt(d_v_seismo**2 + d_h_seismo**2)

# log10 of a zero or negative value yields -inf / NaN; those samples are removed
# by the mask below, so the corresponding NumPy warnings are silenced here.
with np.errstate(divide="ignore", invalid="ignore"):
    d_seismo_log, d_v_hydro_log, RL_log = np.log10(d_seismo), np.log10(d_v_hydro), np.log10(RL)

# Keep only samples where every quantity is finite. isfinite (rather than ~isnan)
# also rejects +/-inf, e.g. log10(0) = -inf, which would otherwise slip through
# and break the correlation and the regressions that use this mask.
valid_mask = np.logical_and.reduce([
    np.isfinite(values)
    for values in (d_v_seismo, d_v_hydro, d_h_seismo, RL, mb, d_seismo_log, d_v_hydro_log, RL_log)
])

# Pearson correlation between magnitude and log10 of the received RMS level.
# Index the result instead of unpacking into `_`, which IPython uses for the last output.
corr_mb_RL = pearsonr(mb[valid_mask], RL_log[valid_mask])[0]
print(f"Correlation of mb and RL : {corr_mb_RL}")

In [None]:
from sklearn.linear_model import LinearRegression

# Predictor sets tried previously (each one masked with valid_mask):
#   [RL, RL_log, d_seismo_log, d_seismo, d_v_hydro_log, d_v_hydro]
#   [RL_log, d_seismo_log, d_v_hydro_log, d_v_hydro]
#   [RL_log, d_seismo_log, d_v_hydro_log]
#   [RL_log, d_seismo_log]
# Retained set: 20*log10(RL) and log10(d_seismo).
predictors = (20*RL_log, d_seismo_log)
X = np.column_stack(predictors)[valid_mask]
y = mb[valid_mask]
print(len(X))

# Ordinary least squares: mb ~ coef[0] * 20*log10(RL) + coef[1] * log10(d_seismo) + intercept.
reg = LinearRegression()
reg.fit(X, y)

# Print the first coefficient and the remaining ones relative to it, i.e. the
# factorised form mb = coef[0] * (20*log10(RL) + ratio * log10(d_seismo)) + intercept.
leading_coef = reg.coef_[0]
print(leading_coef, reg.coef_[1:] / leading_coef)
print(reg.intercept_)

# Author's notes from earlier fits (kept for reference):
#   0*RL + 0.18 * log10(RL) -0.045 * log(dseis) + 0*d_seismo -1.36 * log(d_h) + 0.0032 d_h
#   0.11661705 RL + 0.10029726 d - 10.0396188
#     -> mb = 0.11661705 * (10*log10(RL) + (0.10029726/0.11661705) * 10*log10(d)) - 10.039618895196403
#     -> mb = 0.117 * (10logRL + 0.860logd) - 10.040
#   0.117 * (10logRlc  + 8.63 * d)

# Combined level used in the figures: received level plus a distance term.
# NOTE(review): 17.201 is presumably the coefficient ratio printed above, frozen
# from an earlier run - confirm it still matches the current fit.
xb = 20*RL_log + 17.201 * d_seismo_log
plt.scatter(xb, mb)

In [None]:
# ---- Cumulative counts ------------------------------------------------------
# For every matched magnitude m: number of matched events and of reference
# catalogue events with mb strictly greater than m, and their ratio in percent.
# Magnitudes above which the reference catalogue is empty are skipped.
x, detected, OBS, percent = [], [], [], []
match_merged = match_merged.sort_values("mb")
matched_mb, reference_mb = match_merged["mb"], cat["mb"]
for m in matched_mb:
    det = np.count_nonzero(matched_mb > m)
    det_cat = np.count_nonzero(reference_mb > m)

    if det_cat > 0:
        x.append(m)
        detected.append(det)
        OBS.append(det_cat)
        percent.append(100 * det/det_cat)

# ---- Figure layout: two panels side by side ----------------------------------
fig, axs = plt.subplots(1, 2)
width_in = 5.5
height_in = width_in / 2
fig.set_size_inches(width_in, height_in)
fig.patch.set_facecolor('white')
fig.subplots_adjust(wspace=0.2)

# ---- Right panel: log10 cumulative counts versus mb --------------------------
ax1 = axs[1]
ymax = 5
# The largest matched magnitude has zero events above it; log10(0) = -inf falls
# outside the axis limits, so only the warning is silenced.
with np.errstate(divide="ignore"):
    log_reference, log_detected = np.log10(OBS), np.log10(detected)
ax1.scatter(x, log_reference, color="black", label="Reference", s=1)
ax1.scatter(x, log_detected, color="royalblue", label="Matched", s=1, alpha=0.8)
# asso_catalog is sorted by increasing mb, so the reversed 1..N ranks give the
# number of associations at or above each magnitude.
ax1.scatter(asso_catalog["mb"], np.log10(np.arange(1,1+len(asso_catalog)))[::-1], color="green", label="Associations", s=1)
ax1.set_xlabel("$m_b$", labelpad=-4)
ax1.set_ylabel("log$_{10}$(N)", color='black', labelpad=0.6)
ax1.set_xlim(0, 4.5)
ax1.set_ylim(0, ymax)
ax1.set_xticks([0,1.5,3,4.5])
ax1.tick_params(axis='x', which='major', direction='out', length=7, top=False, labelcolor='black')
minor_locator = ticker.MultipleLocator(0.5)
ax1.xaxis.set_minor_locator(minor_locator)
ax1.tick_params(axis='x', which='minor', direction='out', length=4, labelbottom=False, top=False, labelcolor='black')
ax1.grid(True, linestyle='--', alpha=0.7)
ax1.grid(True, which='minor', linestyle=':', linewidth=0.5, alpha=0.5)

# NOTE(review): `yticks_percent` was never defined in this notebook (NameError on
# a fresh kernel). Six evenly spaced percentages are assumed here, which puts the
# ticks on the integer decades 0..ymax - adjust if other ticks were intended.
yticks_percent = np.linspace(0, 100, 6)
yticks_detected = yticks_percent * ymax / 100
ax1.set_yticks(yticks_detected)
# Only ax1 carries labelled artists in this cell; the previously referenced
# `lines_2` / `labels_2` were undefined, so the legend is built from ax1 alone.
lines_1, labels_1 = ax1.get_legend_handles_labels()
leg = ax1.legend(lines_1, labels_1, bbox_to_anchor=(0.62, 0.31), borderaxespad=0.)

# ---- Left panel: combined level (xb) versus mb -------------------------------
ax3 = axs[0]
x_vals = mb[valid_mask]
y_vals = xb[valid_mask]
ax3.scatter(x_vals, y_vals, color='seagreen', s=5, alpha=0.8)
ax3.set_xlabel("$m_b$", labelpad=-4)
# Raw string: "\m" is not a valid Python escape sequence (the text itself is unchanged).
ax3.set_ylabel(r"Source Level (dB re $1 \mu Pa^2$ re 1 m)", labelpad=0.75)
ax3.set_xlim(0, 4.5)
#ax3.set_ylim(150, 210)
ax3.set_xticks([0,1.5,3,4.5])
ax3.tick_params(axis='x', which='major', direction='out', length=7, top=False)
ax3.xaxis.set_minor_locator(minor_locator)
ax3.tick_params(axis='x', which='minor', direction='out', length=4, labelbottom=False, top=False)
ax3.grid(True, linestyle='--', alpha=0.7)
ax3.grid(True, which='minor', linestyle=':', linewidth=0.5, alpha=0.5)

##### REGRESSION
# Straight-line fit of the combined level against mb, drawn over the fitted range.
model = LinearRegression()
reg = model.fit(x_vals.reshape((-1,1)), y_vals.reshape((-1,1)))
slope, intercept = reg.coef_[0][0], reg.intercept_[0]
x_range = np.linspace(min(x_vals), max(x_vals), 100).reshape(-1, 1)
#slope, intercept = 14.8, 141.3
r2 = r2_score(y_vals, intercept + slope*x_vals)
y_pred = x_range*slope + intercept
# '{intercept:+.1f}' prints the sign itself, so a negative intercept is shown
# as "-x.x" instead of "+-x.x"; positive intercepts render exactly as before.
ax3.plot(x_range, y_pred, color='firebrick', linewidth=2, label=f'$R^2$={r2:.2f}\nSL={slope:.1f}$m_b${intercept:+.1f}')
ax3.legend(loc='best')

fig.savefig(
    '../../../../data/MAHY/figures/mb_SL_regressed-d.pdf',
    dpi=500,
    bbox_inches='tight',
    pad_inches=0
)

In [None]:
# Highest matched/reference percentage, and the magnitude threshold at which it occurs.
print(np.max(percent))
print(x[np.argmax(percent)])