In [None]:
import numpy as np
import glob2
import datetime
from pathlib import Path
from tqdm.notebook import tqdm
import pickle
import matplotlib.cm as cm
import matplotlib.colors as mcolors
from matplotlib import pyplot as plt
from utils.detection.association_geodesic import squarize
import matplotlib as mpl
import matplotlib.dates as mdates
from scipy import stats
from utils.physics.geodesic.distance import distance_point_point
import matplotlib.patheffects as path_effects

# Global figure style for the whole notebook: classic matplotlib look, text
# rendered through LaTeX (usetex) with a serif font and explicit font sizes.
# Because usetex is on, every label/title drawn later must be valid LaTeX.
plt.style.use('classic')
plt.rcParams.update({
    "pgf.texsystem": "pdflatex",
    "text.usetex": True,
    "font.family": "serif",
    "font.size": 10,
    "axes.titlesize": 10,
    "axes.labelsize": 10,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "legend.fontsize": 8,
})
from matplotlib import rc
# NOTE(review): these two rc() calls repeat the serif / usetex settings already
# applied through plt.rcParams above; the only addition is the explicit
# 'Computer Modern' serif font list.
rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
rc('text', usetex=True)
import matplotlib.ticker as ticker
import math
from numpy.linalg import LinAlgError
import pandas as pd
from scipy.interpolate import RegularGridInterpolator
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

from utils.data_reading.sound_data.station import StationsCatalog
from utils.physics.sound_model.spherical_sound_model import GridSphericalSoundModel as GridSoundModel, MonthlyHomogeneousSphericalSoundModel as HomogeneousSoundModel
from utils.detection.association_geodesic import compute_candidates, update_valid_grid, update_results, load_detections, compute_grids

In [None]:
# --- Load the station catalog, the reference catalog and the matched picks ---
# NOTE(review): the station root below is an absolute, machine-specific path;
# consider moving it to a configurable constant.
STATIONS = StationsCatalog("/media/plerolland/akoustik/MAHY").filter_out_undated().filter_out_unlocated()
match_files = glob2.glob("../../../../data/MAHY/loc_3D/twin-cat/*_raw_OBS-fixed.csv")

# Reference catalog (header-less CSV), ordered by magnitude.
df_S = pd.read_csv(
    "../../../../data/MAHY/lavayssiere_and_public.csv", header=None, names=["date","lat","lon","depth","mb"], parse_dates=["date"]
).sort_values("mb")

# One table of matched events per dataset. The dataset name is the part of the
# file name before the first underscore; Path(...).name is used instead of a
# manual split on "/" so the extraction does not depend on the path separator.
match = {}
for match_file in match_files:
    dataset_name = Path(match_file).name.split("_")[0]
    # The 'date' column and one column per station of the dataset are parsed as dates.
    station_columns = [station.name for station in STATIONS.by_dataset(dataset_name)]
    match[dataset_name] = pd.read_csv(match_file, parse_dates=['date'] + station_columns)
match_merged = pd.concat(list(match.values()))

# Per-station clock corrections (header-less CSV), indexed by the station column "s".
clock_corrections = pd.read_csv("../../../../data/detection/TiSSNet_Pn_raw_repicked/corrections_fixed-intercept.csv", names=["s","intercept","slope","u"], header=None).set_index('s')

# Association catalog ordered by magnitude. The "date" column is NOT parsed
# here; the figure cell further down re-reads this file with parse_dates.
asso_catalog = pd.read_csv("../../../../data/MAHY/loc_3D/P_association_catalog.csv").sort_values("mb")

In [None]:
# Per-pick measurements (one entry per event/station pair with a pick) and
# per-event summaries (one entry per event).
d, mb, peaks, peaks_10, rms, rms_10, mb_event, rms_10_event = [], [], [], [], [], [], [], []
# Half-width of the short window centred on the corrected pick.
DELTA = datetime.timedelta(seconds=1)
# Length of the long window starting at the corrected pick. NOTE: despite the
# "_10" suffix (kept because later cells use the *_10 names) this is 30 s.
DELTA_10 = datetime.timedelta(seconds=30)

for dataset in match.keys():
    events = match[dataset]

    # Configure the stations once per dataset instead of once per event, and
    # reuse the very same station objects in the event loop below.
    stations = list(STATIONS.by_dataset(dataset))
    for s in stations:
        if "43" in s.name:
            s.path = "/media/plerolland/akoustik/MAHY/MAHY4_fixed/MAHY43"
        s.other_kwargs["raw"] = True

    for idx in tqdm(events.index):
        a = events.loc[idx]
        rms_10_local = []
        for s in stations:
            pick = a[s.name]
            # pd.isna is the public way to detect a missing pick (NaT); it also
            # skips None/NaN, which would otherwise crash the subtraction below.
            if pd.isna(pick):
                continue

            distance = distance_point_point([a["lat"],a["lon"]], s.get_pos())

            # Clock correction: intercept + slope * elapsed time since the
            # station start (seconds scaled by 1e-6), applied as a time shift.
            correction = clock_corrections.loc[s.name]
            c = correction["intercept"] + correction["slope"] * (pick - s.date_start).total_seconds() * 10**-6
            c = datetime.timedelta(seconds=c)
            arrival = pick + c

            manager = s.get_manager()

            # Short window: DELTA before to DELTA after the corrected pick.
            data = manager.get_segment(arrival - DELTA, arrival + DELTA)
            peak, mean_square = np.max(data**2), np.mean(data**2)

            # Long window: from the corrected pick to DELTA_10 after it.
            data = manager.get_segment(arrival, arrival + DELTA_10)
            peak_10, mean_square_10 = np.max(data**2), np.mean(data**2)

            # Append everything together so the per-pick lists stay aligned.
            d.append(distance)
            mb.append(a["mb"])
            peaks.append(peak)
            rms.append(mean_square)
            peaks_10.append(peak_10)
            rms_10.append(mean_square_10)
            # Distance-compensated level in dB. The 17.201 coefficient is
            # presumably an empirical transmission-loss slope -- TODO confirm.
            rms_10_local.append(17.201 * np.log10(distance) + 10*np.log10(mean_square_10))

        mb_event.append(a["mb"])
        if rms_10_local:
            # Average the station levels in the linear domain, then back to dB.
            rms_10_event.append(10*np.log10(np.nanmean(10**(np.array(rms_10_local)/10))))
        else:
            # No station has a pick for this event: store NaN explicitly
            # (same value as before, without the empty-slice warning).
            rms_10_event.append(np.nan)

In [None]:
# pearsonr is not part of the notebook's top import cell, so it is imported here.
from scipy.stats import pearsonr

# Turn the accumulated lists into arrays (idempotent when the cell is re-run).
peaks, peaks_10, rms, rms_10, mb, d = np.array(peaks), np.array(peaks_10), np.array(rms), np.array(rms_10), np.array(mb), np.array(d)
mb_event, rms_10_event = np.array(mb_event), np.array(rms_10_event)

# Keep only picks for which every quantity is finite AND every quantity that
# goes through log10 is strictly positive; a zero would give -inf after log10
# and turn the correlation into NaN.
valid_mask = np.isfinite(mb) & np.isfinite(d) & (d > 0)
for values in (peaks, peaks_10, rms, rms_10):
    valid_mask &= np.isfinite(values) & (values > 0)

# Events need a finite magnitude and a finite level (events without any pick
# carry a NaN level and must be rejected too).
valid_mask_event = np.isfinite(mb_event) & np.isfinite(rms_10_event)

# Pearson correlation between magnitude and each log-scaled measurement,
# without and with an added log10(distance) term.
mb_valid = mb[valid_mask]
log_d = np.log10(d[valid_mask])
log_peaks = np.log10(peaks[valid_mask])
log_peaks_10 = np.log10(peaks_10[valid_mask])
log_rms = np.log10(rms[valid_mask])
log_rms_10 = np.log10(rms_10[valid_mask])

corr_peaks, _ = pearsonr(mb_valid, log_peaks)
corr_peaks_10, _ = pearsonr(mb_valid, log_peaks_10)
corr_rms, _ = pearsonr(mb_valid, log_rms)
corr_rms_10, _ = pearsonr(mb_valid, log_rms_10)
corr_peaks_d, _ = pearsonr(mb_valid, log_d + log_peaks)
corr_peaks_10_d, _ = pearsonr(mb_valid, log_d + log_peaks_10)
corr_rms_d, _ = pearsonr(mb_valid, log_d + log_rms)
corr_rms_10_d, _ = pearsonr(mb_valid, 1*log_d + log_rms_10)
print(pearsonr(mb_event[valid_mask_event], rms_10_event[valid_mask_event]))


# Print the correlations
print(f"Corrélation mb vs peaks: {corr_peaks:.3f}")
print(f"Corrélation mb vs peaks_10: {corr_peaks_10:.3f}")
print(f"Corrélation mb vs rms: {corr_rms:.3f}")
print(f"Corrélation mb vs rms_10: {corr_rms_10:.3f}")
print(f"Corrélation mb vs peaks (d-fixed): {corr_peaks_d:.3f}")
print(f"Corrélation mb vs peaks_10 (d-fixed): {corr_peaks_10_d:.3f}")
print(f"Corrélation mb vs rms (d-fixed): {corr_rms_d:.3f}")
print(f"Corrélation mb vs rms_10 (d-fixed): {corr_rms_10_d:.3f}")

# Plot every measurement against mb
plt.figure(figsize=(10, 6))

# Text is rendered through LaTeX (text.usetex is enabled at the top of the
# notebook), so the underscore in the legend labels has to be escaped.
plotted_series = (
    (log_peaks, r'peaks', corr_peaks, "blue"),
    (log_peaks_10, r'peaks\_10', corr_peaks_10, "red"),
    (log_rms, r'rms', corr_rms, "orange"),
    (log_rms_10, r'rms\_10', corr_rms_10, "pink"),
)
for log_values, series_name, corr, series_color in plotted_series:
    plt.scatter(mb_valid, log_values, label=f'{series_name} (r={corr:.2f})', color=series_color)

plt.xlabel('mb')
plt.ylabel('Valeurs')
plt.legend()
plt.title('Comparaison des variables en fonction de mb')
plt.grid(True)
plt.show()

# Notes from earlier runs: window before / after the pick (presumably in
# seconds) and the resulting correlation -- TODO confirm which correlation.
# 1 before, 1 after: 0.685
# 10 before, 10 after: 0.794
# 1 before, 10 after: 0.801
# 0 before, 10 after: 0.802
# 0 before, 20 after: 0.822
# 0 before, 30 after: 0.825
# 0 before, 40 after: 0.826
# 0 before, 50 after: 0.825
# 0 before, 50 after + 1*log10(d): 0.836
# 0 before, 30 after + 0*log10(d): 0.871

In [None]:
# Re-read the association catalog with the "date" column parsed as datetimes,
# then order every catalog by magnitude: the reversed 1..N ranks plotted below
# then give, for each event, the number of events at least as large.
asso_catalog = pd.read_csv("../../../../data/MAHY/loc_3D/P_association_catalog.csv", parse_dates=["date"]).sort_values("mb")

match_merged, df_S = match_merged.sort_values("mb"), df_S.sort_values("mb")

fig, axs = plt.subplots(1, 2)
width_in = 5.5
height_in = width_in / 2
fig.set_size_inches(width_in, height_in)
fig.patch.set_facecolor('white')
fig.subplots_adjust(wspace=0.2)

# --- Right panel: log10 cumulative count versus magnitude for the 3 catalogs ---
ax1 = axs[1]
ax1.scatter(asso_catalog["mb"], np.log10(np.arange(1,1+len(asso_catalog)))[::-1], color="royalblue", label=f"TAPAAs ({len(asso_catalog)})", s=1)
ax1.scatter(df_S["mb"], np.log10(np.arange(1,1+len(df_S)))[::-1], color="black", label=f"Reference ({len(df_S)})", s=1)
ax1.scatter(match_merged["mb"], np.log10(np.arange(1,1+len(match_merged)))[::-1], color="seagreen", label=f"Matched ({len(match_merged)})", s=1, alpha=0.8)
# Leftover output pasted from an earlier fit (presumably slope and intercept):
# -0.9598943780038178, 5.40140672
b_val = 0.93
# Reference segment of slope -b_val drawn between mb = 2.5 and mb = 3.5.
ax1.plot([2.5,3.5], [3.5, 3.5 - b_val * 1], linestyle="-", color="royalblue", linewidth=1.5)
angle = np.degrees(np.arctan(-b_val))
txt1 = ax1.text(2.7, 3.35, f"b={b_val:.2f}",
                color="royalblue", fontweight='bold',
                ha='left', va='bottom', rotation=angle-3.5,
                rotation_mode='anchor')
txt2 = ax1.text(0.675, 0.84, "$m_{b_c}$=1.5", color="royalblue", fontweight='bold', transform=fig.transFigure)
# White outline behind both annotations. (An earlier, separate call on txt1
# with linewidth=5 was overwritten by this loop and has been dropped.)
for txt in [txt1, txt2]:
    txt.set_path_effects([
        path_effects.Stroke(linewidth=4, foreground='white'),
        path_effects.Normal()
    ])

ax1.axvline(x=1.5, color='royalblue', linestyle='--', linewidth=1.2)
# Only used by the disabled arrow annotation kept below.
x_arrow, y_arrow = 1.5, np.log10(np.count_nonzero(asso_catalog["mb"]>1.5))
#ax1.annotate('$m_{b_c}$=1.5', xy=(x_arrow, y_arrow), xytext=(x_arrow+1, y_arrow+0.1),
#            arrowprops=dict(facecolor='royalblue', shrink=0.05, lw=0.1, width=2, color="royalblue"))
ax1.set_xlabel("$m_b$", labelpad=-4)
ax1.set_ylabel("log$_{10}$(N)", color='black', labelpad=0.6)
ax1.set_xlim(0, 4.5)
ymax = 4.5
ax1.set_ylim(0, ymax)
ax1.set_xticks([0,1.5,3,4.5])
ax1.set_yticks([0,1,2,3,4])
ax1.tick_params(axis='x', which='major', direction='out', length=7, top=False, labelcolor='black')
minor_locator = ticker.MultipleLocator(0.5)
ax1.xaxis.set_minor_locator(minor_locator)
ax1.tick_params(axis='x', which='minor', direction='out', length=4, labelbottom=False, top=False, labelcolor='black')
ax1.grid(True, linestyle='--', alpha=0.7)
ax1.grid(True, which='minor', linestyle=':', linewidth=0.5, alpha=0.5)

ax1.legend(bbox_to_anchor=(0.703, 0.26), borderaxespad=0., markerscale=3, numpoints=1)

# --- Left panel: per-event level versus magnitude ---
ax3 = axs[0]
x_vals, y_vals = mb_event[valid_mask_event], rms_10_event[valid_mask_event]
ax3.scatter(x_vals, y_vals, color='seagreen', s=5, alpha=0.8)
ax3.set_xlabel("$m_b$", labelpad=-4)
# Raw string: "\mu" is not a valid escape sequence in a normal string literal
# (the text handed to LaTeX is unchanged).
ax3.set_ylabel(r"Source Level (dB re $1 \mu Pa^2$ re 1 m)", labelpad=0.75)
ax3.set_xlim(0, 4.5)
ax3.set_ylim(185, 235)
ax3.set_xticks([0,1.5,3,4.5])
ax3.tick_params(axis='x', which='major', direction='out', length=7, top=False)
ax3.xaxis.set_minor_locator(minor_locator)
ax3.tick_params(axis='x', which='minor', direction='out', length=4, labelbottom=False, top=False)
ax3.grid(True, linestyle='--', alpha=0.7)
ax3.grid(True, which='minor', linestyle=':', linewidth=0.5, alpha=0.5)

##### REGRESSION
# Fit on the reference catalog above mb 2.5 (coefficients are only printed).
OBS_reg = df_S[df_S["mb"] > 2.5]
x, y = np.array(OBS_reg["mb"]), np.log10(np.arange(1,1+len(OBS_reg)))[::-1]
model = LinearRegression()
model.fit(x.reshape((-1,1)), y.reshape((-1,1)))
print(model.coef_, model.intercept_)

# Fit of the level against magnitude (coefficients are only printed).
model = LinearRegression()
model.fit(x_vals.reshape((-1,1)), y_vals.reshape((-1,1)))
print(model.coef_, model.intercept_)
# NOTE(review): the plotted range comes from the per-pick arrays
# (mb, valid_mask) while the points come from the per-event arrays.
x_range = np.linspace(min(mb[valid_mask]), max(mb[valid_mask]), 100).reshape(-1, 1)
# Hard-coded line drawn on the figure; R^2 is evaluated for this line, not for
# the model fitted just above.
slope, intercept = 14.2, 178
r2 = r2_score(y_vals, intercept + slope*x_vals)
y_pred = x_range*slope + intercept
ax3.plot(x_range, y_pred, color='firebrick', linewidth=2, label=f'$R^2$={r2:.2f}\nSL={slope:.1f}$m_b$+{intercept:.1f}')
ax3.legend(loc='best')

plt.savefig(
    '../../../../data/MAHY/figures/mb_SL.pdf',
    dpi=500,
    bbox_inches='tight',
    pad_inches=0
)

In [None]:
# Sweep the lower magnitude cutoff v and, for each value, fit a straight line
# to log10(cumulative count) versus magnitude on v < mb < 4, recording R^2 and
# the fitted (slope, intercept).

# Minimum number of points required for a fit: the same threshold as the
# windowed fits elsewhere in this notebook. With fewer than two points R^2 is
# undefined (NaN) and with exactly two it is trivially perfect; either case
# would corrupt the np.argmax below.
MIN_POINTS_FOR_FIT = 5

# Sorted magnitudes and their reversed ranks do not depend on the cutoff, so
# they are computed once instead of once per iteration.
mb_sorted = np.sort(asso_catalog["mb"].to_numpy())
log_counts = np.log10(np.arange(1, 1 + len(mb_sorted)))[::-1]

vs, rs, s = [], [], []

for v in np.linspace(0,3.5,1000):
    mask = (mb_sorted > v) & (mb_sorted < 4)
    x, y = mb_sorted[mask], log_counts[mask]
    if len(x) < MIN_POINTS_FOR_FIT:
        continue
    model_SL = LinearRegression()
    model_SL.fit(x.reshape((-1,1)), y.reshape((-1,1)))
    r2 = r2_score(y, model_SL.predict(x.reshape((-1,1))))
    vs.append(v)
    rs.append(r2)
    s.append((model_SL.coef_[0][0], model_SL.intercept_[0]))

plt.plot(vs,rs)
plt.xlabel("Lower $m_b$ cutoff")
plt.ylabel("$R^2$")
# Cutoff with the best R^2, and the (slope, intercept) fitted for the cutoff
# closest to 1.5.
print(vs[np.argmax(rs)], s[np.argmin(abs(np.array(vs)-1.5))])

In [None]:
# `t` (Student distribution) is not part of the notebook's top import cell;
# later cells rely on this import as well.
from scipy.stats import t

# Fixed-duration windows: each window is `step` wide and consecutive window
# centres are `step * (1 - overlap/100)` apart (overlap is a percentage).
start = datetime.datetime(2020,8,1)
end = datetime.datetime(2024,10,1)
step = datetime.timedelta(days=30)
overlap = 25

dates = []
slopes = []
slope_errs = []
counts = []
r2s = []

window_stride = step * (1 - overlap/100)
next_center = start
while next_center <= end:
    # Advance exactly once per iteration, before any `continue`. The previous
    # version advanced twice (by the overlap stride AND by a full step), which
    # left gaps between windows, and stored the already-advanced date.
    center = next_center
    next_center += window_stride

    e = asso_catalog[(asso_catalog["date"] > center - step/2) & (asso_catalog["date"] < center + step/2)]
    x = np.array(sorted(e["mb"]))
    x, y = x, np.log10(np.arange(1,1+len(x)))[::-1]

    # Fit only on 1.5 < mb < 4.
    mask = (x > 1.5) & (x < 4)
    x, y = x[mask], y[mask]

    if len(x) < 5:
        continue

    model = LinearRegression()
    model.fit(x.reshape(-1,1), y.reshape(-1,1))

    slope = model.coef_[0,0]

    # 95 % confidence half-width of the slope from the residual standard error.
    y_pred = model.predict(x.reshape(-1,1)).flatten()
    residuals = y - y_pred
    n = len(x)
    dof = n - 2
    s_err = np.sqrt(np.sum(residuals**2) / dof)
    SE_slope = s_err / np.sqrt(np.sum( (x - np.mean(x))**2 ))
    t_val = t.ppf(0.975, dof)
    slope_ci95 = t_val * SE_slope

    r2_val = r2_score(y, y_pred)

    dates.append(center)
    counts.append(len(x))
    slopes.append(-slope)  # stored with the sign flipped
    slope_errs.append(slope_ci95)
    r2s.append(r2_val)

# One error bar per window, coloured by the R^2 of its fit.
fig, ax = plt.subplots()
cmap = cm.viridis
norm = mcolors.Normalize(vmin=min(r2s), vmax=max(r2s))
colors = cmap(norm(r2s))
for i in range(len(dates)):
    ax.errorbar(dates[i], slopes[i], yerr=slope_errs[i],
                fmt='o', c=colors[i], ecolor=colors[i], capsize=3)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, ax=ax)
cbar.set_label("$R^2$", rotation=0, labelpad=20)
# Number of fitted points written under each marker.
largest_slope = np.max(slopes)
for x_val, y_val, n, err in zip(dates, slopes, counts, slope_errs):
    ax.text(x_val, y_val - 0.03 - 1*err/largest_slope, str(n), ha='center', va='bottom')

ax.set_xlim(datetime.datetime(2020,8,1), datetime.datetime(2024,10,1))
ax.set_xlabel("Time")
ax.set_ylabel("Slope $(log(N)/m_b)$")
ax.grid(True)
plt.tight_layout()
fig.patch.set_facecolor('white')
plt.show()

In [None]:
# Number of catalog events whose magnitude exceeds 3.25.
above_threshold = asso_catalog["mb"] > 3.25
print(int(above_threshold.sum()))

In [None]:
# Catalog sorted by date
df_sorted = asso_catalog.sort_values("date").reset_index(drop=True)

# Windows hold a fixed NUMBER of consecutive events (not a fixed duration).
window_size = 200
overlap = 0.5
# round() instead of int(): window_size * (1 - overlap) is a float product and
# truncation can lose one event (e.g. 19.999... -> 19). Clamped to 1 because
# range() rejects a zero step.
step = max(1, round(window_size * (1 - overlap)))

dates_start = []
dates_end = []
dates_middle = []
slopes = []
slope_errs = []
r2s = []

for i in tqdm(range(0, len(df_sorted) - window_size + 1, step)):
    group = df_sorted.iloc[i:i+window_size]

    # Sorted magnitudes against log10 of their reversed rank.
    x = np.array(sorted(group["mb"]))
    x, y = x, np.log10(np.arange(1, 1 + len(x)))[::-1]

    # Fit only on 1.5 < mb < 4.
    mask = (x > 1.5) & (x < 4)
    x, y = x[mask], y[mask]

    if len(x) < 5:
        continue

    model = LinearRegression()
    model.fit(x.reshape(-1,1), y.reshape(-1,1))
    slope = model.coef_[0,0]

    # 95 % confidence half-width of the slope from the residual standard error.
    y_pred = model.predict(x.reshape(-1,1)).flatten()
    residuals = y - y_pred
    n = len(x)
    dof = n - 2
    s_err = np.sqrt(np.sum(residuals**2) / dof)
    SE_slope = s_err / np.sqrt(np.sum( (x - np.mean(x))**2 ))
    t_val = t.ppf(0.975, dof)
    slope_ci95 = t_val * SE_slope

    # Reuse the prediction computed above instead of predicting a second time.
    r2_val = r2_score(y, y_pred)

    # Time span covered by the window and its midpoint.
    d_start = group["date"].iloc[0]
    d_end = group["date"].iloc[-1]
    d_middle = d_start + (d_end - d_start) / 2

    dates_start.append(d_start)
    dates_end.append(d_end)
    dates_middle.append(d_middle)
    slopes.append(-slope)  # stored with the sign flipped
    slope_errs.append(slope_ci95)
    r2s.append(r2_val)

fig, ax = plt.subplots(figsize=(5.5,4))

cmap = cm.plasma
norm = mcolors.Normalize(vmin=min(r2s), vmax=max(r2s))
colors = cmap(norm(r2s))

# Marker with error bar at the window midpoint + horizontal bar over its time span
for i in range(len(dates_middle)):
    # Horizontal span (date_start to date_end)
    ax.hlines(y=slopes[i], xmin=dates_start[i], xmax=dates_end[i],
              color=colors[i], linewidth=2, alpha=0.8)

    ax.errorbar(dates_middle[i], slopes[i], yerr=slope_errs[i],
                fmt='o', color=colors[i], ecolor=colors[i], capsize=3)

ax.set_xlim(datetime.datetime(2020,8,1), datetime.datetime(2024,10,1))

# Interpret the x axis as dates
ax.xaxis_date()

# Colorbar
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, ax=ax)
cbar.set_label("$R^2$", rotation=0, labelpad=20)
fig.patch.set_facecolor('white')

plt.savefig(
    '../../../../data/MAHY/figures/b-value_200_50-overlap.pdf',
    dpi=500,
    bbox_inches='tight',
    pad_inches=0
)

In [None]:
def dates_format(x, pos):
    """Tick formatter: turn a matplotlib date number into a two-line LaTeX label.

    `x` is converted with mdates.num2date; the label stacks 'day/month' above
    the four-digit year using \\shortstack. `pos` is accepted for the
    formatter call signature and is not used.
    """
    moment = mdates.num2date(x)
    day_month, year = moment.strftime('%d/%m'), moment.strftime('%Y')
    return r'\shortstack{%s\\%s}' % (day_month, year)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(5.5, 3))
fig.patch.set_facecolor('white')
fig.subplots_adjust(wspace=0.25)

# NOTE(review): e_end is not used anywhere in the visible cells.
e_end = datetime.datetime(2020,12,5)

# Calendar-year bins: consecutive entries of `years` delimit one bin each, so
# the last entry only serves as the upper bound of the last bin.
years = (2020,2021,2022,2023,2024,2025)
year_bounds = {
    year: (datetime.datetime(year,1,1), datetime.datetime(next_year,1,1))
    for year, next_year in zip(years[:-1], years[1:])
}
# One colour per year, shared by both panels.
cmap = plt.get_cmap("plasma")
norm = plt.Normalize(vmin=years[0], vmax=years[-1])

# --- (ax1) log10 cumulative count versus magnitude, one series per year ---
# asso_catalog is ordered by magnitude, so each yearly subset is too.
for year, (year_start, year_end) in year_bounds.items():
    yearly_events = asso_catalog[(asso_catalog["date"] > year_start) & (asso_catalog["date"] < year_end)]
    ax1.scatter(
        yearly_events["mb"],
        np.log10(np.arange(1, 1 + len(yearly_events)))[::-1],
        label=f"{year} ({len(yearly_events)})",
        s=5,
        color=cmap(norm(year))
    )
ax1.legend(loc="best", markerscale=1)

ax1.set_xlabel("$m_b$", labelpad=-4)
ax1.set_ylabel("log$_{10}$(N)", color='black', labelpad=0.6)
ax1.set_xlim(0, 4.5)
ax1.set_ylim(0, 4)
ax1.set_xticks([0, 1.5, 3, 4.5])
ax1.set_yticks([0, 1, 2, 3, 4])
ax1.tick_params(axis='x', which='major', direction='out', length=7, top=False, labelcolor='black')

minor_locator = ticker.MultipleLocator(0.5)
ax1.xaxis.set_minor_locator(minor_locator)
ax1.tick_params(axis='x', which='minor', direction='out', length=4, labelbottom=False, top=False, labelcolor='black')

ax1.grid(True, linestyle='--', alpha=0.7)
ax1.grid(True, which='minor', linestyle=':', linewidth=0.5, alpha=0.5)

# --- (ax2) sliding-window slopes from the previous cell, coloured by year ---
dates_middle, slopes, slope_errs = np.array(dates_middle), np.array(slopes), np.array(slope_errs)
for year, (year_start, year_end) in year_bounds.items():
    mask = (dates_middle > year_start) & (dates_middle < year_end)
    ax2.scatter(
        dates_middle[mask],
        slopes[mask],
        marker="o",
        # Number of windows of that year. The previous label reused the event
        # count of the LAST year of the ax1 loop for every year.
        label=f"{year} ({np.count_nonzero(mask)})",
        s=10,
        color=cmap(norm(year))
    )
    ax2.errorbar(dates_middle[mask], slopes[mask], yerr=slope_errs[mask], color=cmap(norm(year)), ls = "None")

start, end = datetime.datetime(2020, 8, 1), datetime.datetime(2024, 10, 1)
ax2.set_xlim(start, end)
ax2.set_ylabel("b-value")
ax2.xaxis_date()
ax2.xaxis.set_major_locator(mdates.MonthLocator(bymonth=[1]))
ax2.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=range(1, 13)))
ax2.xaxis.set_major_formatter(dates_format)
ax2.tick_params(axis='x', labelrotation=0)
ax2.set_ylim(0.6,1.6)

### PHASES
# Vertical line at date_shift, with the mean slope before and after it.
date_shift = datetime.datetime(2021,6,1)
ax2.axvline(x=date_shift, color='red', linewidth=1.5, alpha=0.5)
y_phase = 1.45
x_text = date_shift + datetime.timedelta(days=150)
y_text = y_phase
mean_phase_1 = np.mean(slopes[dates_middle<date_shift])
mean_phase_2 = np.mean(slopes[dates_middle>date_shift])
# "\\mu" keeps the backslash for LaTeX; a bare "\mu" in a non-raw string is an
# invalid escape sequence. The rendered text is unchanged.
ax2.text(x_text, y_text, f"Phase I \n $\\mu$={mean_phase_1:.2f}", ha='left', va='center',
         fontweight='bold', bbox=dict(boxstyle="round,pad=0.2", facecolor="white", edgecolor="gray", alpha=0.9))
# Arrow from the "Phase I" box to the middle of the interval before date_shift.
arrow_target = start + (date_shift - start) / 2
ax2.annotate('', xy=(arrow_target, y_phase - 0.005), xytext=(x_text, y_text),
             arrowprops=dict(arrowstyle='->', color='black', lw=1))

ax2.text(date_shift + 0.75*(end - date_shift) / 2, y_phase - 0.005 - 0.2, f"Phase II \n $\\mu$={mean_phase_2:.2f}", ha='left', va='center',
         fontweight='bold', bbox=dict(boxstyle="round,pad=0.2", facecolor="white", edgecolor="gray", alpha=0.9))

plt.savefig(
    '../../../../data/MAHY/figures/b-value_200_50-overlap_GR.pdf',
    dpi=500,
    bbox_inches='tight',
    pad_inches=0
)


# Mean slope over the two months following date_shift.
print(np.mean(slopes[(dates_middle>date_shift) & (dates_middle<datetime.datetime(2021,8,1))]))

In [None]:
# Same sliding-window fit as the 50 % overlap cell above, with 98 % overlap.
# NOTE(review): the loop body is duplicated between the two cells; a shared
# function taking (window_size, overlap) would remove the copy.
df_sorted = asso_catalog.sort_values("date").reset_index(drop=True)

# Windows hold a fixed NUMBER of consecutive events (not a fixed duration).
window_size = 200
overlap = 0.98
# round() instead of int(): window_size * (1 - overlap) is a float product and
# truncation can lose one event or even produce 0. Clamped to 1 because
# range() rejects a zero step.
step = max(1, round(window_size * (1 - overlap)))

dates_start = []
dates_end = []
dates_middle = []
slopes = []
slope_errs = []
r2s = []

for i in tqdm(range(0, len(df_sorted) - window_size + 1, step)):
    group = df_sorted.iloc[i:i+window_size]

    # Sorted magnitudes against log10 of their reversed rank.
    x = np.array(sorted(group["mb"]))
    x, y = x, np.log10(np.arange(1, 1 + len(x)))[::-1]

    # Fit only on 1.5 < mb < 4.
    mask = (x > 1.5) & (x < 4)
    x, y = x[mask], y[mask]

    if len(x) < 5:
        continue

    model = LinearRegression()
    model.fit(x.reshape(-1,1), y.reshape(-1,1))
    slope = model.coef_[0,0]

    # 95 % confidence half-width of the slope from the residual standard error.
    y_pred = model.predict(x.reshape(-1,1)).flatten()
    residuals = y - y_pred
    n = len(x)
    dof = n - 2
    s_err = np.sqrt(np.sum(residuals**2) / dof)
    SE_slope = s_err / np.sqrt(np.sum( (x - np.mean(x))**2 ))
    t_val = t.ppf(0.975, dof)
    slope_ci95 = t_val * SE_slope

    # Reuse the prediction computed above instead of predicting a second time.
    r2_val = r2_score(y, y_pred)

    # Time span covered by the window and its midpoint.
    d_start = group["date"].iloc[0]
    d_end = group["date"].iloc[-1]
    d_middle = d_start + (d_end - d_start) / 2

    dates_start.append(d_start)
    dates_end.append(d_end)
    dates_middle.append(d_middle)
    slopes.append(-slope)  # stored with the sign flipped
    slope_errs.append(slope_ci95)
    r2s.append(r2_val)

In [None]:
# NOTE: tqdm is deliberately NOT re-imported here. `from tqdm import tqdm`
# would replace the notebook progress bar imported in the first cell for every
# cell executed afterwards.

# Preparation
df = asso_catalog.copy()
df = df.sort_values("date").reset_index(drop=True)

# Parameters: window length in days and fractional overlap between windows
window_days = 10
overlap = 0.25
step_days = window_days * (1 - overlap)
window = pd.Timedelta(days=window_days)

# Window generation: start dates run from the first event, every step_days,
# for as long as the whole window ends no later than the last event.
start_date = df["date"].min()
end_date = df["date"].max()
window_starts = pd.date_range(start_date, end_date - window, freq=pd.Timedelta(days=step_days))

dates_middle_bis = []
counts = []

# Iterating over the precomputed starts gives the progress bar an exact total.
for window_start in tqdm(window_starts):
    window_end = window_start + window
    # Events falling in [window_start, window_end).
    mask = (df["date"] >= window_start) & (df["date"] < window_end)
    n_events = mask.sum()

    dates_middle_bis.append(window_start + window / 2)
    counts.append(n_events)

# Plot (labels follow window_days instead of a hard-coded "10")
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(dates_middle_bis, counts, '-o', color='steelblue')
ax.set_xlabel("Date")
ax.set_ylabel(f"Number of events (per {window_days} days)")
ax.grid(True, linestyle='--', alpha=0.6)
ax.set_title(f"Event rate ({window_days}-day sliding window)")

fig.autofmt_xdate()
plt.show()


In [None]:
# Debug check: print how many entries dates_middle and counts currently hold.
print(len(dates_middle), len(counts))

In [None]:
# dates_format (two-line date tick labels) is defined once, in the earlier
# cell that draws the per-year figure; the identical second definition that
# used to open this cell has been removed.

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 5))
fig.patch.set_facecolor('white')
# NOTE(review): the two panels are stacked vertically, so wspace has no
# visible effect here; hspace may have been intended.
fig.subplots_adjust(wspace=0.25)

# --- (ax1) event rate ---
ax1.plot(dates_middle_bis, counts, '-', color='steelblue')
ax1.set_xlabel("Date")
ax1.set_ylabel(f"Number of events (per {window_days} days)")
ax1.grid(True, linestyle='--', alpha=0.6)

# --- (ax2) sliding-window slopes ---
# (The R^2 colour map that was computed here was never used and is dropped.)
ax2.plot(dates_middle, slopes, color="royalblue")
ax2.set_ylabel("b-value")

# Same time axis on both panels: fixed limits, yearly major ticks, monthly
# minor ticks and the two-line date labels.
for panel in (ax1, ax2):
    panel.set_xlim(datetime.datetime(2020, 8, 1), datetime.datetime(2024, 10, 1))
    panel.xaxis_date()
    panel.xaxis.set_major_locator(mdates.MonthLocator(bymonth=[1]))
    panel.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=range(1, 13)))
    panel.xaxis.set_major_formatter(dates_format)
    panel.tick_params(axis='x', labelrotation=0)

plt.savefig(
    '../../../../data/MAHY/figures/b-value_200_98-overlap_distrib.pdf',
    dpi=500,
    bbox_inches='tight',
    pad_inches=0
)