In [182]:
import numpy as np
import pandas as pd
import os
import pickle
from scipy.stats import spearmanr
from scipy.signal import medfilt

In [183]:
# Environmental variable to analyse. Choose one of:
# "wind_speed", "current_speed", "total_precipitation", "wave_height", "wave_period"
var = "wind_speed"

# Input folders (relative paths), both under the same data root
data_root = "../../../../data/test_correlation"
PSD_dir = f"{data_root}/data_mahy"
variable_dir = f"{data_root}/data_soundscape/{var}"
#%matplotlib qt

In [184]:
# Catalog mapping each variable key to its (display name, unit) pair
var_info = dict(
    wind_speed=("Wind speed", "m/s"),
    current_speed=("Current speed", "m/s"),
    total_precipitation=("Total precipitation", "m"),
    wave_height=("Wave height", "m"),
    wave_period=("Wave period", "s"),
)

# Keys missing from the catalog fall back to a placeholder name and an empty unit
if var in var_info:
    var_name, unit = var_info[var]
else:
    var_name, unit = "Unknown variable", ""


In [185]:
# Stations to plot. /!\ Every station must belong to the same hydrophone
selected_stations = [
    "MAHY01",
    "MAHY11",
    "MAHY21",
    "MAHY31",
    "MAHY41",
]

# The hydrophone number is taken from the last character of each station name
hydrophone_number = list(map(lambda name: name[-1], selected_stations))

# Abort when the selection spans several hydrophones
distinct_hydrophones = set(hydrophone_number)
if len(distinct_hydrophones) > 1:
    raise ValueError("You have selected more than one hydrophone")
print(f"Hydrophone n°{hydrophone_number[0]} selected")

Hydrophone n°4 selected


In [186]:
# Read one pickled PSD table per selected station.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
psds_all = [
    pd.read_pickle(os.path.join(PSD_dir, f"{station_name}.pkl"))
    for station_name in selected_stations
]
# Stack the per-station tables and order the rows by index
psd_df = pd.concat(psds_all).sort_index()

# Zeros are treated as missing values
psd_df = psd_df.replace(0, np.nan)

# Read the variable table matching the selected hydrophone
variable_path = os.path.join(variable_dir, f"h{hydrophone_number[0]}.pkl")
variable_df = pd.read_pickle(variable_path)

# Drop every variable row whose date has no PSD counterpart
has_psd = variable_df.index.isin(psd_df.index)
variable_df = variable_df.loc[has_psd]

if var == "current_speed":
    # Centred rolling median (i.e. a median filter) over a 24h time window
    rolling_median = psd_df.rolling(window="24h", center=True).median()
    # Retain only the rows whose date also exists in the variable table
    has_variable = rolling_median.index.isin(variable_df.index)
    psd_df = rolling_median[has_variable]

In [187]:
# Spectrogram inputs: daily series of the variable and PSD on a gap-free daily axis
variable_values = variable_df["values"]
daily_median_vars = ("wind_speed", "combined_wave_index", "wave_height", "wave_period")

if var == "total_precipitation":
    # Precipitation is accumulated per day
    variable_daily, title_modifier = variable_values.resample("D").sum(), " (daily sum)"
elif var in daily_median_vars:
    # These variables are summarised by their daily median
    variable_daily, title_modifier = variable_values.resample("D").median(), " (daily median)"
else:
    # Anything else (current speed) is used as loaded, without resampling
    variable_daily, title_modifier = variable_values, " (depth = 1300 m), daily data"

# Continuous day-by-day axis spanning the variable's first to last date
first_day = variable_daily.index.min()
last_day = variable_daily.index.max()
full_index = pd.date_range(start=first_day, end=last_day, freq="D")

# Put both tables on that axis; days without data become NaN
psd_reindexed = psd_df.reindex(full_index)
variable_reindexed = variable_daily.reindex(full_index)

# time_numeric = [t.timestamp() for t in full_index]

In [188]:
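# Earlier variant of the Spearman correlation, kept commented out for reference:
# it uses kernel_size=51 and stores the filtered amplitudes per frequency.
# # Spearman's rank correlation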
# # Align common dates
# common_index = psd_df.index.intersection(variable_df.index)
# psd_aligned = psd_df.loc[common_index]
# variable_aligned = variable_df.loc[common_index]["values"]
#
# correlations = []
# amplitude_median = {}
#
# for freq in psd_aligned.columns:
#     amplitude = psd_aligned[freq]
#     amplitude_filt = medfilt(amplitude, kernel_size=51)
#     amplitude_median[freq] = amplitude_filt
#     #NaN filter
#     mask = (~np.isnan(amplitude_filt)) & (~np.isnan(variable_aligned))
#     if mask.sum() > 2:
#         corr,_ = spearmanr(amplitude_filt[mask], variable_aligned[mask])
#     else:
#         corr = np.nan
#     correlations.append(corr)
#
# frequencies = psd_aligned.columns

In [189]:
# Spearman's rank correlation between the median-smoothed PSD and the variable,
# computed independently for every frequency column.

# Width (in samples) of the centred median window used for smoothing
MEDIAN_KERNEL = 5

# Align common dates
common_index = psd_df.index.intersection(variable_df.index)
psd_aligned = psd_df.loc[common_index]
variable_aligned = variable_df.loc[common_index]["values"]

# NaN-aware median smoothing of all frequency columns at once.
# scipy.signal.medfilt was replaced here because it zero-pads the ends of the
# series (biasing the first/last samples) and is not NaN-aware, whereas the PSD
# table contains NaN on purpose. The rolling median skips NaN inside the window
# and shrinks the window at the edges; `.where` restores the original gaps so
# no value is invented where the PSD was missing.
psd_filtered = (
    psd_aligned
    .rolling(window=MEDIAN_KERNEL, center=True, min_periods=1)
    .median()
    .where(psd_aligned.notna())
)

# The variable is the same for every frequency: convert and mask it once
variable_values = variable_aligned.to_numpy(dtype=float)
variable_valid = ~np.isnan(variable_values)

correlations = []

for freq, filtered_column in psd_filtered.items():
    amplitude_filt = filtered_column.to_numpy(dtype=float)
    # NaN filter: keep samples where both the PSD and the variable are defined
    mask = ~np.isnan(amplitude_filt) & variable_valid
    if mask.sum() > 2:
        corr, _ = spearmanr(amplitude_filt[mask], variable_values[mask])
    else:
        # Too few valid pairs for a meaningful rank correlation
        corr = np.nan
    correlations.append(corr)

frequencies = psd_aligned.columns

In [190]:
# Bundle the spectrogram inputs and the correlation results into a single dict
data = dict(
    psd=psd_reindexed,
    variable=variable_reindexed,
    time=full_index,
    frequencies=frequencies,
    correlations=correlations,
    title=title_modifier,
    # "amplitude": amplitude_matrix
)

# One output file per (variable, hydrophone) pair
output_path = f"../../../../data/test_correlation/data_soundscape/spectro_and_corr/{var}-h{hydrophone_number[0]}.pkl"
with open(output_path, "wb") as f:
    pickle.dump(data, f)