In [29]:
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
from utils.data_reading.sound_data.station import StationsCatalog
from matplotlib import cm
from matplotlib.lines import Line2D

In [2]:
PSD_dir = "../../../../../data/test_correlation/data_mahy"
AIS_dir = "../../../../../data/test_correlation/AIS/AIS.pkl"
output_path = "/home/imonge/Documents/Masking" # Output repository for the plots
catalog_path = "/media/imonge/CORSAIR"
stations = StationsCatalog(catalog_path)
hydrophone_number = 1 # choose between 1, 2, 3, 4

In [4]:
# Select stations for the chose hydrophone
if hydrophone_number != 4:
    selected_stations = [f"MAHY0{hydrophone_number}",
                         f"MAHY1{hydrophone_number}",
                         f"MAHY2{hydrophone_number}",
                         f"MAHY3{hydrophone_number}",
                         f"MAHY4{hydrophone_number}",
                         ]
else:
    selected_stations = [f"MAHY0{hydrophone_number}",
                         f"MAHY1{hydrophone_number}",
                         f"MAHY3{hydrophone_number}",
                         f"MAHY4{hydrophone_number}",
                         ]

hydrophone_location = {
1: (-13.19, 45.35),
2: (-13.35, 45.82),
3: (-12.77, 46.15),
4: (-12.50, 45.55)
}


In [5]:
# Load the PSD data
psds_all = []
for station in selected_stations:
    psd_path = os.path.join(PSD_dir, f"{station}.pkl")
    psds_all.append(pd.read_pickle(psd_path))

# Concatenate psds
psd_df = pd.concat(psds_all).sort_index()

# Change 0 to NaN
psd_df.replace(0, np.nan, inplace=True)

# Change absent data to Nan
full_index = pd.date_range(start=psd_df.index.min(), end=psd_df.index.max(), freq="h")
psd_reindexed = psd_df.reindex(full_index)

In [23]:
## Calculate parameters for the plot
# Make sure we have a temporal index
psd_reindexed.index = pd.to_datetime(psd_reindexed.index)

# Calsculate sound intensity
psd_db = 10 * np.log10(psd_reindexed)

# Mean PSD for all frequencies
psd_db_mean = psd_db.mean(axis=1)

# Extract hour and date
df = pd.DataFrame({
    "datetime": psd_db_mean.index,
    "value": psd_db_mean.values
})
df["date"] = df["datetime"].dt.date
df["hour"] = df["datetime"].dt.hour

# Group by date and hour
grouped = df.groupby(["date", "hour"]).mean().unstack(level=1)
%matplotlib qt

In [35]:
## Plot parameters
# Convert to matrix
psd_matrix = grouped["value"].values
dates = grouped.index

# Plot
fig, ax = plt.subplots(figsize=(16, 18))
im = ax.imshow(psd_matrix, aspect='auto', cmap='Reds', origin='lower', extent=[0, 24, 0, len(dates)])

# X axis
ax.set_xticks(range(0, 25, 2))
ax.set_xlabel("Hour", fontsize=14)

# Y axis
yticks_idx = np.linspace(0, len(dates) - 1, 10).astype(int)
ax.set_yticks(yticks_idx)
ax.set_yticklabels([str(dates[i]) for i in yticks_idx])
ax.set_ylabel("Date", fontsize=14)

# Title and colorbar
ax.set_title(f"Diel plot - hydrophone {hydrophone_number}", fontsize=16)
cbar= fig.colorbar(im, ax=ax, label="Mean PSD (dB re 1 µPA²/Hz")

plt.tight_layout()
plt.show()

In [None]:
#######################

In [34]:
## Diel plot with boat presence
# Function to calculate distances toa hydrophone
def haversine(lon1, lat1, lon2, lat2):

    """
    Calculate the great circle distance between two points on the earth (specified in decimal degrees), returns distance in km.
    """
    # Convert decimal degrees to radian
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    # haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371 # radius of earth in kilometers
    return c * r

hydrophone_lat, hydrophone_lon = hydrophone_location.get(hydrophone_number, (None, None))

# freq_axis = psd_reindexed.columns.astype(float)
# max_freqs = psd_reindexed.apply(lambda row: freq_axis[np.argmax(row.values)], axis=1)

# # Create dataset
# dffq = pd.DataFrame({
#     "datetime": max_freqs.index,
#     "max_freq": max_freqs.values
# })
# dffq["hour"] = dffq["datetime"].dt.hour
# dffq["date"] = dffq["datetime"].dt.date
#
# diel_matrix = dffq.pivot_table(index="date", columns="hour", values="max_freq").sort_index()
## Calculate parameters for the plot
# Make sure we have a temporal index
psd_reindexed.index = pd.to_datetime(psd_reindexed.index)

# Calsculate sound intensity
psd_db = 10 * np.log10(psd_reindexed)

# Mean PSD for all frequencies
psd_db_mean = psd_db.mean(axis=1)

# Extract hour and date
df = pd.DataFrame({
    "datetime": psd_db_mean.index,
    "value": psd_db_mean.values
})
df["date"] = df["datetime"].dt.date
df["hour"] = df["datetime"].dt.hour

# Group by date and hour
grouped = df.groupby(["date", "hour"]).mean().unstack(level=1)


# Load AIS data
AIS_df = pd.read_pickle(AIS_dir)
AIS_df['datetime'] = pd.to_datetime(AIS_df['datetime'])
AIS_df['hour'] = AIS_df['datetime'].dt.hour + AIS_df['datetime'].dt.minute / 60
AIS_df['date'] = AIS_df['datetime'].dt.date

# Select 10 km radius around hydrophone
if hydrophone_lat is not None and hydrophone_lon is not None:
    # Calculate distance
    AIS_df['distance_km'] = haversine(hydrophone_lat, hydrophone_lon, AIS_df['lat'], AIS_df['lon'])
    AIS_nearby = AIS_df[AIS_df['distance_km'] <=10].copy() # Number of boats in a 10 km distance to the hydrophone
else:
    print(f"Coordinates for hydrophone {hydrophone_number} not found.")
    AIS_nearby = AIS_df.copy()

################
# Select only one point for each boat's passages
AIS_nearby = AIS_nearby.sort_values(['mmsi', 'datetime']).reset_index(drop=True)
AIS_nearby['time_diff'] = AIS_nearby.groupby('mmsi')['datetime'].diff().dt.total_seconds()

threshold_seconds = 3600

AIS_nearby['new_session'] = (AIS_nearby['time_diff'] > threshold_seconds) | (AIS_nearby['time_diff'].isna())
AIS_nearby['session_id'] = AIS_nearby.groupby('mmsi')['new_session'].cumsum()

median_ais_sessions = AIS_nearby.groupby(['mmsi', 'session_id']).agg({
    'datetime': 'median'
}).reset_index()

median_ais_sessions['hour'] = median_ais_sessions['datetime'].dt.hour + median_ais_sessions['datetime'].dt.minute / 60 + median_ais_sessions['datetime'].dt.second / 3600
median_ais_sessions['date'] = median_ais_sessions['datetime'].dt.date


median_ais_sessions = median_ais_sessions[median_ais_sessions['date'].isin(date_list)].copy()
median_ais_sessions['y_idx'] = median_ais_sessions['date'].apply(lambda d: date_list.index(d))
#####################################

# Plot
psd_matrix = grouped["value"].values
dates = grouped.index
fig, ax = plt.subplots(figsize=(16, 18))
im = ax.imshow(psd_matrix, aspect='auto', cmap='Reds', origin='lower', extent=[0, 24, 0, len(dates)])

ax.scatter(median_ais_sessions['hour'], median_ais_sessions['y_idx'], marker='x', color='darkred', label='AIS signal', s=20, alpha=0.5)

ax.set_xlabel("Hour", fontsize=14)
ax.set_xticks(range(0, 25, 2))

ax.set_ylabel("Date", fontsize=14)
ax.set_yticks(yticks_idx)
yticks_idx = np.linspace(0, len(dates) - 1, 10).astype(int)
ax.set_yticklabels([str(dates[i]) for i in yticks_idx])
# ax.set_yticks(np.arange(len(date_list))[::max(1, len(date_list)//10)])
# ax.set_yticklabels([str(date) for date in date_list][::max(1, len(date_list)//10)])

# Title and colorbar
ax.set_title(f"Diel plot - hydrophone {hydrophone_number}", fontsize=16)
cbar= fig.colorbar(im, ax=ax, label="Mean PSD (dB re 1 µPA²/Hz")

plt.tight_layout()
plt.show()
########################################################################
# Convert to matrix


# Plot



# X axis



# Y axis









In [None]:
#######################

In [32]:
## Max frequency plot with boat presence
# Function to calculate distances toa hydrophone
def haversine(lon1, lat1, lon2, lat2):

    """
    Calculate the great circle distance between two points on the earth (specified in decimal degrees), returns distance in km.
    """
    # Convert decimal degrees to radian
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    # haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371 # radius of earth in kilometers
    return c * r

hydrophone_lat, hydrophone_lon = hydrophone_location.get(hydrophone_number, (None, None))
psd_reindexed.index = pd.to_datetime(psd_reindexed.index)

freq_axis = psd_reindexed.columns.astype(float)
max_freqs = psd_reindexed.apply(lambda row: freq_axis[np.argmax(row.values)], axis=1)

# Create dataset
dffq = pd.DataFrame({
    "datetime": max_freqs.index,
    "max_freq": max_freqs.values
})
dffq["hour"] = dffq["datetime"].dt.hour
dffq["date"] = dffq["datetime"].dt.date

diel_matrix = dffq.pivot_table(index="date", columns="hour", values="max_freq").sort_index()

# Load AIS data
AIS_df = pd.read_pickle(AIS_dir)
AIS_df['datetime'] = pd.to_datetime(AIS_df['datetime'])
AIS_df['hour'] = AIS_df['datetime'].dt.hour + AIS_df['datetime'].dt.minute / 60
AIS_df['date'] = AIS_df['datetime'].dt.date

# Select 10 km radius around hydrophone
if hydrophone_lat is not None and hydrophone_lon is not None:
    # Calculate distance
    AIS_df['distance_km'] = haversine(hydrophone_lat, hydrophone_lon, AIS_df['lat'], AIS_df['lon'])
    AIS_nearby = AIS_df[AIS_df['distance_km'] <=15].copy() # Number of boats in a 10 km distance to the hydrophone
else:
    print(f"Coordinates for hydrophone {hydrophone_number} not found.")
    AIS_nearby = AIS_df.copy()

################
# Select only one point for each boat's passages
AIS_nearby = AIS_nearby.sort_values(['mmsi', 'datetime']).reset_index(drop=True)
AIS_nearby['time_diff'] = AIS_nearby.groupby('mmsi')['datetime'].diff().dt.total_seconds()

threshold_seconds = 3600

AIS_nearby['new_session'] = (AIS_nearby['time_diff'] > threshold_seconds) | (AIS_nearby['time_diff'].isna())
AIS_nearby['session_id'] = AIS_nearby.groupby('mmsi')['new_session'].cumsum()

median_ais_sessions = AIS_nearby.groupby(['mmsi', 'session_id']).agg({
    'datetime': 'median'
}).reset_index()

median_ais_sessions['hour'] = median_ais_sessions['datetime'].dt.hour + median_ais_sessions['datetime'].dt.minute / 60 + median_ais_sessions['datetime'].dt.second / 3600
median_ais_sessions['date'] = median_ais_sessions['datetime'].dt.date

date_list = diel_matrix.index.tolist()
median_ais_sessions = median_ais_sessions[median_ais_sessions['date'].isin(date_list)].copy()
median_ais_sessions['y_idx'] = median_ais_sessions['date'].apply(lambda d: date_list.index(d))
#####################################

# Plot
fig, ax = plt.subplots(figsize=(16, 18))
img = ax.imshow(diel_matrix, aspect='auto', cmap='viridis', origin='lower', extent=[0, 24, 0, len(date_list)])

ax.scatter(median_ais_sessions['hour'], median_ais_sessions['y_idx'], marker='x', color='lightcoral', label='AIS signal', s=20, alpha=0.5)

ax.set_xlabel("Hour", fontsize=14)
ax.set_ylabel("Date", fontsize=14)
# ax.set_yticks(np.arange(len(date_list))[::max(1, len(date_list)//10)])
# ax.set_yticklabels([str(date) for date in date_list][::max(1, len(date_list)//10)])

cbar = plt.colorbar(img, ax=ax)
cbar.set_label("Frequency of maximum energy (Hz)", fontsize=14)

plt.tight_layout()
plt.show()




  cmap = cm.get_cmap('viridis').copy()


In [33]:

print(diel_matrix.isna().sum().sum())



12
