In [7]:
import numpy as np
import pandas as pd
import os
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns
from matplotlib import ticker

In [8]:
# Load the updated segments table and keep only the rows whose 'species' value is not missing.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle files from a trusted source.
segments_updated_dir = "../../../../../data/SPL"
segments_pickle_path = os.path.join(segments_updated_dir, 'df_segments_updated.pkl')
df = pd.read_pickle(segments_pickle_path)
has_species = df['species'].notna()
df_species = df[has_species]

In [9]:
# Color palette by label: each distinct label gets a tab20 color;
# the modulo makes colors repeat once there are more than 20 labels.
labels = df_species['label'].unique()
color_map = {}
for position, label_name in enumerate(labels):
    color_map[label_name] = plt.cm.tab20(position % 20)

In [10]:
# Plot per species: one figure per species in which every annotation is drawn as a
# translucent rectangle anchored at x=0, as wide as its duration and as tall as its
# frequency band. color_map must already map every label to a color.
species_list = df_species['species'].unique()

# Same x-range for every figure: 5 % beyond the longest duration in df_species
x_upper = df_species['duration'].max() * 1.05

for species in species_list:
    subset = df_species[df_species['species'] == species]

    fig, ax = plt.subplots(figsize=(15, 6))

    annotation_columns = zip(
        subset['start_frequency'],
        subset['end_frequency'],
        subset['duration'],
        subset['label'],
    )
    for f_low, f_high, length, label_name in annotation_columns:
        label_color = color_map[label_name]
        ax.add_patch(
            patches.Rectangle(
                (0, f_low),
                length,
                f_high - f_low,
                linewidth=0.5,
                edgecolor=label_color,
                facecolor=label_color,
                alpha=0.2,
            )
        )

    ax.set_xlim(0, x_upper)
    ax.set_ylim(0, 120)

    ax.set_title(f"Vocalisation duration and frequency for {species}")
    ax.set_xlabel("Duration (s)")
    ax.set_ylabel("Frequency (Hz)")

    # Legend: one colored patch per label present in this species' subset
    handles = []
    for name in subset['label'].unique():
        handles.append(patches.Patch(color=color_map[name], label=name))
    ax.legend(handles=handles, title="Labels", bbox_to_anchor=(1.0, 1), loc="upper right")

    plt.tight_layout()
    plt.show()

In [5]:
# Graph comme Miller et al.
%matplotlib qt
sns.set(style="whitegrid")
df_species = df[df['species'].notna()]

species_list = df_species['species'].unique()

unique_labels = df_species['label'].unique()
palette = sns.color_palette("tab10", len(unique_labels))
color_map = dict(zip(unique_labels, palette))

for species in species_list:
    subset = df_species[df_species['species'] == species]

    fig, ax = plt.subplots(figsize=(12, 8))

    for _, row in subset.iterrows():
        rect = patches.Rectangle(
            (0, row['start_frequency']),
            row['duration'],
            row['end_frequency'] - row['start_frequency'],
            linewidth=0.5,
            edgecolor=color_map[row['label']],
            facecolor=color_map[row['label']],
            alpha=0.2
        )
        ax.add_patch(rect)

    if species == 'minke':
        ax.set_xlim(0,500)
    else:
        ax.set_xlim(0,70)
    ax.set_ylim(0,120)

    # Legend
    handles = [patches.Patch(color=color_map[l], label=l) for l in subset['label'].unique()]
    ax.legend(handles=handles, title="Labels", bbox_to_anchor=(1.0, 1), loc="upper right")
    ax.set_xlabel("Duration (s)")
    ax.set_ylabel("Frequency (Hz)")
    ax.set_title(f"Vocalisation duration and frequency for {species}")
    ax.grid(linestyle='--', alpha=0.4)
    plt.tight_layout()
    plt.show()

In [None]:
# Display the rows of df_species that have a missing value in at least one column
has_missing_value = df_species.isna().any(axis=1)
df_species[has_missing_value]

In [None]:
# Print the 'label' column of df_species
label_column = df_species['label']
print(label_column)

In [6]:
# Frequency histogram by label -> frequency distribution; not necessarily useful
# if we already have the frequency curve by label.
# For every label, count how many annotations cover each 1-Hz bin and save the
# resulting bar chart as a PNG in output_dir.
# NOTE(review): output_dir is an absolute, machine-specific path -- consider moving it
# to a configurable location.
output_dir = "/home/imonge/Documents/annotations/histo_fq"
# savefig does not create missing folders, so make sure the target directory exists.
os.makedirs(output_dir, exist_ok=True)

# unique() also returns NaN/None when some labels are missing; such a label matches no
# row, which would make min()/max() below NaN and int() fail -- so drop missing labels.
unique_labels = df_species['label'].dropna().unique()

for target_label in unique_labels:

    # Filter data for this label; rows lacking a frequency bound cannot be binned
    df_label = df_species[df_species['label'] == target_label].dropna(
        subset=['start_frequency', 'end_frequency']
    )
    if df_label.empty:
        continue

    # Global frequency range, widened outwards to whole Hz
    freq_min = int(np.floor(df_label['start_frequency'].min()))
    freq_max = int(np.ceil(df_label['end_frequency'].max()))

    # 1-Hz bins
    freq_bins = np.arange(freq_min, freq_max + 1)
    bin_counts = np.zeros_like(freq_bins, dtype=int)

    # Count annotations covering each bin: every annotation adds 1 to all bins from
    # floor(start) to ceil(end) inclusive, clipped to [freq_min, freq_max].
    for start_frequency, end_frequency in zip(df_label['start_frequency'], df_label['end_frequency']):
        first = max(int(np.floor(start_frequency)), freq_min) - freq_min
        last = min(int(np.ceil(end_frequency)), freq_max) - freq_min
        if last >= first:
            bin_counts[first:last + 1] += 1

    # Plot histograms
    fig, ax = plt.subplots(figsize=(12, 8))

    ax.bar(freq_bins, bin_counts, width=1.0, color="teal", alpha=0.7)
    ax.set_xlabel("Frequency (Hz)")
    ax.set_ylabel("Count")
    ax.set_title(f"Frequency coverage histogram for label: {target_label}")
    ax.set_xlim(0, 120)

    # Ticks parameters: major tick every 5 Hz, minor tick every 1 Hz, same look for both
    ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
    ax.xaxis.set_minor_locator(ticker.MultipleLocator(1))
    ax.tick_params(axis='x', which='both', length=10, width=1, color='gray', direction='inout')

    fig.tight_layout()
    fig.savefig(os.path.join(output_dir, f"frequency_coverage_{target_label}.png"))
    # Close this specific figure so figures do not accumulate in memory across labels
    plt.close(fig)