# Project 2


The purpose of this project is to build a model that will be able to distinguish between different seal calls.

Grey Seals are known for their wide repertoire of communications which range from clapping [[1]](https://www.smithsonianmag.com/smart-news/first-scientists-film-grey-seals-clapping-show-their-strength-180974133/) to vocalisations so complex that they have been shown to imitate the sounds of vowels and other building blocks of human speech [[2]](https://www.pbs.org/wgbh/nova/article/seals-mimic-speech/).

This project will look at specific vocalisations called Rupes and Moans.

There are 3 types of rupes (A, B and C), which are distinguished from each other based on frequency, duration and modulation.

The dataset used in this study comes from a study into the vocalisations of grey seals off Malin Head [[3]](https://www.mdpi.com/2077-1312/12/1/118).

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import signal
from scipy.io import wavfile
from scipy.signal import spectrogram
import os
from matplotlib.colors import LogNorm 

  from pandas.core import (


In [None]:
# Path stem of the recording to inspect; the audio file and its annotation
# table share this stem. Alternative recordings are kept for quick switching.
file = 'data/Samples Grey Seal/Rupes A and B/5713.210809120002'  # from PPT at time 892-896 (Rupe B)
#file = 'Guttural rupe\\5711.211013040024'
#file = 'Rupes A and B\\5713.210825190002'
#file = 'Moan\\5713.210902110002'  # from PPT at time 212 seconds

# Name of the annotation (selection) table that accompanies the audio
annot_file_path = f'{file}.Table.1.selections.txt'

# Load the audio: sampling rate first, then the sample array
sample_rate, samples = wavfile.read(f'{file}.wav')

# Load the tab-separated annotation table into a DataFrame
df = pd.read_csv(annot_file_path, sep='\t')

# Show the first few rows as a sanity check
print(df.head())

In [16]:
# Combine every rupe-call annotation table into one DataFrame.

folder = "data/Samples Grey Seal/Rupes A and B"

# The tables are read inside a comprehension so that the notebook-level `file`
# and `df` variables (the recording being analysed and its annotations) are
# not overwritten. Sorting the names makes the row order reproducible, since
# os.listdir() returns entries in arbitrary order.
annotation_tables = [
    pd.read_csv(os.path.join(folder, name), sep='\t')
    for name in sorted(os.listdir(folder))
    if name.endswith('.txt')  # only want the annotated files
]

# Concatenate once rather than growing the frame on every iteration; fall back
# to an empty frame when the folder contains no annotation files.
combined_df = (
    pd.concat(annotation_tables, ignore_index=True)
    if annotation_tables
    else pd.DataFrame()
)

# Save to CSV
combined_df.to_csv('data/combined_rupes_data.csv', index=False)

# Preview the result; this has to be the last expression of the cell for the
# notebook to display it.
combined_df.head(20)

## Calculate the Spectrogram

The `signal.spectrogram` function from SciPy [[4]](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.spectrogram.html) computes a spectrogram of the audio file: a time-frequency representation that, once plotted, shows how the frequencies change over time.

In [40]:
# Spectrogram of the whole recording.
# NOTE: the result is stored under the name `spectrogram`, which rebinds the
# function imported with `from scipy.signal import spectrogram`; a bare
# `spectrogram(...)` call made after this line would hit the array instead.
frequencies, times, spectrogram = signal.spectrogram(
    samples,
    fs=sample_rate,
    window='hann',
    nperseg=2456,
    noverlap=1228,
    nfft=4096,  # Try nfft=8192 if computer permits
)

In [None]:
# Display the array of frequency bins returned by the spectrogram computation
frequencies

In [18]:
# Raise every tiny value to a small floor so the log colour scale displays correctly
spectrogram[spectrogram < 0.001] = 0.001

# All seal sounds are under 1000 Hz, so only that band is kept
fmin = 20  # Hz
fmax = 1000  # Hz
freq_slice = np.where((frequencies >= fmin) & (frequencies <= fmax))

# np.where returns a 1-tuple; its single entry holds the indices of the
# frequency bins inside the band, which select the matching spectrogram rows.
frequencies = frequencies[freq_slice]
spectrogram = spectrogram[freq_slice[0], :]

In [19]:
def overlay_annotations(ax, df, annotation_colors):
    """Outline each annotated call with a coloured rectangle on ``ax``.

    Parameters
    ----------
    ax
        Matplotlib axes to draw on. Rectangles are placed using the time
        columns as x coordinates and the frequency columns as y coordinates.
    df
        Annotation table with the columns 'Begin Time (s)', 'End Time (s)',
        'Low Freq (Hz)', 'High Freq (Hz)' and 'Annotation'.
    annotation_colors
        Mapping from annotation name to edge colour. Rows whose annotation
        is not a key of this mapping are skipped.
    """
    # Annotation names that already own a legend entry
    added_labels = set()

    for _, row in df.iterrows():
        annotation = row['Annotation']

        # Skip if the annotation is not in the defined colors
        if annotation not in annotation_colors:
            continue

        start_time = row['Begin Time (s)']
        low_freq = row['Low Freq (Hz)']

        ax.add_patch(
            plt.Rectangle(
                (start_time, low_freq),  # Bottom-left corner
                row['End Time (s)'] - start_time,  # Width (time)
                row['High Freq (Hz)'] - low_freq,  # Height (frequency)
                edgecolor=annotation_colors[annotation],
                facecolor='none',
                linewidth=2,
                # Only the first rectangle of each type gets a label, so the
                # legend lists every annotation type once.
                label=annotation if annotation not in added_labels else None,
            )
        )
        added_labels.add(annotation)

    # Calling legend() with no labelled artists makes matplotlib emit a
    # warning, so the legend is only built when something was drawn.
    if added_labels:
        ax.legend(loc='upper right')

In [21]:
def update_colormap(event):
    """Rescale the colour limits to the part of the spectrogram in view.

    Written as a callback for axes-limit changes. ``event`` is not used: the
    function reads the module-level ``ax``, ``times``, ``frequencies`` and
    ``spectrogram`` and updates the module-level ``pc`` and ``cbar``.
    """
    # Current view limits along time (x) and frequency (y)
    t_low, t_high = ax.get_xlim()
    f_low, f_high = ax.get_ylim()

    # Column / row indices of the spectrogram cells inside the view
    visible_cols = np.flatnonzero((times >= t_low) & (times <= t_high))
    visible_rows = np.flatnonzero((frequencies >= f_low) & (frequencies <= f_high))

    # Nothing visible: leave the colour scale untouched
    if visible_cols.size == 0 or visible_rows.size == 0:
        return

    visible = spectrogram[np.ix_(visible_rows, visible_cols)]

    # Stretch the colour limits over the visible values only, then refresh
    # the colour bar and the figure.
    pc.set_clim(vmin=np.nanmin(visible), vmax=np.nanmax(visible))
    cbar.update_normal(pc)
    plt.draw()
    

In [None]:
# Edge colour used for each annotation type
annotation_colors = {
    "Rupe A": "red",
    "Rupe B": "green",
    "Growl B": "yellow",
    "Rupe C": "purple",
    "Moan": "pink",
    "G rupe": "blue",
}

# Draw the spectrogram on a logarithmic colour scale, with its colour bar
pc = plt.pcolormesh(times, frequencies, spectrogram, norm=LogNorm(), cmap='Spectral_r')
cbar = plt.colorbar(pc)

# Label the axes the spectrogram was drawn on, then outline the annotated calls
ax = plt.gca()
ax.set_ylabel('Frequency [Hz]')
ax.set_xlabel('Time [sec]')
overlay_annotations(ax, df, annotation_colors)

# Re-run update_colormap whenever either axis limit changes (zooming, panning)
for _limit_signal in ('xlim_changed', 'ylim_changed'):
    ax.callbacks.connect(_limit_signal, update_colormap)

plt.show()

In [None]:
# Window to cut out of the full spectrogram
time_start, time_end = 891, 897  # Time range in seconds
freq_start, freq_end = 20, 600  # Frequency range in Hz

# Indices of the time bins and frequency bins that fall inside the window
time_indices = np.where((times >= time_start) & (times <= time_end))[0]
freq_indices = np.where((frequencies >= freq_start) & (frequencies <= freq_end))[0]

# Extract the portion of the spectrogram (rows = frequency, columns = time)
spectrogram_sub = spectrogram[np.ix_(freq_indices, time_indices)]
frequencies_sub = frequencies[freq_indices]
times_sub = times[time_indices]
print(spectrogram_sub.shape)

# Plot the original and sub-portion spectrograms one above the other.
# The values are drawn as-is on a logarithmic colour scale (LogNorm); they are
# not converted to decibels, so the colour bars are labelled accordingly.
plt.figure(figsize=(10, 6))

plt.subplot(2, 1, 1)
plt.pcolormesh(times, frequencies, spectrogram, norm=LogNorm(), cmap='Spectral_r')
plt.title('Original Spectrogram')
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [s]')
plt.colorbar(label='Power spectral density (log scale)')

plt.subplot(2, 1, 2)
plt.pcolormesh(times_sub, frequencies_sub, spectrogram_sub, norm=LogNorm(), cmap='Spectral_r')
plt.title('Extracted Portion of Spectrogram')
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [s]')
plt.colorbar(label='Power spectral density (log scale)')

plt.tight_layout()
plt.show()

In [None]:
# Narrower window than the previous extraction
time_start, time_end = 892.5, 893.2  # Time range in seconds
freq_start, freq_end = 20, 600  # Frequency range in Hz

# Indices of the time bins and frequency bins that fall inside the window
time_indices = np.where((times >= time_start) & (times <= time_end))[0]
freq_indices = np.where((frequencies >= freq_start) & (frequencies <= freq_end))[0]

# Extract the portion of the spectrogram (rows = frequency, columns = time)
spectrogram_sub = spectrogram[np.ix_(freq_indices, time_indices)]
frequencies_sub = frequencies[freq_indices]
times_sub = times[time_indices]
print("Spectrogram size: ", spectrogram_sub.shape)

# Plot the original and sub-portion spectrograms one above the other.
# The values are drawn as-is on a logarithmic colour scale (LogNorm); they are
# not converted to decibels, so the colour bars are labelled accordingly.
plt.figure(figsize=(10, 6))

plt.subplot(2, 1, 1)
plt.pcolormesh(times, frequencies, spectrogram, norm=LogNorm(), cmap='Spectral_r')
plt.title('Original Spectrogram')
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [s]')
plt.colorbar(label='Power spectral density (log scale)')

plt.subplot(2, 1, 2)
plt.pcolormesh(times_sub, frequencies_sub, spectrogram_sub, norm=LogNorm(), cmap='Spectral_r')
plt.title('Extracted Portion of Spectrogram')
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [s]')
plt.colorbar(label='Power spectral density (log scale)')

plt.tight_layout()
plt.show()

In [None]:
# Write the current pyplot figure to 'spectrogram.png'.
# NOTE(review): the preceding cell already called plt.show(); depending on the
# matplotlib backend the figure may no longer be current at this point, in
# which case a blank image is saved. Confirm, or save before showing.
plt.savefig('spectrogram.png')
#plt.clf()
# Print the raw values of the extracted spectrogram portion
print(spectrogram_sub)

In [1]:
#Looking at Spectrogram

In [2]:
# Example: load the annotation table that accompanies one recording

# Path stem shared by the audio file and its selection table
file = 'data/Samples Grey Seal/Rupes A and B/5713.210806110002'
annotation_file = file + ".Table.1.selections.txt"

# The selection table is tab-separated
annotations = pd.read_csv(annotation_file, sep="\t")

# Preview the annotations (at most the first 30 rows)
annotations.head(30)

Unnamed: 0,Selection,View,Channel,Begin Time (s),End Time (s),Low Freq (Hz),High Freq (Hz),Delta Time (s),Delta Freq (Hz),Avg Power Density (dB FS/Hz),Annotation
0,1,Spectrogram 1,1,13.433865,13.703649,53.191,324.361,0.2698,271.17,-79.66,Rupe B
1,2,Spectrogram 1,1,26.072555,26.283886,53.191,376.259,0.2113,323.068,-84.94,Rupe B
2,3,Spectrogram 1,1,53.382769,53.528888,66.489,478.723,0.1461,412.234,-89.26,Rupe A
3,4,Spectrogram 1,1,65.997665,66.110075,59.347,415.43,0.1124,356.083,-86.86,Rupe A
4,5,Spectrogram 1,1,81.003161,81.266117,59.347,376.259,0.263,316.912,-82.32,Rupe B
5,6,Spectrogram 1,1,86.568316,86.874071,59.347,400.006,0.3058,340.659,-83.07,Rupe B
6,7,Spectrogram 1,1,97.068628,97.333469,59.347,498.471,0.2648,439.124,-85.93,Rupe A
7,8,Spectrogram 1,1,97.899554,98.048987,74.184,356.083,0.1494,281.899,-86.57,Rupe A
8,9,Spectrogram 1,1,105.623018,105.750196,71.281,570.247,0.1272,498.966,-88.29,Rupe A
9,10,Spectrogram 1,1,106.102974,106.16755,57.025,498.966,0.0646,441.941,-86.36,Rupe A


In [3]:
# Folder for generated spectrograms, and the audio file matching `file`
output_dir = "data/spectrograms"
audiofile = file + '.wav'

# wavfile.read returns the sampling rate followed by the sample array
sample_rate, audio_data = wavfile.read(audiofile)


In [4]:
# Settings shared by the spectrogram functions

# Frequency band of interest (minimum, maximum)
FREQ_MIN, FREQ_MAX = 20, 1000

# Spectrogram parameters
NFFT = 4096  # FFT size
NPERSEG = 2456  # Number of samples per segment
NOVERLAP = NPERSEG // 2  # Overlap between segments: half a segment (1228 samples)
WINDOW = 'hann'  # Window type

# Edge colour used when outlining each annotation type
ANNOTATION_COLORS = {
    "Rupe A": "red",
    "Rupe B": "green",
    "Growl B": "yellow",
    "Rupe C": "purple",
    "Moan": "pink",
    "G rupe": "blue",
}




In [8]:
# Functions used to generate the spectrograms

def compute_spectrogram(samples, sample_rate, *, nperseg=None, nfft=None,
                        noverlap=None, window=None, freq_min=None, freq_max=None):
    """
    Generate a band-limited spectrogram for a given audio segment.

    Parameters
    ----------
    samples : array of audio samples.
    sample_rate : sampling rate of ``samples``.
    nperseg, nfft, noverlap, window : optional overrides for the module-level
        NPERSEG, NFFT, NOVERLAP and WINDOW settings.
    freq_min, freq_max : optional overrides for the module-level FREQ_MIN and
        FREQ_MAX band limits (both limits are inclusive).

    Returns
    -------
    (freqs, times, spec) : the frequency bins kept, the segment times, and
        the spectrogram restricted to those bins, with every value below
        0.001 raised to 0.001.
    """
    # Any setting left as None falls back to the module-level default
    nperseg = NPERSEG if nperseg is None else nperseg
    nfft = NFFT if nfft is None else nfft
    noverlap = NOVERLAP if noverlap is None else noverlap
    window = WINDOW if window is None else window
    freq_min = FREQ_MIN if freq_min is None else freq_min
    freq_max = FREQ_MAX if freq_max is None else freq_max

    # Call the function through the `signal` module: elsewhere in this
    # notebook the bare name `spectrogram` is rebound to an array, which
    # would make a direct `spectrogram(...)` call fail.
    freqs, times, spec = signal.spectrogram(
        samples, fs=sample_rate, nperseg=nperseg, nfft=nfft, noverlap=noverlap, window=window
    )

    # Trim frequencies to the desired range (boolean indexing returns a copy)
    freq_mask = (freqs >= freq_min) & (freqs <= freq_max)
    freqs = freqs[freq_mask]
    spec = spec[freq_mask, :]

    # Avoid log scale issues by capping small values
    spec[spec < 0.001] = 0.001
    return freqs, times, spec

def pad_or_truncate(samples, sample_rate, target_duration):
    """
    Pad or truncate audio samples to a target duration.

    Parameters
    ----------
    samples : audio samples, with time along the first axis (1-D mono, or
        2-D with one column per channel).
    sample_rate : number of samples per second.
    target_duration : desired duration in seconds.

    Returns
    -------
    The samples cut to, or zero-padded at the end up to,
    ``int(target_duration * sample_rate)`` entries along the first axis.
    """
    target_length = int(target_duration * sample_rate)
    current_length = len(samples)

    if current_length >= target_length:
        # Truncate to target length
        return samples[:target_length]

    # Pad with zeros along the time axis only; any further axes (e.g. the
    # channel axis of stereo audio) must keep their size.
    pad_width = [(0, target_length - current_length)]
    pad_width += [(0, 0)] * (np.ndim(samples) - 1)
    return np.pad(samples, pad_width, mode='constant')


def overlay_annotations(ax, annotations_df, annotation_colors):
    """
    Overlay annotations as rectangles on the spectrogram.

    Each row of ``annotations_df`` is drawn as an unfilled rectangle spanning
    'Begin Time (s)'..'End Time (s)' horizontally and
    'Low Freq (Hz)'..'High Freq (Hz)' vertically, using the edge colour that
    ``annotation_colors`` maps its 'Annotation' value to. Rows whose
    annotation is not a key of ``annotation_colors`` are skipped.
    """
    added_labels = set()
    for _, row in annotations_df.iterrows():
        annotation = row['Annotation']
        if annotation not in annotation_colors:
            continue

        start_time = row['Begin Time (s)']
        low_freq = row['Low Freq (Hz)']

        ax.add_patch(
            plt.Rectangle(
                (start_time, low_freq),  # Bottom-left corner
                row['End Time (s)'] - start_time,  # Width (time)
                row['High Freq (Hz)'] - low_freq,  # Height (frequency)
                edgecolor=annotation_colors[annotation],
                facecolor='none',
                linewidth=2,
                # Label only the first rectangle of each type so the legend
                # shows every annotation type once.
                label=annotation if annotation not in added_labels else None,
            )
        )
        added_labels.add(annotation)

    # legend() warns when there is no labelled artist, so build the legend
    # only if at least one rectangle was drawn.
    if added_labels:
        ax.legend(loc='upper right')

def plot_spectrogram(freqs, times, spec, annotations_df=None, save_path=None):
    """
    Plot and optionally save the spectrogram with annotations.

    Parameters
    ----------
    freqs, times : frequency and time coordinates of ``spec``.
    spec : spectrogram values, drawn on a logarithmic colour scale.
    annotations_df : optional annotation table; when given, its calls are
        outlined using ANNOTATION_COLORS.
    save_path : when truthy, the figure is written to this path instead of
        being shown.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        pc = ax.pcolormesh(times, freqs, spec, norm=LogNorm(), cmap='Spectral_r')
        # The values are shown as-is through LogNorm, not converted to
        # decibels, so the colour bar is labelled accordingly.
        fig.colorbar(pc, ax=ax, label="Power spectral density (log scale)")
        ax.set_ylabel("Frequency [Hz]")
        ax.set_xlabel("Time [s]")
        ax.set_title("Spectrogram")

        # Overlay annotations if provided
        if annotations_df is not None:
            overlay_annotations(ax, annotations_df, ANNOTATION_COLORS)

        # Save or show
        if save_path:
            fig.savefig(save_path)
            print(f"Saved spectrogram to {save_path}")
        else:
            plt.show()
    finally:
        # Release the figure even when plotting or saving fails
        plt.close(fig)

def process_file(wav_file, txt_file, output_dir):
    """
    Process a WAV and corresponding TXT file to generate and save a spectrogram for each call.

    Every annotated call is cut out of the audio, padded or truncated to the
    length of the longest call in the table, converted to a spectrogram and
    stored in ``output_dir`` as a compressed .npz file holding the arrays
    ``spectrogram``, ``frequencies``, ``times`` and ``class_label``.

    Parameters
    ----------
    wav_file : path of the audio file.
    txt_file : path of the tab-separated annotation table; it must provide
        the columns 'Begin Time (s)', 'End Time (s)' and 'Annotation'.
    output_dir : folder receiving the .npz files (created when missing).
    """
    # Load the audio file
    sample_rate, audio_data = wavfile.read(wav_file)

    # Load annotations
    annotations = pd.read_csv(txt_file, sep="\t")

    # Determine the longest call duration: the largest per-call
    # (end - begin), not the span between the first begin and the last end.
    call_durations = annotations["End Time (s)"] - annotations["Begin Time (s)"]
    longest_call_duration = call_durations.max()

    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # File-name stem of the recording, without its extension
    recording_name = os.path.splitext(os.path.basename(wav_file))[0]

    # Process each call; calls are numbered from 1 by position in the table
    for call_number, (_, row) in enumerate(annotations.iterrows(), start=1):
        start_time = row["Begin Time (s)"]
        end_time = row["End Time (s)"]
        call_class = row["Annotation"]

        # Slice the audio corresponding to the call
        start_sample = int(start_time * sample_rate)
        end_sample = int(end_time * sample_rate)
        call_audio = audio_data[start_sample:end_sample]

        # Pad or truncate the audio to the length of the longest call
        call_audio = pad_or_truncate(call_audio, sample_rate, longest_call_duration)

        # Generate spectrogram
        freqs, times, spec = compute_spectrogram(call_audio, sample_rate)

        # Save spectrogram as .npz
        save_path = os.path.join(output_dir, f"{recording_name}_call_{call_number}_{call_class}.npz")
        np.savez_compressed(save_path, spectrogram=spec, frequencies=freqs, times=times, class_label=call_class)
        print(f"Saved spectrogram for call {call_number} ({call_class}) to {save_path}")

In [None]:
# Generate and save one spectrogram file per annotated call of the loaded recording
process_file(audiofile, annotation_file, output_dir)

Saved spectrogram for call 1 (Rupe B) to data/spectrograms\5713.210806110002_call_1_Rupe B.npz


## References

- 1 https://www.smithsonianmag.com/smart-news/first-scientists-film-grey-seals-clapping-show-their-strength-180974133/
- 2 https://www.pbs.org/wgbh/nova/article/seals-mimic-speech/
- 3 https://www.mdpi.com/2077-1312/12/1/118
- 4 https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.spectrogram.html
- https://towardsdatascience.com/audio-deep-learning-made-simple-part-1-state-of-the-art-techniques-da1d3dff2504
- https://medium.com/@okezieowen/audio-deep-learning-in-plain-english-b52843deb64e