# Prof's Edited Code

In [1]:
!pip install librosa


!pip install soundfile
import os
import numpy as np
from matplotlib import pyplot as plt
from collections import namedtuple
import librosa

# Add-on modules, use conda or pip to install
from librosa import display  # conda install -c conda-forge librosa
# from librosa.util import normalize
import numpy as np

def normalize(y, axis=0):
    """Scale an array so its largest absolute value along ``axis`` is 1.

    Local replacement for the commented-out ``librosa.util.normalize``
    import; only peak normalization is implemented.

    :param y: array-like of samples
    :param axis: axis along which the peak is measured (default 0)
    :return: array of the same shape as ``y`` divided by its peak.  Slices
        whose peak is exactly zero (e.g. digital silence) are returned
        unchanged instead of being turned into NaN by a 0/0 division, and
        an empty input is returned as an empty array.
    """
    y = np.asarray(y)
    if y.size == 0:
        # np.max has no identity for empty input; nothing to scale anyway
        return y / 1
    peak = np.max(np.abs(y), axis=axis, keepdims=True)
    # Replace zero peaks with 1 so silent slices stay all-zero
    safe_peak = np.where(peak == 0, 1, peak)
    return y / safe_peak
# from librosa import power_to_db
import numpy as np

def power_to_db(S, ref=1.0, amin=1e-10, top_db=80.0):
    """Express a power spectrogram in decibels relative to ``ref``.

    :param S: power values (array-like)
    :param ref: reference power; the result is 10*log10(S / ref)
    :param amin: floor applied to both ``S`` and ``ref`` before the log
    :param top_db: if not None, values more than ``top_db`` below the
        maximum are raised to ``max - top_db``
    :return: values in dB
    """
    floored_power = np.maximum(amin, np.asarray(S))
    floored_ref = np.maximum(amin, ref)

    db = 10.0 * np.log10(floored_power)
    db -= 10.0 * np.log10(floored_ref)

    if top_db is None:
        return db

    # Limit the dynamic range to top_db below the peak
    floor_db = db.max() - top_db
    return np.maximum(db, floor_db)
import soundfile  # conda install -c conda-forge pysoundfile




In [29]:
print(librosa.__version__)

0.10.2.post1


In [40]:
class AudioPool_short:
    """
    Pool of open audio files with helpers to read a span of samples and
    convert it to a dB spectrogram.

    Files are opened on first access and kept in a plain dictionary keyed
    by filename.  Nothing is evicted automatically: call close() (or use
    the pool as a context manager) to release the file handles.
    """

    # Each cache entry contains a SoundFile audio file class instance, framing
    # parameters calculated for the sample rate, and the channel to be
    # processed.
    EntryType = namedtuple("EntryType", ('audio', "adv_N", "len_N", "channel"))

    # Spectrogram bias to prevent log10 from taking log(0)
    # (not referenced by the methods of this class)
    spec_bias = 1e-7

    # Channel to use in multi-channel recordings when there is no
    # channel map entry to tell us what to do.
    default_channel = 0

    def __init__(self, adv_s=0.0025, len_s=0.005, window="hamming",
                 low_Hz=0, high_Hz=None):
        """
        Create an empty pool.  No file is opened until it is requested:

        pool = AudioPool_short()
        entry = pool['path/to/file.wav']   # EntryType; entry.audio is the SoundFile

        :param adv_s:  frame advance (s) for spectrograms
        :param len_s:  frame length (s) for spectrograms
        :param window: window function for spectrograms
        :param low_Hz: lowest frequency of spectrograms (default 0 Hz);
            stored only, the methods of this class do not crop to it
        :param high_Hz: highest frequency of spectrograms (default Nyquist);
            stored only, the methods of this class do not crop to it
        """
        # filename -> EntryType; an unbounded dictionary, not an LRU cache
        self.cache = {}

        self.adv_s = adv_s
        self.len_s = len_s
        self.window = window
        self.low_Hz = low_Hz
        self.high_Hz = high_Hz

    def __enter__(self):
        """Allow ``with AudioPool_short() as pool:`` usage."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close all cached files when leaving the ``with`` block."""
        self.close()
        return False

    def close(self):
        """Close every cached audio file and empty the cache."""
        for entry in self.cache.values():
            entry.audio.close()
        self.cache.clear()

    def __getitem__(self, filename):
        """
        [filename] - Retrieve the cache entry for the specified filename,
        opening the file on first access.

        :param filename:  audio file path
        :return:  EntryType(audio, adv_N, len_N, channel)
        """
        if filename not in self.cache:
            # Not in cache: open the file and derive framing in samples,
            # rounding to the nearest sample
            sound = soundfile.SoundFile(filename)
            adv_N = int(sound.samplerate * self.adv_s + 0.5)
            len_N = int(sound.samplerate * self.len_s + 0.5)
            channel = self.default_channel

            self.cache[filename] = self.EntryType(sound, adv_N, len_N, channel)

        return self.cache[filename]

    def get_spectrogram(self, filename, start_s=0, duration_s=-1):
        """
        get_spectrogram - Return power spectrogram in dB relative to the
        largest power value in the requested span.

        :param filename:  audio file path
        :param start_s: start time (s)
        :param duration_s: duration (s), -1 reads to the end of the file
        :return:  (frequency X time spectra in dB, sample rate)
        :raises ValueError: if the requested span contains no samples
        """
        data = np.asarray(self.get_seconds(filename, start_s, duration_s))
        entry = self.cache[filename]
        Fs = entry.audio.samplerate

        # Multi-channel reads arrive as (frames, channels); librosa.stft
        # expects time on the last axis, so keep only the entry's channel.
        if data.ndim > 1:
            data = data[:, entry.channel]

        if len(data) == 0:
            raise ValueError(
                f"No audio samples available in {filename} "
                f"(start_s={start_s}, duration_s={duration_s})")

        # Peak-normalize; an all-zero clip is left as is because dividing
        # by its zero peak would produce NaN
        if np.any(data):
            data = normalize(data)

        # Framing in samples, as computed when the file was opened.  For
        # clips shorter than one frame shrink the window together with
        # n_fft, since librosa requires win_length <= n_fft <= len(data)
        # when center=False.
        hop_length = entry.adv_N
        win_length = min(entry.len_N, len(data))
        n_fft = win_length

        D = librosa.stft(data, n_fft=n_fft,
                         hop_length=hop_length, win_length=win_length,
                         window=self.window, center=False)
        power = np.abs(D) ** 2

        # Convert to decibels relative to the strongest bin
        spectrogram_db = power_to_db(power, ref=np.max(power))

        return spectrogram_db, Fs

    def get_seconds(self, filename, start_s, duration_s=-1):
        """
        Return duration_s seconds of data from filename starting at start_s
        seconds into the file.

        :param filename:  file to access
        :param start_s:  offset into file in seconds; if None, reading
            continues from the file's current position
        :param duration_s:   read duration seconds from start_s, to end of file if -1
        :return:  audio data as returned by the file's read()
        """
        entry = self[filename]
        Fs = entry.audio.samplerate

        if start_s is not None:
            start_sample = int(start_s * Fs)
            # Only seek when the file is not already at the requested sample
            if entry.audio.tell() != start_sample:
                entry.audio.seek(start_sample)

        # -1 is passed through to read(), which then reads to end of file
        Nsamples = -1 if duration_s == -1 else int(Fs * duration_s)

        return entry.audio.read(frames=Nsamples)


In [None]:
    # Spectrogram of one complete recording (start at 0 s, read to end of file)
    audio_file="Audio_Files/Audio_extracted/Roch/20190611_030000.WAV"
    pool = AudioPool_short(len_s = 0.005)
    spectrogram, sr = pool.get_spectrogram(audio_file, start_s = 0, duration_s = -1)

    # Display the spectrogram.  specshow assumes hop_length=512 unless told
    # otherwise, so pass the pool's actual frame advance (in samples) to get
    # a time axis in true seconds.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(spectrogram, sr=sr,
                             hop_length=int(pool.adv_s * sr + 0.5),
                             x_axis='time', y_axis='linear')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    plt.tight_layout()
    plt.show()

In [9]:
data.shape

(23040000,)

In [10]:
23040000/384000


60.0

# Code for Spectrogram of Centroids and Closest Point

In [9]:
!pip install h5py



In [23]:
import pandas as pd
import h5py
clusters = pd.read_csv('cluster_centroids_with_ids_np_array.csv')

In [20]:
centroids = clusters['Centroid']


Keys in Embeddings/embed_files/20190611_030000.WAV_embeddings_Data2VecMultiModel_checkpoint_last.pt_12.h5: ['embedding', 'filename', 'time']
Total files added: 192


## Spectrogram of Cluster 1 two closest points

In [None]:
import matplotlib.pyplot as plt
import librosa
import librosa.display

# Assuming you have two audio files for the closest points
audio_file_1 = "Audio_Files/Audio_extracted/Roch/20190612_173000.WAV"
audio_file_2 = "Audio_Files/Audio_extracted/Roch/20190612_174000.WAV"
# Create an AudioPool_short object
pool = AudioPool_short(len_s=0.005)

# Get spectrograms for both audio files (from the given offset to end of file)
spectrogram_1, sr_1 = pool.get_spectrogram(audio_file_1, start_s=53.447765, duration_s=-1)
spectrogram_2, sr_2 = pool.get_spectrogram(audio_file_2, start_s=45.561047, duration_s=-1)

# Create subplots to display the two spectrograms side by side
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# Plot the first spectrogram.  hop_length is the pool's frame advance in
# samples; without it specshow would scale the time axis for a hop of 512.
img1 = librosa.display.specshow(spectrogram_1, sr=sr_1,
                                hop_length=int(pool.adv_s * sr_1 + 0.5),
                                x_axis='time', y_axis='linear', ax=ax[0])
ax[0].set_title('Spectrogram 1 from Cluster 1')
plt.colorbar(img1, ax=ax[0], format='%+2.0f dB')

# Plot the second spectrogram
img2 = librosa.display.specshow(spectrogram_2, sr=sr_2,
                                hop_length=int(pool.adv_s * sr_2 + 0.5),
                                x_axis='time', y_axis='linear', ax=ax[1])
ax[1].set_title('Spectrogram 2 from Cluster 1 ')
plt.colorbar(img2, ax=ax[1], format='%+2.0f dB')

# Adjust layout to make it look neat
plt.tight_layout()
# Save before showing: once plt.show() returns the figure may already be
# released, and a later plt.savefig would write an empty canvas.
fig.savefig("Cluster1 Spectrograms")
plt.show()

## Spectrogram of Cluster 18 (Randomly Selected)

In [None]:
import matplotlib.pyplot as plt
import librosa
import librosa.display

# Assuming you have two audio files for the closest points
audio_file_1 = "Audio_Files/Audio_extracted/Roch/20190611_030000.WAV"
audio_file_2 = "Audio_Files/Audio_extracted/Roch/20190611_045000.WAV"
# Create an AudioPool_short object
pool = AudioPool_short(len_s=0.005)

# Get spectrograms for both audio files (from the given offset to end of file)
spectrogram_1, sr_1 = pool.get_spectrogram(audio_file_1, start_s=0.009995842, duration_s=-1)
spectrogram_2, sr_2 = pool.get_spectrogram(audio_file_2, start_s=35.670162, duration_s=-1)

# Create subplots to display the two spectrograms side by side
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# Plot the first spectrogram.  hop_length is the pool's frame advance in
# samples; without it specshow would scale the time axis for a hop of 512.
img1 = librosa.display.specshow(spectrogram_1, sr=sr_1,
                                hop_length=int(pool.adv_s * sr_1 + 0.5),
                                x_axis='time', y_axis='linear', ax=ax[0])
ax[0].set_title('Spectrogram 1')
plt.colorbar(img1, ax=ax[0], format='%+2.0f dB')

# Plot the second spectrogram
img2 = librosa.display.specshow(spectrogram_2, sr=sr_2,
                                hop_length=int(pool.adv_s * sr_2 + 0.5),
                                x_axis='time', y_axis='linear', ax=ax[1])
ax[1].set_title('Spectrogram 2')
plt.colorbar(img2, ax=ax[1], format='%+2.0f dB')

# Adjust layout to make it look neat
plt.tight_layout()
plt.show()


## Code for all clusters in one plot, randomly selecting two embeddings from each cluster

In [None]:

import librosa
import numpy

print("Librosa version:", librosa.__version__)
print("NumPy version:", numpy.__version__)



In [None]:
import os
import pandas as pd
import random

def get_random_files(cluster_data, audio_directory):
    """
    Pick two random rows of a cluster and resolve them to audio files.

    The audio file name is rebuilt from the first two underscore-separated
    tokens of the row's 'filename' value, with a '.WAV' extension.

    Args:
    cluster_data (pd.DataFrame): Rows of one cluster; must provide 'filename' and 'Time' columns.
    audio_directory (str): Directory that holds the audio files.

    Returns:
    tuple: (audio_file_1, audio_file_2, time_1, time_2)

    Raises:
    ValueError: fewer than two rows were supplied.
    FileNotFoundError: a resolved audio file is not present on disk.
    """
    if len(cluster_data) < 2:
        raise ValueError("The dataframe must contain at least two rows.")

    def to_wav_path(raw_name):
        # Keep the first two tokens, then collapse a doubled extension
        # that appears when the second token already ended in '.WAV'
        tokens = raw_name.split('_')
        candidate = os.path.join(audio_directory, tokens[0] + "_" + tokens[1] + ".WAV")
        return candidate.replace('.WAV.WAV', '.WAV')

    # The sampling seed is drawn from the `random` module's generator
    seed = random.randint(0, 1000)
    picked = cluster_data.sample(n=2, random_state=seed)
    first_row, second_row = picked.iloc[0], picked.iloc[1]

    audio_file_1 = to_wav_path(first_row['filename'])
    audio_file_2 = to_wav_path(second_row['filename'])
    time_1, time_2 = first_row['Time'], second_row['Time']

    # Both files have to exist before anything is returned
    for candidate in (audio_file_1, audio_file_2):
        if not os.path.isfile(candidate):
            raise FileNotFoundError(f"File {candidate} does not exist.")

    return audio_file_1, audio_file_2, time_1, time_2

# Load the data
df = pd.read_csv('clusters_with_time_filename_mini.csv')

# Assuming your 'Audio_Files' directory is in the correct path
audio_directory = "Audio_Files/Audio_extracted/Roch"

# Create an AudioPool_short object (make sure it's defined properly in your code)
pool = AudioPool_short(len_s=0.005)

# Set up the figure
fig, axs = plt.subplots(20, 2, figsize=(20, 50))  # 20 clusters, 2 columns
fig.suptitle("Spectrograms for All Clusters", fontsize=16)

for cluster in range(20):
    # Filter the data for the current cluster
    cluster_data = df[df['Cluster'] == cluster]

    # Skip clusters with less than 2 rows and hide their unused axes so the
    # figure does not show empty frames
    if len(cluster_data) < 2:
        print(f"Skipping cluster {cluster}: Not enough data.")
        axs[cluster, 0].set_axis_off()
        axs[cluster, 1].set_axis_off()
        continue

    # Get two random files for the current cluster
    audio_file_1, audio_file_2, time_1, time_2 = get_random_files(cluster_data, audio_directory)

    # Get spectrograms for both audio files (from the row's time to end of file)
    spectrogram_1, sr_1 = pool.get_spectrogram(audio_file_1, start_s=time_1, duration_s=-1)
    spectrogram_2, sr_2 = pool.get_spectrogram(audio_file_2, start_s=time_2, duration_s=-1)

    # Plot the first spectrogram.  hop_length is the pool's frame advance in
    # samples; without it specshow would scale the time axis for a hop of 512.
    img1 = librosa.display.specshow(spectrogram_1, sr=sr_1,
                                    hop_length=int(pool.adv_s * sr_1 + 0.5),
                                    x_axis='time', y_axis='linear', ax=axs[cluster, 0])
    axs[cluster, 0].set_title(f'Cluster {cluster} - Spectrogram 1')
    axs[cluster, 0].set_ylim(0, 20000)
    plt.colorbar(img1, ax=axs[cluster, 0], format='%+2.0f dB')

    # Plot the second spectrogram
    img2 = librosa.display.specshow(spectrogram_2, sr=sr_2,
                                    hop_length=int(pool.adv_s * sr_2 + 0.5),
                                    x_axis='time', y_axis='linear', ax=axs[cluster, 1])
    axs[cluster, 1].set_title(f'Cluster {cluster} - Spectrogram 2')
    axs[cluster, 1].set_ylim(0, 20000)
    plt.colorbar(img2, ax=axs[cluster, 1], format='%+2.0f dB')

# Adjust layout to make it look neat
plt.tight_layout(rect=[0, 0, 1, 0.98])  # Leave space for the title
plt.show()


## Code with audio icon

In [None]:
import os
import pandas as pd
import random
import librosa
import librosa.display
import matplotlib.pyplot as plt
from IPython.display import Audio, display


# Function to load and extract a segment from an audio file
def load_audio_clip(audio_path, start_time, duration_s):
    """
    Load an audio file and extract a segment based on start_time and duration.

    The file is loaded in full at its native sample rate and then sliced.

    Args:
    audio_path (str): Path to the audio file.
    start_time (float): Start time of the clip in seconds.
    duration_s (float): Duration of the clip in seconds, or -1 to take
        everything from start_time to the end of the file.

    Returns:
    np.array: Extracted audio segment.
    int: Sample rate of the audio.
    """
    y, sr = librosa.load(audio_path, sr=None)  # Load entire audio
    start_sample = int(start_time * sr)  # Convert start time to samples

    if duration_s == -1:
        # "To end of file" still honours start_time, matching how
        # AudioPool_short.get_spectrogram treats duration_s=-1
        return y[start_sample:], sr

    end_sample = int((start_time + duration_s) * sr)  # Convert end time to samples
    return y[start_sample:end_sample], sr
    

def get_random_files(cluster_data, audio_directory):
    """
    Draw two random rows from a cluster and return their audio paths and times.

    Each path is audio_directory joined with the first two '_'-separated
    tokens of the row's 'filename' plus '.WAV'.

    Args:
    cluster_data (pd.DataFrame): Cluster rows with 'filename' and 'Time' columns.
    audio_directory (str): Directory containing the audio files.

    Returns:
    tuple: (audio_file_1, audio_file_2, time_1, time_2) for the two sampled rows.

    Raises:
    ValueError: if cluster_data has fewer than two rows.
    FileNotFoundError: if either audio file does not exist.
    """
    if len(cluster_data) < 2:
        raise ValueError("The dataframe must contain at least two rows.")

    sampled = cluster_data.sample(n=2, random_state=random.randint(0, 1000))
    rows = (sampled.iloc[0], sampled.iloc[1])

    resolved = []
    for row in rows:
        name = row['filename']
        stem = name.split('_')[0] + "_" + name.split('_')[1]
        full_path = os.path.join(audio_directory, stem + ".WAV")
        # A second token that already ends in '.WAV' would double the extension
        resolved.append(full_path.replace('.WAV.WAV', '.WAV'))
    audio_file_1, audio_file_2 = resolved

    time_1 = rows[0]['Time']
    time_2 = rows[1]['Time']

    if not os.path.isfile(audio_file_1):
        raise FileNotFoundError(f"File {audio_file_1} does not exist.")
    if not os.path.isfile(audio_file_2):
        raise FileNotFoundError(f"File {audio_file_2} does not exist.")

    return audio_file_1, audio_file_2, time_1, time_2

# Load the data
df = pd.read_csv('clusters_with_time_filename_mini.csv')

audio_directory = "Audio_Files/Audio_extracted/Roch"
clip_duration = 0.005  # not used in this cell
pool = AudioPool_short(len_s=0.015)

n_plots =2

# Set up the figure: one row per plotted cluster, 2 columns.
# squeeze=False keeps axs two-dimensional even when n_plots is 1.
fig, axs = plt.subplots(n_plots, 2, figsize=(20, 50), squeeze=False)
fig.suptitle("Spectrograms with Audio Players", fontsize=16)

for cluster in range(n_plots):
    cluster_data = df[df['Cluster'] == cluster]

    if len(cluster_data) < 2:
        print(f"Skipping cluster {cluster}: Not enough data.")
        continue

    audio_file_1, audio_file_2, time_1_file, time_2_file = get_random_files(cluster_data, audio_directory)

    # Never start before the beginning of the file
    time_1 = max(0, time_1_file )
    time_2 = max(0, time_2_file )

    spectrogram_1, sr_1 = pool.get_spectrogram(audio_file_1, start_s=time_1, duration_s=-1)
    spectrogram_2, sr_2 = pool.get_spectrogram(audio_file_2, start_s=time_2, duration_s=-1)

    # hop_length is the pool's frame advance in samples; without it specshow
    # would scale the time axis for a hop of 512.
    img1 = librosa.display.specshow(spectrogram_1, sr=sr_1,
                                    hop_length=int(pool.adv_s * sr_1 + 0.5),
                                    x_axis='time', y_axis='linear', ax=axs[cluster, 0])
    axs[cluster, 0].set_title(f'Cluster {cluster} - {audio_file_1}')
    axs[cluster, 0].set_ylim(0, 20000)
    plt.colorbar(img1, ax=axs[cluster, 0], format='%+2.0f dB')

    img2 = librosa.display.specshow(spectrogram_2, sr=sr_2,
                                    hop_length=int(pool.adv_s * sr_2 + 0.5),
                                    x_axis='time', y_axis='linear', ax=axs[cluster, 1])
    axs[cluster, 1].set_title(f'Cluster {cluster} - {audio_file_2}')
    axs[cluster, 1].set_ylim(0, 20000)
    plt.colorbar(img2, ax=axs[cluster, 1], format='%+2.0f dB')
    print(time_1)
    print(time_2)

    # Load audio clips
    clip_1, sr_clip_1 = load_audio_clip(audio_file_1, start_time=time_1, duration_s=-1)
    clip_2, sr_clip_2 = load_audio_clip(audio_file_2, start_time=time_2, duration_s=-1)

    # Display audio players for extracted clips
    print(f"Audio Clip for Cluster {cluster} - {audio_file_1}:")
    display(Audio(clip_1, rate=sr_clip_1, autoplay=False))

    print(f"Audio Clip for Cluster {cluster} - {audio_file_2}:")
    display(Audio(clip_2, rate=sr_clip_2, autoplay=False))

plt.tight_layout(rect=[0, 0, 1, 0.98])
plt.show()



In [None]:
import os
import pandas as pd
import random
import librosa
import librosa.display
import matplotlib.pyplot as plt
from IPython.display import Audio, display

# Function to load and extract a segment from an audio file
def load_audio_clip(audio_path, start_time, duration_s):
    """
    Load an audio file and extract a segment based on start_time and duration.

    The file is loaded in full at its native sample rate and then sliced.

    Args:
    audio_path (str): Path to the audio file.
    start_time (float): Start time of the clip in seconds.
    duration_s (float): Duration of the clip in seconds, or -1 to take
        everything from start_time to the end of the file (the same
        sentinel the earlier definition in this notebook accepts).

    Returns:
    np.array: Extracted audio segment.
    int: Sample rate of the audio.
    """
    y, sr = librosa.load(audio_path, sr=None)  # Load entire audio
    start_sample = int(start_time * sr)  # Convert start time to samples

    if duration_s == -1:
        # Without this branch the end index would fall one second before
        # the start and the slice would be wrong
        return y[start_sample:], sr

    end_sample = int((start_time + duration_s) * sr)  # Convert end time to samples
    return y[start_sample:end_sample], sr

# Function to get two random audio files from the cluster
def get_random_files(cluster_data, audio_directory):
    """
    Sample two rows from a cluster and return their audio paths and times.

    Args:
    cluster_data (pd.DataFrame): Cluster rows with 'filename' and 'Time' columns.
    audio_directory (str): Directory containing the audio files.

    Returns:
    tuple: (audio_file_1, audio_file_2, time_1, time_2)

    Raises:
    ValueError: if fewer than two rows are available.
    FileNotFoundError: if a resolved audio file is missing.
    """
    if len(cluster_data) < 2:
        raise ValueError("The dataframe must contain at least two rows.")

    chosen = cluster_data.sample(n=2, random_state=random.randint(0, 1000))

    def build_path(entry_name):
        # "<token0>_<token1>.WAV" inside audio_directory, with a doubled
        # '.WAV.WAV' collapsed to a single extension
        pieces = entry_name.split('_')
        joined = os.path.join(audio_directory, pieces[0] + "_" + pieces[1] + ".WAV")
        return joined.replace('.WAV.WAV', '.WAV')

    audio_file_1 = build_path(chosen.iloc[0]['filename'])
    audio_file_2 = build_path(chosen.iloc[1]['filename'])

    time_1, time_2 = (chosen.iloc[idx]['Time'] for idx in (0, 1))

    missing = next((p for p in (audio_file_1, audio_file_2) if not os.path.isfile(p)), None)
    if missing is not None:
        raise FileNotFoundError(f"File {missing} does not exist.")

    return audio_file_1, audio_file_2, time_1, time_2

# Load the data
df = pd.read_csv('clusters_with_time_filename_mini.csv')

audio_directory = "Audio_Files/Audio_extracted/Roch"
# NOTE(review): clip_duration is treated below as the length of the embedding
# window that starts at the row's 'Time' - confirm against the embedding code.
clip_duration = 0.005
extra_time = 0.005  # Extra 5ms for left and right padding

pool = AudioPool_short(len_s=0.015)

n_plots = 2

# Set up the figure: one row per plotted cluster, 2 columns.
# squeeze=False keeps axs two-dimensional even when n_plots is 1.
fig, axs = plt.subplots(n_plots, 2, figsize=(20, 10), squeeze=False)
fig.suptitle("Spectrograms with Audio Players", fontsize=16)

for cluster in range(n_plots):
    cluster_data = df[df['Cluster'] == cluster]

    if len(cluster_data) < 2:
        print(f"Skipping cluster {cluster}: Not enough data.")
        continue

    audio_file_1, audio_file_2, time_1_file, time_2_file = get_random_files(cluster_data, audio_directory)

    # Start extra_time before the embedding (clamped to the start of the
    # file) and cover 15 ms from there
    time_1_start = max(0, time_1_file - extra_time)
    time_1_end = time_1_start + 0.015  # Ensure 15ms coverage

    time_2_start = max(0, time_2_file - extra_time)
    time_2_end = time_2_start + 0.015  # Ensure 15ms coverage

    # Generate spectrograms (1 ms more than the clip is requested)
    spectrogram_1, sr_1 = pool.get_spectrogram(audio_file_1, start_s=time_1_start, duration_s=time_1_end - time_1_start+0.001)
    spectrogram_2, sr_2 = pool.get_spectrogram(audio_file_2, start_s=time_2_start, duration_s=time_2_end - time_2_start+0.001)

    # Plot spectrogram 1.  hop_length is the pool's frame advance in samples;
    # without it specshow would scale the time axis for a hop of 512.
    img1 = librosa.display.specshow(spectrogram_1, sr=sr_1,
                                    hop_length=int(pool.adv_s * sr_1 + 0.5),
                                    x_axis='time', y_axis='linear', ax=axs[cluster, 0])
    axs[cluster, 0].set_title(f'Cluster {cluster} - {audio_file_1}')
    axs[cluster, 0].set_ylim(0, 20000)
    plt.colorbar(img1, ax=axs[cluster, 0], format='%+2.0f dB')

    # Draw vertical lines for the embedding window, in seconds relative to
    # the start of the displayed clip.  The end marker is start marker +
    # clip_duration; the previous "clip end - embedding time" only matched
    # that when the clip start had not been clamped to 0.
    embed_1_start = time_1_file - time_1_start
    axs[cluster, 0].axvline(x=embed_1_start, color='g', linestyle='--', label="Embedding Start")
    axs[cluster, 0].axvline(x=embed_1_start + clip_duration, color='g', linestyle='-.', label="Embedding End")
    axs[cluster, 0].legend()

    # Plot spectrogram 2
    img2 = librosa.display.specshow(spectrogram_2, sr=sr_2,
                                    hop_length=int(pool.adv_s * sr_2 + 0.5),
                                    x_axis='time', y_axis='linear', ax=axs[cluster, 1])
    axs[cluster, 1].set_title(f'Cluster {cluster} - {audio_file_2}')
    axs[cluster, 1].set_ylim(0, 20000)
    plt.colorbar(img2, ax=axs[cluster, 1], format='%+2.0f dB')

    # Draw vertical lines for the embedding window (same convention as above)
    embed_2_start = time_2_file - time_2_start
    axs[cluster, 1].axvline(x=embed_2_start, color='g', linestyle='--', label="Embedding Start")
    axs[cluster, 1].axvline(x=embed_2_start + clip_duration, color='g', linestyle='-.', label="Embedding End")
    axs[cluster, 1].legend()

    print(f"Embedding Time Range for {audio_file_1}: {time_1_file} to {time_1_end}")
    print(f"Embedding Time Range for {audio_file_2}: {time_2_file} to {time_2_end}")

    # Load and display audio clips
    clip_1, sr_clip_1 = load_audio_clip(audio_file_1, start_time=time_1_start, duration_s=(time_1_end - time_1_start))
    clip_2, sr_clip_2 = load_audio_clip(audio_file_2, start_time=time_2_start, duration_s=(time_2_end - time_2_start))

    print(f"Audio Clip for Cluster {cluster} - {audio_file_1}:")
    display(Audio(clip_1, rate=sr_clip_1, autoplay=False))

    print(f"Audio Clip for Cluster {cluster} - {audio_file_2}:")
    display(Audio(clip_2, rate=sr_clip_2, autoplay=False))

plt.tight_layout(rect=[0, 0, 1, 0.98])
plt.show()
