In [4]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import SpanSelector
import librosa
import librosa.display
from scipy import signal
import soundfile as sf

In [5]:
def plot_spectrogram(audio_file, window_size=2048, hop_length=None, 
                    overlap_ratio=0.75, colormap='jet', figsize=(12, 8)):
    """
    Plot an interactive spectrogram of an audio file with zoom capabilities.
    
    Parameters:
    -----------
    audio_file : str
        Path to the audio file (.wav, .mp3, .flac, etc.)
    window_size : int, default=2048
        FFT window size (number of samples)
    hop_length : int, optional
        Number of samples between successive frames. If None, uses window_size * (1 - overlap_ratio)
    overlap_ratio : float, default=0.75
        Overlap ratio between windows (0-1). Only used if hop_length is None
    colormap : str, default='jet'
        Matplotlib colormap name
    figsize : tuple, default=(12, 8)
        Figure size (width, height) in inches
        
    Returns:
    --------
    fig : matplotlib figure
        Figure object containing the spectrogram
    """
    
    # Load audio file
    try:
        # Try with librosa first (handles more formats)
        y, sr = librosa.load(audio_file, sr=None)
        print(f"Loaded audio: {len(y)} samples, {sr} Hz sampling rate")
    except:
        # Fallback to soundfile
        y, sr = sf.read(audio_file)
        print(f"Loaded audio: {len(y)} samples, {sr} Hz sampling rate")
    
    # Calculate hop_length if not provided
    if hop_length is None:
        hop_length = int(window_size * (1 - overlap_ratio))
    
    # Compute spectrogram using scipy for more control
    frequencies, times, Sxx = signal.spectrogram(
        y, 
        fs=sr,
        window='hann',
        nperseg=window_size,
        noverlap=window_size - hop_length,
        scaling='density'
    )
    
    # Convert to dB scale
    Sxx_db = 10 * np.log10(Sxx + 1e-10)  # Add small epsilon to avoid log(0)
    
    # Create figure and axis
    fig, ax = plt.subplots(figsize=figsize)
    
    # Plot spectrogram
    im = ax.pcolormesh(times, frequencies, Sxx_db, 
                       shading='gouraud', cmap=colormap)
    
    # Set labels and title
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Frequency (Hz)')
    ax.set_title(f'Spectrogram - {audio_file}\n'
                f'Window: {window_size}, Hop: {hop_length}, Overlap: {overlap_ratio:.1%}')
    
    # Add colorbar
    cbar = plt.colorbar(im, ax=ax)
    cbar.set_label('Power Spectral Density (dB/Hz)')
    
    # Store original limits for reset functionality
    original_xlim = ax.get_xlim()
    original_ylim = ax.get_ylim()
    
    # Add zoom functionality
    class SpectrogramZoom:
        def __init__(self, ax, original_xlim, original_ylim):
            self.ax = ax
            self.original_xlim = original_xlim
            self.original_ylim = original_ylim
            self.zoom_stack = []
            
        def on_select_x(self, xmin, xmax):
            """Handle horizontal zoom selection"""
            if abs(xmax - xmin) > 0.01:  # Minimum zoom threshold
                current_ylim = self.ax.get_ylim()
                self.zoom_stack.append((self.ax.get_xlim(), current_ylim))
                self.ax.set_xlim(xmin, xmax)
                plt.draw()
                
        def on_select_y(self, ymin, ymax):
            """Handle vertical zoom selection"""
            if abs(ymax - ymin) > 10:  # Minimum zoom threshold
                current_xlim = self.ax.get_xlim()
                self.zoom_stack.append((current_xlim, self.ax.get_ylim()))
                self.ax.set_ylim(ymin, ymax)
                plt.draw()
                
        def reset_zoom(self):
            """Reset to original view"""
            self.ax.set_xlim(self.original_xlim)
            self.ax.set_ylim(self.original_ylim)
            self.zoom_stack.clear()
            plt.draw()
            
        def zoom_back(self):
            """Go back one zoom level"""
            if self.zoom_stack:
                xlim, ylim = self.zoom_stack.pop()
                self.ax.set_xlim(xlim)
                self.ax.set_ylim(ylim)
                plt.draw()
    
    # Create zoom handler
    zoom_handler = SpectrogramZoom(ax, original_xlim, original_ylim)
    
    # Add span selectors for zoom
    span_x = SpanSelector(ax, zoom_handler.on_select_x, 'horizontal',
                         useblit=True, rectprops=dict(alpha=0.3, facecolor='red'))
    span_y = SpanSelector(ax, zoom_handler.on_select_y, 'vertical',
                         useblit=True, rectprops=dict(alpha=0.3, facecolor='blue'))
    
    # Key event handler for shortcuts
    def on_key(event):
        if event.key == 'r':
            zoom_handler.reset_zoom()
        elif event.key == 'backspace':
            zoom_handler.zoom_back()
        elif event.key == 'h':
            print("\nKeyboard shortcuts:")
            print("- Click and drag horizontally (red) to zoom in time")
            print("- Click and drag vertically (blue) to zoom in frequency")  
            print("- Press 'r' to reset zoom")
            print("- Press 'backspace' to go back one zoom level")
            print("- Press 'h' to show this help")
    
    # Connect key events
    fig.canvas.mpl_connect('key_press_event', on_key)
    
    # Add instructions
    fig.text(0.02, 0.02, "Drag horizontally/vertically to zoom | 'r'=reset | 'backspace'=back | 'h'=help", 
             fontsize=9, alpha=0.7)
    
    plt.tight_layout()
    
    # Print initial help
    print("\nInteractive Spectrogram Controls:")
    print("- Click and drag horizontally (red overlay) to zoom in time")
    print("- Click and drag vertically (blue overlay) to zoom in frequency")
    print("- Press 'r' to reset zoom to original view")
    print("- Press 'backspace' to go back one zoom level")
    print("- Press 'h' anytime for help")
    
    return fig

In [6]:
# Example usage and additional utility functions
def compare_window_sizes(audio_file, window_sizes=[512, 1024, 2048, 4096], 
                        colormap='jet'):
    """
    Compare spectrograms with different window sizes side by side.
    
    Parameters:
    -----------
    audio_file : str
        Path to the audio file
    window_sizes : list of int
        List of window sizes to compare
    colormap : str
        Colormap to use
    """
    
    # Load audio
    y, sr = librosa.load(audio_file, sr=None)
    
    # Create subplots
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    axes = axes.ravel()
    
    for i, window_size in enumerate(window_sizes[:4]):
        # Compute spectrogram
        hop_length = window_size // 4
        frequencies, times, Sxx = signal.spectrogram(
            y, fs=sr, window='hann', nperseg=window_size, 
            noverlap=window_size - hop_length, scaling='density'
        )
        Sxx_db = 10 * np.log10(Sxx + 1e-10)
        
        # Plot
        im = axes[i].pcolormesh(times, frequencies, Sxx_db, 
                               shading='gouraud', cmap=colormap)
        axes[i].set_title(f'Window Size: {window_size}')
        axes[i].set_xlabel('Time (s)')
        axes[i].set_ylabel('Frequency (Hz)')
        
        # Add colorbar
        plt.colorbar(im, ax=axes[i])
    
    plt.tight_layout()
    return fig

In [8]:
# Example usage
if __name__ == "__main__":
    # Example: plot spectrogram with default settings
    fig = plot_spectrogram('/data/scooter_example_1.wav')
    plt.show()
    
    # Example: plot with custom parameters
    # fig = plot_spectrogram('your_audio_file.wav', 
    #                       window_size=4096, 
    #                       overlap_ratio=0.8,
    #                       colormap='viridis')
    # plt.show()
    
    # Example: compare different window sizes
    # fig = compare_window_sizes('your_audio_file.wav')
    # plt.show()
    
    print("Spectrogram functions ready!")
    print("Use plot_spectrogram('your_file.wav') to create an interactive spectrogram")
    print("Use compare_window_sizes('your_file.wav') to compare different window sizes")

LibsndfileError: Error opening '/data/scooter_example_1.wav': System error.