In [None]:
import math
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from scipy.io import wavfile
from collections import OrderedDict
from tqdm import tqdm

In [None]:
# Names of the annotation-table columns that hold each edge of a selection box.
left_col = "Begin Time (s)"
right_col = "End Time (s)"
top_col = "High Freq (Hz)"
bot_col = "Low Freq (Hz)"

In [None]:
# Recording to inspect and the initials used in the annotation file name.
recording_name = "671658014.180929033558"
initials = "JW"
wav_name = "../data/{}_norm_8k-resample.wav".format(recording_name)

# Load the tab-separated annotation table for this recording.
annotations = pd.read_csv(
    "../data/{}-{}-Raven_fixes.txt".format(recording_name, initials),
    sep="\t",
)
# Label every row with its species followed by its row index.
annotations["Species+Index"] = annotations["Species"].str.cat(annotations.index.astype(str))
annotations

In [None]:
# Keep only the rows that belong to the spectrogram view.
# The "Species+Index" label column is deliberately kept: visualize_all_annotations
# colours and labels the boxes with cls_col="Species+Index", so dropping it here
# made that call fail with a KeyError (and made this cell fail when re-run).
annotations = annotations.loc[annotations["View"] == "Spectrogram 1"]

In [None]:
# Visualize Distribution of Box Heights and Widths
def visualize_dimensions(annotations, bins=100):
    """Show a 2-D histogram of annotation box widths against box heights.

    Args:
        annotations: DataFrame containing the columns named by ``left_col``,
            ``right_col``, ``top_col`` and ``bot_col``.
        bins: Bin specification forwarded to ``plt.hist2d``.
    """
    box_widths = annotations[right_col] - annotations[left_col]
    box_heights = annotations[top_col] - annotations[bot_col]

    plt.hist2d(box_widths, box_heights, bins=bins)
    plt.xlabel("Box Width (s)")
    plt.ylabel("Box Height (Hz)")
    plt.title("Distribution of Box Dimensions")
    plt.show()
    plt.close()
    
# Plot the width/height distribution of the loaded annotation boxes.
visualize_dimensions(annotations=annotations)

In [None]:
# TODO: measure speed of different fns for opening wav files
def read_wavfile(wav_name, normalize=True, verbose=False):
    """Read a WAV file and optionally min-max normalize its samples.

    Args:
        wav_name: Path of the WAV file, passed to ``scipy.io.wavfile.read``.
        normalize: If True, convert the samples to float and rescale them so
            the smallest sample maps to -0.5 and the largest to 0.5. A signal
            with no dynamic range (constant or empty) has nothing to rescale
            and is returned as float zeros instead of NaN.
        verbose: If True, print the file name, sample count, rate and duration.

    Returns:
        Tuple ``(sr, data)``: the sample rate and the (optionally normalized)
        sample array as returned by ``wavfile.read``.
    """
    if verbose:
        print("Reading {}".format(wav_name))
    sr, data = wavfile.read(wav_name)
    if verbose:
        print("{} samples at {} samples/sec --> {} seconds".format(data.shape[0], sr, data.shape[0]/sr))

    if normalize:
        data = data.astype(float)
        if data.size == 0:
            # min()/max() raise on an empty array; there is nothing to scale.
            return sr, data
        data = data - data.min()
        span = data.max()
        if span > 0:
            data = data / span
            data = data - 0.5
        # else: every sample was identical, so `data` is already all zeros;
        # dividing by the zero range would turn the whole signal into NaN.

    return sr, data

# Load the recording (normalized) and report its length.
samplerate, data = read_wavfile(wav_name, normalize=True, verbose=True)

In [None]:
def plot_annotated_mel_spec(data, samplerate, annotations, cls_col=None, bounds=None, n_fft=1200, hop_length=20,
                            n_mels=400, fmax=1600, adjust_fmax=True, figsize=(15, 5), buffer_s=0.125,
                            title=None):
    """Plot a mel spectrogram of part of ``data`` and overlay the annotation boxes.

    Args:
        data: Sequence of audio samples; the plotted slice is passed to
            ``librosa.feature.melspectrogram`` (assumed to be a mono float
            array -- TODO confirm against callers).
        samplerate: Sampling rate of ``data`` in samples per second.
        annotations: DataFrame with one box per row, holding the time columns
            ``left_col``/``right_col`` and the frequency columns
            ``top_col``/``bot_col``.
        cls_col: Optional column whose values pick each box's colour and
            legend label. When None every box is labelled "NA" and no legend
            is drawn.
        bounds: Optional ``(start_s, end_s)`` window in seconds. When None the
            window spans all annotations, padded by ``buffer_s`` on each side.
        n_fft: FFT window length in samples, forwarded to librosa.
        hop_length: Hop between frames in samples, forwarded to librosa.
        n_mels: Number of mel bands, forwarded to librosa.
        fmax: Highest frequency (Hz) of the mel filter bank and of the plot.
        adjust_fmax: If True, raise ``fmax`` to 110% of the highest annotated
            frequency whenever a box extends above it.
        figsize: Figure size passed to ``plt.figure``.
        buffer_s: Padding in seconds, used only when ``bounds`` is None.
        title: Optional text appended to the plot title in parentheses.

    Returns:
        None. The figure is shown and then closed. Nothing is drawn when the
        requested window holds fewer than ``n_fft`` samples.

    Raises:
        ValueError: If ``bounds`` is None and ``annotations`` is empty.
    """
    # Extract annotation bounds
    if bounds is None:
        if len(annotations) == 0:
            # min()/max() of an empty column are NaN, which previously only
            # surfaced later as an obscure float-to-int conversion error.
            raise ValueError("bounds must be provided when annotations is empty")
        start_s = annotations[left_col].min() - buffer_s
        end_s = annotations[right_col].max() + buffer_s
    else:
        start_s, end_s = bounds
    start_s, end_s = max(start_s, 0.0), min(end_s, len(data)/samplerate)
    observed_max = annotations[top_col].max()
    if adjust_fmax and observed_max > fmax:
        new_fmax = observed_max*1.1
        print("Annotations extend above frequency max of {} Hz, increasing to {:g} Hz.".format(fmax, new_fmax))
        fmax = new_fmax

    # Pad the slice by half a window on each side so that the un-padded
    # (center=False) frames are centred on the requested window edges.
    shift = math.ceil(n_fft/2)
    start_i, end_i = int(math.floor(start_s*samplerate) - shift), int(math.ceil(end_s*samplerate) + shift)
    if start_i < 0:
        print("Start Index < 0! Setting to 0 instead.")
        start_i = 0
        start_s = (start_i + shift) / samplerate
    # The slice end is exclusive, so end_i == len(data) is already valid; only
    # clamp when it really runs past the data (and keep the final sample).
    if end_i > len(data):
        print("End Index > length of sequence. Setting to end of sequence instead.")
        end_i = len(data)
        end_s = (end_i - shift) / samplerate
    if end_i - start_i < n_fft:
        # Fewer than n_fft samples cannot fill a single un-centred frame.
        print("Only {} samples in window, fewer than n_fft={}; nothing to plot.".format(end_i - start_i, n_fft))
        return

    # Compute & Draw Mel Spectrogram
    mel_spec = librosa.feature.melspectrogram(y=data[start_i:end_i],
                                              sr=samplerate,
                                              n_fft=n_fft,
                                              hop_length=hop_length,
                                              n_mels=n_mels,
                                              fmax=fmax,
                                              center=False)
    S_dB = librosa.power_to_db(mel_spec, ref=np.max)
    plt.figure(figsize=figsize)
    librosa.display.specshow(S_dB,
                             x_axis='time',
                             y_axis='mel',
                             sr=samplerate,
                             hop_length=hop_length,
                             fmax=fmax)

    # Draw Annotations
    ax = plt.gca()
    if cls_col is not None:
        classes = annotations[cls_col].unique()
    else:
        classes = ["NA"]
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap (plt.cm.get_cmap)
    # was removed in Matplotlib 3.9.
    colors = plt.get_cmap("hsv")
    class_colors = {classes[c]: colors(c / (len(classes)+1)) for c in range(len(classes))}
    # iterrows always yields one row per box, even if the index has duplicates.
    for _, box in annotations.iterrows():
        left, right = box[left_col], box[right_col]
        # Bottom edge is floored at 5 Hz -- presumably so the box stays visible
        # on the non-linear mel axis; confirm before changing.
        top, bot = box[top_col], max(box[bot_col], 5)
        cls = box[cls_col] if cls_col is not None else "NA"

        # Box times are absolute; the plot's time axis starts at start_s.
        rect = Rectangle((left - start_s, bot), # X,Y of bottom left
                         right-left, # Width
                         top-bot, # Height
                         linewidth=2,
                         edgecolor=class_colors[cls],
                         facecolor='none',
                         label=cls)
        ax.add_patch(rect)

    # Decorate Plot
    # Frequency ticks at successive doublings of 64 Hz, kept below fmax.
    y_ticks = [64]
    while y_ticks[-1] * 2 < fmax:
        y_ticks.append(y_ticks[-1] * 2)
    # Tick positions are relative to the window; labels show absolute time.
    x_ticks = np.linspace(0.0, end_s - start_s, num=15)
    x_tick_labels = ["{:.3f}".format(t) for t in (x_ticks+start_s)]
    plt.xticks(x_ticks, x_tick_labels)
    plt.yticks(y_ticks)
    plt.xlim([0.0, end_s - start_s])
    plt.xlabel("Time (Seconds)")
    plt.ylabel("Frequency (Hz)")
    if title is None:
        plt.title("Mel Spectrogram")
    else:
        plt.title("Mel Spectrogram ({})".format(title))
    if cls_col is not None:
        # Collapse repeated labels so each class appears once in the legend.
        handles, labels = plt.gca().get_legend_handles_labels()
        by_label = OrderedDict(zip(labels, handles))
        plt.legend(by_label.values(), by_label.keys(), loc='upper right')
    plt.show()
    plt.close()

In [None]:
# Scans through all annotations and visualizes in 30-second blocks
def visualize_all_annotations(annotations, data, samplerate,
                              block_size=30, step_size=20, n_fft=4096, hop_length=64,
                              n_mels=512, fmax=1600, adjust_fmax=True, figsize=(15, 5),
                              cls_col="Species+Index"):
    """Plot the whole recording as a series of annotated spectrogram blocks.

    Starting at 0 s, a window of ``block_size`` seconds is plotted with
    ``plot_annotated_mel_spec`` and then advanced by ``step_size`` seconds
    until the start passes the end of ``data``.

    Args:
        annotations: DataFrame of boxes with the ``left_col``/``right_col``
            time columns (plus whatever ``plot_annotated_mel_spec`` reads).
        data: Sequence of audio samples.
        samplerate: Sampling rate of ``data`` in samples per second.
        block_size: Length of each plotted window in seconds.
        step_size: Advance between consecutive windows in seconds; must be
            positive.
        n_fft, hop_length, n_mels, fmax, adjust_fmax, figsize: Forwarded to
            ``plot_annotated_mel_spec``. They were previously accepted but
            silently ignored.
        cls_col: Column used to colour and label boxes. If it is not present
            in ``annotations`` the boxes are drawn without class labels.

    Raises:
        ValueError: If ``step_size`` is not positive (the scan would never end).
    """
    if step_size <= 0:
        raise ValueError("step_size must be positive, got {}".format(step_size))
    if cls_col is not None and cls_col not in annotations.columns:
        print("Column {!r} not found in annotations; drawing boxes without class labels.".format(cls_col))
        cls_col = None

    file_length = len(data) / samplerate
    start = 0
    while start < file_length:
        block_end = start + block_size
        # Keep every box that is not entirely before or entirely after the block.
        mask = ~((annotations[left_col] > block_end) | (annotations[right_col] < start))
        plot_annotated_mel_spec(data, samplerate,
                                annotations.loc[mask],
                                cls_col=cls_col,
                                bounds=[start, block_end],
                                n_fft=n_fft,
                                hop_length=hop_length,
                                n_mels=n_mels,
                                fmax=fmax,
                                adjust_fmax=adjust_fmax,
                                figsize=figsize,
                                buffer_s=0.0)
        start += step_size

In [None]:
# Walk through the whole recording, one annotated spectrogram block at a time.
visualize_all_annotations(annotations=annotations, data=data, samplerate=samplerate)