In [None]:
import math
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from scipy.io import wavfile
from collections import OrderedDict
from tqdm import tqdm

In [None]:
recording_name = "671658014.180929033558"
wav_name = "../data/{}_norm_8k-resample.wav".format(recording_name)
initials = ["AW","MS"]

# Load one tab-separated selection table per annotator, drop the columns that are
# not used in this notebook, and tag every row with the annotator's initials so the
# two sources can still be told apart after concatenation.
selection_tables = []
for ins in initials:
    table = pd.read_csv("../data/{}-{}.txt".format(recording_name, ins), sep="\t")
    table = table.drop(["Selection", "View", "Channel"], axis=1)
    table["Source"] = ins
    selection_tables.append(table)

all_annotations = pd.concat(selection_tables, ignore_index=True)

# A "Species" label may end in a single confidence digit (e.g. "hb whale 3").
# Work on a whitespace-stripped copy so that the character inspected here is the
# same character that gets removed further down.  (Slicing the unstripped label
# removed a trailing space instead of the digit, leaving labels like "hb whale 5".)
species = all_annotations["Species"].str.strip()
all_annotations["Species Confidence"] = species.str[-1]
# Missing or empty labels have no last character; give them the default confidence.
all_annotations.loc[all_annotations["Species Confidence"].isna(), "Species Confidence"] = "5"
has_conf = all_annotations["Species Confidence"].apply(lambda x: x.isdigit())

# Normalise every label (not only the ones carrying a digit), then cut the
# confidence digit off the labels that have one.
species = species.str.lower()
species.loc[has_conf] = species.loc[has_conf].str[:-1].str.strip()
all_annotations["Species"] = species

# A non-digit last character belongs to the label itself: use the default confidence.
all_annotations.loc[~has_conf, "Species Confidence"] = "5"
all_annotations["Species Confidence"] = all_annotations["Species Confidence"].astype(int)
all_annotations

In [None]:
# Keep only the rows that have no value in the lowercase "species" column; after
# that filter the column is entirely empty, so it is removed in the same step.
all_annotations = all_annotations.loc[all_annotations["species"].isna()].drop(columns=["species"])

In [None]:
# Keep only the rows with nothing recorded under "Uncertainty"; the column is
# all-NaN once filtered, so it is removed in the same step.
all_annotations = all_annotations.loc[all_annotations["Uncertainty"].isna()].drop(columns=["Uncertainty"])

In [None]:
# Annotations without a species label default to the raw label "hb whale".
all_annotations["Species"] = all_annotations["Species"].fillna("hb whale")

In [None]:
# Show how often each raw label occurs (NaN included) before the labels are cleaned.
all_annotations["Species"].value_counts(dropna=False)

In [None]:
# Maps every raw label spelling to its canonical class name.  Each key appears
# exactly once (a repeated key in a dict literal silently overrides the earlier one).
species_cleaning_dict = {
    # Humpback whale
    "hb whale": "Humpback Whale",
    "hb": "Humpback Whale",
    "hb whale 5": "Humpback Whale",
    "hb whale5": "Humpback Whale",
    "hb whale 3": "Humpback Whale",
    "?hb whale": "Humpback Whale",
    "hbwhale": "Humpback Whale",
    " hb whale": "Humpback Whale",
    "hb whael": "Humpback Whale",
    # Sea lion
    "sea lion": "Sea Lion",
    # Man-made noise
    "mechanical": "Mechanical",
    "mechanic": "Mechanical",
    "mech.": "Mechanical",
    "mech": "Mechanical",
    "helicopter/plane": "Mechanical",
    "boat": "Mechanical",
    # Unidentified
    "unknown": "Unknown",
    "unkown": "Unknown",
    "??": "Unknown",
    "?": "Unknown",
}
raw_species = all_annotations["Species"]
cleaned_species = raw_species.map(species_cleaning_dict)

# Series.map turns every label that is not a key of the dict into NaN.  Report
# those labels instead of losing them silently.
unmapped_species = raw_species[cleaned_species.isna() & raw_species.notna()]
if len(unmapped_species) > 0:
    print("Warning: {} annotations have labels missing from species_cleaning_dict (set to NaN): {}".format(
        len(unmapped_species), unmapped_species.unique().tolist()))

all_annotations["Species"] = cleaned_species
all_annotations["Species"].value_counts(dropna=False)

In [None]:
# TODO: measure speed of different fns for opening wav files
def read_wavfile(wav_name, normalize=True, verbose=False):
    """Read a WAV file with scipy and optionally rescale its samples.

    Parameters
    ----------
    wav_name : path of the WAV file, passed to ``scipy.io.wavfile.read``.
    normalize : if True, convert the samples to float and rescale them linearly so
        that the smallest sample becomes -0.5 and the largest becomes 0.5.
    verbose : if True, print the file name, sample count, sample rate and duration.

    Returns
    -------
    (sr, data) : the sample rate and the sample array as returned by
        ``wavfile.read`` (rescaled float array when ``normalize`` is True).

    A constant signal has no range to rescale; it is returned as all zeros instead
    of the NaNs a 0/0 division would produce.  Empty audio is returned unchanged
    (as float) instead of failing inside ``min()``.
    """
    if verbose:
        print("Reading {}".format(wav_name))
    sr, data = wavfile.read(wav_name)
    if verbose:
        print("{} samples at {} samples/sec --> {} seconds".format(data.shape[0], sr, data.shape[0]/sr))

    if normalize:
        data = data.astype(float)
        if data.size > 0:
            # Shift so the minimum is 0, then scale by the remaining peak.
            data = data - data.min()
            peak = data.max()
            if peak > 0:
                data = data / peak
                data = data - 0.5
            # peak == 0: every sample was identical; `data` is already all zeros.

    return sr, data

# Load the recording once (normalized by default); `samplerate` and `data` are
# the audio inputs of the plotting cells further down.
samplerate, data = read_wavfile(wav_name, verbose=True)

In [None]:
def plot_annotated_mel_spec(data, samplerate, annotations, cls_col=None, n_fft=4096, hop_length=64,
                            n_mels=512, fmax=1600, adjust_fmax=True, figsize=(15, 5), buffer_s=0.125,
                            title=None):
    """Plot a mel spectrogram of the audio around `annotations` with every annotation drawn as a box.

    Parameters
    ----------
    data : 1-D sample array of the whole recording.
    samplerate : samples per second of `data`.
    annotations : DataFrame with "Begin Time (s)", "End Time (s)", "High Freq (Hz)"
        and "Low Freq (Hz)" columns (plus `cls_col` when given).
    cls_col : optional column whose values select the box colour and legend entry.
    n_fft, hop_length, n_mels, fmax : forwarded to ``librosa.feature.melspectrogram``.
    adjust_fmax : if True, raise `fmax` to 110% of the highest annotated frequency
        whenever an annotation extends above `fmax`.
    figsize : matplotlib figure size.
    buffer_s : seconds of context shown before the first and after the last annotation.
    title : optional text appended to the plot title.

    The figure is shown and closed; nothing is returned.
    """
    # Extract annotation bounds
    start_s, end_s = annotations["Begin Time (s)"].min() - buffer_s, annotations["End Time (s)"].max() + buffer_s
    start_s, end_s = max(start_s, 0.0), min(end_s, len(data)/samplerate)
    observed_max = annotations["High Freq (Hz)"].max()
    if adjust_fmax and observed_max > fmax:
        new_fmax = observed_max*1.1
        print("Annotations extend above frequency max of {} Hz, increasing to {:g} Hz.".format(fmax, new_fmax))
        fmax = new_fmax
    # The sample window is widened by half an FFT window on each side; presumably so
    # that the un-centred frames (center=False below) line up with start_s/end_s.
    start_i, end_i = int(math.floor(start_s*samplerate) - n_fft/2), int(math.ceil(end_s*samplerate) + n_fft/2)
    if start_i < 0:
        print("Start Index < 0! Setting to 0 instead.")
        start_i = 0
        start_s = (start_i + n_fft/2) / samplerate
    if end_i >= len(data):
        print("End Index > length of sequence. Setting to end of sequence instead.")
        end_i = len(data)-1
        end_s = (end_i - n_fft/2) / samplerate

    # Compute & Draw Mel Spectrogram
    mel_spec = librosa.feature.melspectrogram(y=data[start_i:end_i],
                                              sr=samplerate,
                                              n_fft=n_fft,
                                              hop_length=hop_length,
                                              n_mels=n_mels,
                                              fmax=fmax,
                                              center=False)
    S_dB = librosa.power_to_db(mel_spec, ref=np.max)
    plt.figure(figsize=figsize)
    librosa.display.specshow(S_dB,
                             x_axis='time',
                             y_axis='mel',
                             sr=samplerate,
                             hop_length=hop_length,
                             fmax=fmax)

    # Draw Annotations
    ax = plt.gca()
    if cls_col is not None:
        classes = annotations[cls_col].unique()
    else:
        classes = ["NA"]
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
    colors = plt.get_cmap("hsv")
    # Dividing by len(classes)+1 keeps the last class away from the end of the colormap.
    class_colors = {classes[c]: colors(c / (len(classes)+1)) for c in range(len(classes))}
    # iterrows() yields exactly one row per annotation even if index labels repeat.
    for _, box in annotations.iterrows():
        # The bottom edge is kept at 5 Hz or above.
        left, right, top, bot = box["Begin Time (s)"], box["End Time (s)"], \
                                box["High Freq (Hz)"], max(box["Low Freq (Hz)"], 5)
        if cls_col is not None:
            cls = box[cls_col]
        else:
            cls = "NA"

        # The plot's time axis starts at 0, so shift the box by the window start.
        rect = Rectangle((left - start_s, bot), # X,Y of bottom left
                         right-left, # Width
                         top-bot, # Height
                         linewidth=2,
                         edgecolor=class_colors[cls],
                         facecolor='none',
                         label=cls)
        ax.add_patch(rect)

    # Decorate Plot
    # Tick positions are relative to the window; their labels show recording time.
    x_ticks = np.linspace(0.0, end_s - start_s, num=5)
    x_tick_labels = ["{:.3f}".format(t) for t in (x_ticks+start_s)]
    plt.xticks(x_ticks, x_tick_labels)
    plt.xlabel("Time (Seconds)")
    plt.ylabel("Frequency (Hz)")
    if title is None:
        plt.title("Mel Spectrogram")
    else:
        plt.title("Mel Spectrogram ({})".format(title))
    if cls_col is not None:
        # Every box carries a label; collapse them to one legend entry per class.
        handles, labels = plt.gca().get_legend_handles_labels()
        by_label = OrderedDict(zip(labels, handles))
        plt.legend(by_label.values(), by_label.keys(), loc='upper right')
    plt.show()
    plt.close()

In [None]:
# Preview the first part of the recording in 30 s windows that start every 15 s
# (0, 15, 30, 45), colouring the boxes by annotator.  Windows that no annotation
# overlaps are skipped.
for start in range(0, 60, 15):
    mask = (all_annotations["Begin Time (s)"] < start+30) & (all_annotations["End Time (s)"] > start)
    n_boxes = mask.sum()
    if n_boxes == 0:
        continue
    print("Found {} annotations between {} and {} seconds.".format(n_boxes, start, start+30))
    plot_annotated_mel_spec(data=data,
                            samplerate=samplerate,
                            annotations=all_annotations.loc[mask],
                            cls_col="Source",
                            buffer_s=3.0,
                            figsize=(15,5))

In [None]:
# Also called "Jaccard Index"
def IOU(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is a ``(left, right, top, bottom)`` tuple with ``left <= right`` and
    ``bottom <= top``.  The result is ``0.0`` when the boxes do not overlap and
    also when the union has no area (e.g. two coincident zero-area boxes, or NaN
    coordinates), so the function never divides by zero or returns NaN.
    """
    # (left, right, top, bottom) is the box order
    l1, r1, t1, b1 = box1
    l2, r2, t2, b2 = box2

    # Quick check if boxes do not overlap
    # Time dimension (r/l) checked first since it is more likely to filter
    if r1 < l2 or r2 < l1 or t1 < b2 or t2 < b1:
        return 0.0

    # IOU Calculation
    intersection_area = (min(r1, r2) - max(l1, l2)) * (min(t1, t2) - max(b1, b2))
    union_area = (r1 - l1) * (t1 - b1) + (r2 - l2) * (t2 - b2) - intersection_area

    # `not (x > 0)` is also True for NaN, unlike `x <= 0`.
    if not union_area > 0:
        return 0.0

    return intersection_area / union_area

In [None]:
# Only humpback-whale and sea-lion boxes go through the merge pipeline below;
# rows with any other (or missing) Species value are left out.
annotations_to_merge = all_annotations.loc[all_annotations["Species"].isin(["Humpback Whale", "Sea Lion"])]

In [None]:
def calculate_agreements(annotations, verbose=True):
    """Compute the pairwise IOU between all annotation boxes.

    Parameters
    ----------
    annotations : DataFrame with "Begin Time (s)", "End Time (s)", "High Freq (Hz)"
        and "Low Freq (Hz)" columns.
    verbose : if True, show a tqdm progress bar over the outer loop.

    Returns
    -------
    A symmetric ``(n, n)`` float array where entry ``[i, j]`` is the IOU of the
    i-th and j-th rows (by position).  The diagonal is left at 0, so a box never
    counts as overlapping itself.
    """
    n_boxes = len(annotations)
    agreements = np.zeros(shape=(n_boxes, n_boxes))

    # Extract the four coordinates once, in IOU's (left, right, top, bottom) order.
    # Building a row Series with .iloc for every pair inside the O(n^2) loop is the
    # dominant cost otherwise.
    boxes = annotations[["Begin Time (s)", "End Time (s)", "High Freq (Hz)", "Low Freq (Hz)"]].to_numpy()

    iter1 = range(n_boxes)
    if verbose:
        iter1 = tqdm(iter1, desc='Calculating Agreements')
    for i1 in iter1:
        box1 = boxes[i1]
        # Only the upper triangle is computed; the value is mirrored below it.
        for i2 in range(i1+1, n_boxes):
            agreements[i1, i2] = IOU(box1, boxes[i2])
            agreements[i2, i1] = agreements[i1, i2]
    return agreements


def extract_trivial_annotations(annotations, agreements=None, thresh=0.02, verbose=True):
    """Split off the annotations that overlap no other annotation by more than `thresh`.

    Parameters
    ----------
    annotations : DataFrame of boxes; row order must match `agreements`.
    agreements : optional pairwise IOU matrix; computed with
        ``calculate_agreements`` when omitted.
    thresh : a box is "trivial" when every IOU in its row is <= this value.
    verbose : print summary counts (and show the progress bar if agreements have
        to be computed).

    Returns
    -------
    (done_annotations, remaining_annotations, remaining_agreements) : the trivial
    rows, the other rows, and the agreement matrix restricted to the other rows.
    """
    if agreements is None:
        # Forward `verbose` so that verbose=False also silences the progress bar.
        agreements = calculate_agreements(annotations, verbose=verbose)

    trivials = (agreements <= thresh).all(axis=1)

    if verbose:
        print("N Trivial: ", trivials.sum())
        print("% Trivial :", (trivials.sum())/agreements.shape[0])

    done_annotations = annotations.loc[trivials]
    # Keep the matrix aligned with the rows that are passed on.
    remaining_agreements = agreements[~trivials,:][:,~trivials]
    remaining_annotations = annotations.loc[~trivials]

    if verbose:
        print("Remaining Annotations: ", len(remaining_annotations))

    return done_annotations, remaining_annotations, remaining_agreements


def extract_merged_annotations(annotations, class_col, agreements=None, thresh=0.55, verbose=True):
    """Merge pairs of same-class boxes that are each other's best overlap.

    Two boxes are matched when their IOU is above `thresh`, they share the same
    value in `class_col`, and each is the other's highest-IOU partner.  A matched
    pair is replaced by one box whose time/frequency bounds and
    "Species Confidence" are the mean of the two; its other fields are copied from
    the first box of the pair and its "Source" is set to "Merged".

    Parameters
    ----------
    annotations : DataFrame of boxes; row order must match `agreements`.
    class_col : column that must be equal for two boxes to be merged.
    agreements : optional pairwise IOU matrix; computed when omitted.
    thresh : minimum IOU (exclusive) for a match.
    verbose : print summary counts.

    Returns
    -------
    (done_annotations, remaining_annotations, None) : the merged boxes and the
    unmatched rows.  No agreement matrix is returned for the remaining rows.
    """
    if len(annotations) == 0:
        # Nothing to pair, and the row-wise max below raises on a 0x0 matrix.
        if verbose:
            print("Remaining Annotations: ", 0)
        return annotations.copy(), annotations.copy(), None

    if agreements is None:
        # Forward `verbose` so that verbose=False also silences the progress bar.
        agreements = calculate_agreements(annotations, verbose=verbose)

    classes = annotations[class_col].to_numpy()
    # The 1-D max broadcasts along columns, so pairings[i, j] says "i is a best
    # overlap of j" (the matrix is symmetric, so row max == column max).
    pairings = ((agreements == agreements.max(axis=1))
                & (agreements > thresh)
                & (classes.reshape((1,-1)) == classes.reshape((-1,1))))
    # Mutual best matches only; the upper triangle keeps each pair once.
    matches =  np.triu(pairings & pairings.T, 0)

    if verbose:
        print("N Pairings: ", pairings.sum()//2, "N Matches: ", matches.sum())
        print("% Matched :", (matches.sum())/agreements.shape[0])

    matched_boxes = []
    for b_i, b_j in zip(*np.nonzero(matches)):
        box_i, box_j = annotations.iloc[b_i], annotations.iloc[b_j]
        new_box = box_i.copy()
        for c in ["Begin Time (s)", "End Time (s)", "High Freq (Hz)", "Low Freq (Hz)", "Species Confidence"]:
            new_box[c] = (box_i[c] + box_j[c]) / 2.0
        new_box["Source"] = "Merged"
        matched_boxes.append(new_box)
    done_annotations = pd.DataFrame(matched_boxes)
    # Both members of every matched pair are consumed by the merge.
    matched_mask =  (matches | matches.T).any(axis=1)
    remaining_annotations = annotations[~matched_mask]

    if verbose:
        print("Remaining Annotations: ", len(remaining_annotations))

    return done_annotations, remaining_annotations, None


def extract_aggregate_annotations(annotations, agreements=None, ratio_thresh=2.0, verbose=True):
    """Remove "aggregate" boxes: large boxes drawn around several smaller ones.

    A box is an aggregate when more than two other boxes both overlap it
    (IOU > 0) and are more than `ratio_thresh` times smaller in area.

    Parameters
    ----------
    annotations : DataFrame of boxes; row order must match `agreements`.
    agreements : optional pairwise IOU matrix; computed when omitted.
    ratio_thresh : minimum area ratio (big / small, exclusive).
    verbose : print summary counts (and show the progress bar if agreements have
        to be computed).

    Returns
    -------
    (None, remaining_annotations, remaining_agreements) : aggregates are discarded,
    so there are no "done" boxes; the matrix is restricted to the remaining rows.
    """
    if agreements is None:
        # Forward `verbose` so that verbose=False also silences the progress bar.
        agreements = calculate_agreements(annotations, verbose=verbose)

    # Box areas (seconds * Hz), computed column-wise instead of row by row.
    widths = annotations["End Time (s)"] - annotations["Begin Time (s)"]
    heights = annotations["High Freq (Hz)"] - annotations["Low Freq (Hz)"]
    areas = (widths * heights).to_numpy(dtype=float)

    # Zero-area boxes give inf/NaN ratios; those compare as intended (inf > thresh,
    # NaN is never > thresh), so only the warnings are suppressed.
    with np.errstate(divide="ignore", invalid="ignore"):
        # area_ratios[i,j] = areas[i] / areas[j]
        area_ratios = areas.reshape((-1, 1)) / areas.reshape((1, -1))

        # Aggregates (i big)
        aggregate_boxes = ((area_ratios > ratio_thresh) & (agreements > 0.0)).sum(axis=1) > 2

    if verbose:
        print("N Aggregators: ", aggregate_boxes.sum())
        total = aggregate_boxes.sum()
        print("N Total: ", total)
        print("% Total: ", total/agreements.shape[0])

    remaining_agreements = agreements[~aggregate_boxes,:][:,~aggregate_boxes]
    remaining_annotations = annotations[~aggregate_boxes]

    if verbose:
        print("Remaining Annotations: ", len(remaining_annotations))

    return None, remaining_annotations, remaining_agreements

In [None]:
# Collects the finished boxes produced by each pipeline step (a list of DataFrames).
done_boxes = []

# 1. Pass along trivial boxes so that future steps don't need to consider them.
#    With no agreements passed in, the full pairwise IOU matrix is computed here.
done, rem_annotations, rem_agreements = extract_trivial_annotations(annotations_to_merge)
done_boxes.append(done)

In [None]:
# 2. Merge matched boxes (union vs. mean strategies) -- currently using the mean strategy.
#    The third return value is discarded: extract_merged_annotations returns None for it.
done, rem_annotations, _ = extract_merged_annotations(rem_annotations, "Species", agreements=rem_agreements)
done_boxes.append(done)

In [None]:
# 3. Detect aggregate boxes and remove them.
#    No agreements are passed in, so they are recomputed for the rows left after step 2.
#    Aggregates are dropped rather than kept, hence the ignored first return value.
_, rem_annotations, rem_agreements = extract_aggregate_annotations(rem_annotations)

In [None]:
# 4. Finally check whether new trivials have been revealed by the removals above.
#    Uses a looser threshold (0.04) than the function's default of 0.02.
done, rem_annotations, rem_agreements = extract_trivial_annotations(rem_annotations,
                                                                    agreements=rem_agreements,
                                                                    thresh=0.04)
done_boxes.append(done)

In [None]:
# Collapse the per-step results into one DataFrame.
# NOTE: this rebinds `done_boxes` from a list to a DataFrame, so the cell cannot be
# re-run on its own (pd.concat rejects a bare DataFrame) -- re-run from step 1 instead.
done_boxes = pd.concat(done_boxes)

In [None]:
# Summary of the automatic steps: "handled" counts every initial annotation that is
# no longer in rem_annotations (i.e. passed along, merged, or removed as an aggregate).
print("Produced {} done annotations".format(len(done_boxes)))
print("Handled {} of {} initial annotations ({:.2f}%)".format(
    len(annotations_to_merge)-len(rem_annotations),
    len(annotations_to_merge),
    100*(len(annotations_to_merge)-len(rem_annotations)) / len(annotations_to_merge)))

In [None]:
# Visualize all of the remaining boxes and manually merge
# Each box is labelled "<Species><index>" so the index shown in the legend can be
# copied into the manual merge instructions.
# .assign() builds a new frame instead of writing a column into the filtered frame
# returned by the extract_* helpers, which avoids pandas' SettingWithCopyWarning.
rem_annotations = rem_annotations.assign(
    **{"Index+Class": rem_annotations["Species"].str.cat(rem_annotations.index.astype(str))})

# Seconds of context on either side of a box within which neighbours are also drawn.
buffer = 2.0
for i in range(len(rem_annotations)):
    tmp = rem_annotations.iloc[i]
    l_edge, r_edge = tmp["Begin Time (s)"], tmp["End Time (s)"]
    mask = (rem_annotations["End Time (s)"] > (l_edge-buffer)) & (rem_annotations["Begin Time (s)"] < (r_edge+buffer))
    plot_annotated_mel_spec(data, samplerate,
                            rem_annotations.loc[mask],
                            figsize=(10,5),
                            buffer_s=1.5,
                            cls_col="Index+Class",
                            adjust_fmax=False,
                            title=tmp.name)

In [None]:
# Instruction format: "CODE,ID_1[,ID_2,...,ID_N]"
#   r --> remove the listed boxes
#   i --> replace the listed boxes by their intersection
#   u --> replace the listed boxes by their union
#   m --> replace the listed boxes by their mean
#   c --> create a new box (the arguments are coordinates, not IDs)

def validate_instructions(instructions):
    """Return the box IDs that are referenced more than once across `instructions`.

    "c" instructions are skipped because their arguments are coordinates.  The
    result is a pandas Series indexed by ID whose values are the occurrence
    counts; it is empty when no ID is repeated.
    """
    referenced_ids = [
        int(token)
        for line in instructions
        for code, *tokens in [line.split(",")]
        if code != "c"
        for token in tokens
    ]
    counts = pd.Series(referenced_ids).value_counts()
    repeated = counts > 1
    return counts.loc[repeated]

instructions = [
"r,1159",
"i,10,1163",
"r,14",
"r,1196",
"r,39",
"r,879",
"r,46",
"r,48",
"r,1100",
"r,49",
"r,1102",
"m,55,1249",
"r,1264",
"r,1265",
"r,74",
"r,1268",
"r,1269",
"m,82,1275",
"r,1280",
"r,88",
"m,96,1289",
"r,1292",
"m,105,1296",
"r,1298",
"u,111,1300",
"m,134,1318",
"m,140,1324",
"r,1330",
"r,1336",
"r,1341",
"r,1342",
"m,185,1363",
"u,187,1365",
"u,188,1366",
"r,189",
"m,194,1369",
"u,196,1370",
"r,1371",
"r,1372",
"r,1373",
"m,924,1374",
"r,1375",
"m,204,1376",
"u,206,1380",
"m,207,1381",
"r,214",
"r,1399",
"m,223,1403",
"r,1404",
"r,1405",
"m,228,1409",
"r,233",
"m,239,1419",
"r,245",
"r,259",
"r,260",
"r,1445",
"u,283,1454",
"m,289,1464",
"r,1474",
"m,299,1475",
"r,301",
"m,322,1497",
"u,324,1498",
"u,329,1504",
"u,331,1506",
"u,332,1507",
"r,333",
"r,336",
"r,337",
"u,340,341,1524",
"m,344,1528",
"u,345,1529",
"u,346,1530",
"u,351,1536",
"u,359,1541",
"m,372,1544",
"u,373,1545",
"r,1548",
"u,378,1551",
"u,1038,1550",
"r,1552",
"u,383,1556",
"u,1040,1557",
"u,384,1558",
"m,386,1561",
"u,390,1564",
"u,392,1565",
"u,393,1566",
"r,395",
"r,401",
"r,1573",
"u,409,1579",
"r,1589",
"r,1590",
"r,1595",
"r,1598",
"r,1614",
"m,429,1617",
"r,1618",
"u,431,1619", # Consider lowering bottom to 0Hz
"r,1623",
"r,1627",
"r,1635",
"r,1639",
"r,1640",
"r,1643",
"u,460,1674",
"u,464,1677",
"m,473,1686",
"r,1689",
"u,477,1690", # Consider dropping bottom to 0Hz
"r,1692",
"r,1694",
"m,483,1696",
"u,485,1698",
"u,486,1699",
"r,1704",
"m,492,1706",
"r,1707",
"u,496,1073,1710",
"r,1713",
"r,1716",
"r,506",
"r,1722",
"u,511,1723",
"u,520,1727",
"r,1729",
"r,1732",
"r,1733",
"u,531,1735",
"r,1736",
"r,1737",
"r,1738",
"u,544,1747", # Consider dropping bottom to 0Hz
"u,546,1749",
"u,548,1751", # Consider dropping bottom to 0Hz
"r,549",
"r,553",
"m,556,1765",
"u,558,1779",
"u,559,1783",
"r,1788",
"r,563",
"c,6778.01,6780.9,256,0.0",# l,r,t,b
"r,1791",
"u,570,1796",
"u,571,1797",
"m,573,1799",
"u,575,1800",
"r,576",
"r,1801",
"c,6939.5,6942.7,460,0.0", # l,r,t,b
"r,1802",
"r,1803",
"r,1814",
"r,1815",
"u,610,1817",
"u,620,1827",
"u,625,1830",
"u,626,1833",
"u,629,1836",
"r,631",
"u,634,1842",
"r,1847",
"r,1849",
"r,1857",
"u,658,1859",
"u,660,1861",
"r,1863",
"r,1864",
"r,1865",
"m,666,1866",
"u,670,1869",
"u,677,1873",
"u,678,1874",
"u,679,1875",
"m,681,1877",
"r,682",
"u,689,1882",
"u,690,1883",
"r,1890",
"r,1893",
"r,1894",
"u,701,1897,1898",
"u,703,1900",
"u,705,1888",
"u,712,1903",
"r,1904",
"r,1905",
"u,722,1906",
"r,1911",
"u,757,1913",
"u,769,1917", # Consider dropping bottom to 0Hz
"r,1918",
"m,777,1922",
"r,1923",
"r,779",
"r,780",
"m,781,1928",
"r,1929",
"r,1930",
"r,1931",
"r,1932",
"r,1934",
"u,797,1938",
"u,799,1939",
"u,800,1940",
"r,802",
"u,803,1946",
"u,805,1948",
"u,807,1950",
"u,810,1953",
"u,824,1960",
"u,825,1961",
"m,827,1964",
"r,1965",
"u,834,1967",
"u,847,1980",
"u,848,1981",
"r,1982",
"u,851,1984",
"m,863,1150",
"r,865",
"u,866,1172",
"u,938,1448",
"u,963,1502",
"u,994,1509",
"u,995,1511",
"u,997,1513",
"u,1056,1601",
"u,1057,1602",
"r,1603",
"u,1067,1615",
"u,1070,1662",
"u,1075,1715",
"u,1076,1726",
"m,1089,1205",
"r,1090",
"r,1222",
"r,1223",
"r,1227",
"r,1235",
"u,1116,1637",
"u,1117,1638",
"r,1121",
"m,1122,1645",
"u,1124,1657",
"r,1128",
"m,1134,1767",
"m,1138,1772",
"r,1792"
]

# Displays the IDs used by more than one instruction; an empty result means none are repeated.
validate_instructions(instructions)

In [None]:
def execute_instructions(instructions, rem_annotations):
    """Apply the manual merge instructions to `rem_annotations`.

    Each instruction is a comma-separated string "CODE,ARG_1[,ARG_2,...]":
      r -- drop the listed rows;
      i -- replace the listed rows by their intersection;
      u -- replace the listed rows by their union;
      m -- replace the listed rows by their mean box;
      c -- create a box from "left,right,top,bottom" coordinates.
    Merged boxes start as a copy of the first listed row, take the highest
    "Species Confidence" of the group and get "Source" set to "Merged".

    Returns
    -------
    (keep_mask, new_boxes) : a boolean array over the rows of `rem_annotations`
    (True = row was not consumed by any instruction) and a DataFrame holding the
    merged and created boxes.

    Raises
    ------
    ValueError if an instruction code is not one of r/i/u/m/c.
    """
    # For each merge code: which reduction produces each field of the merged box.
    merge_rules = {
        "i": {"Begin Time (s)": "max", "Low Freq (Hz)": "max", "Species Confidence": "max",
              "End Time (s)": "min", "High Freq (Hz)": "min"},
        "u": {"End Time (s)": "max", "High Freq (Hz)": "max", "Species Confidence": "max",
              "Begin Time (s)": "min", "Low Freq (Hz)": "min"},
        "m": {"Begin Time (s)": "mean", "End Time (s)": "mean", "High Freq (Hz)": "mean",
              "Low Freq (Hz)": "mean", "Species Confidence": "max"},
    }

    index_values = rem_annotations.index.to_numpy()
    consumed = np.zeros(len(rem_annotations), dtype=bool)
    produced = []

    for instruction in instructions:
        code, *arguments = instruction.split(",")

        if code == "c":
            # Create new box: arguments are left, right, top, bottom.
            coords = [float(a) for a in arguments]
            produced.append(pd.Series({
                "Begin Time (s)": coords[0],
                "End Time (s)": coords[1],
                "High Freq (Hz)": coords[2],
                "Low Freq (Hz)": coords[3]}))
            continue

        if code != "r" and code not in merge_rules:
            raise ValueError("Instruction not recognized")

        if code in merge_rules:
            group = rem_annotations.loc[[int(a) for a in arguments]]
            merged = group.iloc[0].copy()
            for column, reducer in merge_rules[code].items():
                merged[column] = getattr(group[column], reducer)()
            merged["Source"] = "Merged"
            produced.append(merged)

        # Removed rows and the inputs of a merge are all dropped from the table.
        for a in arguments:
            consumed[index_values == int(a)] = True

    return ~consumed, pd.DataFrame(produced)

In [None]:
# to_keep: boolean mask over rem_annotations (rows untouched by any instruction);
# new_boxes: the boxes produced by the i/u/m/c instructions.
to_keep, new_boxes = execute_instructions(instructions, rem_annotations)

In [None]:
# Final table = automatically finished boxes + untouched remaining boxes + manual results.
final_annotations = pd.concat([done_boxes, rem_annotations.loc[to_keep], new_boxes], ignore_index=True)
final_annotations = final_annotations.drop(["Source"], axis=1)
# "Index+Class" is only a plot label added for the manual review step; keep it out
# of the exported table (errors="ignore" in case the review cell was not run).
final_annotations = final_annotations.drop(columns=["Index+Class"], errors="ignore")

In [None]:
# Export the merged annotations next to the input tables.
# NOTE(review): to_csv writes comma-separated values by default, whereas the input
# selection tables were read as tab-separated .txt files -- confirm that whatever
# reads this "-final.txt" file expects commas.
final_annotations.to_csv("../data/{}-final.txt".format(recording_name), index=False)