In [None]:
import librosa
import numpy as np
from librosa.display import specshow
import matplotlib
from matplotlib import pyplot as plt
import glob
import os
import datetime
import IPython.display
import pandas as pd
import collections
work_dir = '/beegfs/vl1019/waspaa2019_data/coni_knight'
paths = glob.glob(work_dir + "/*.wav")

%matplotlib inline

# Analysis parameters shared by the STFT, mel-spectrogram and PCEN steps.
settings = dict(
    fmin=2000,          # lower frequency edge given to the mel spectrogram
    fmax=11025,         # upper frequency edge given to the mel spectrogram (sr / 2)
    hop_length=32,      # frame hop, given to both the STFT and PCEN
    n_fft=1024,         # FFT size of the STFT
    n_mels=128,         # number of mel bands
    sr=22050,           # sample rate every waveform is resampled to
    win_length=256,     # STFT window length
    window="flattop",   # STFT window type
    T=1.0,              # PCEN `time_constant`
    alpha=1.0,          # PCEN `gain`
    delta=0.0,          # PCEN `bias`
    r=1.0,              # PCEN `power`
    eps=1e-6,           # PCEN `eps`
)

# Typeset all figure text in a serif font (pyplot exposes this same rcParams object).
matplotlib.rcParams["font.family"] = "serif"

def parse_coni_knight(path):
    """Decode the metadata embedded in a recording's file name.

    The base name of ``path`` is split on underscores and the resulting
    fields are read by position:

    * field 0 -> ``distance`` (int)
    * field 1 -> ``vocalization_id`` (int)
    * field 2 -> ``nominal_distance``: its third dash-separated token (int)
    * field 3 -> date: characters 0-3 give the year, 4-5 the month and
      everything from index 7 onwards the day
    * field 4 -> time: characters 0-1 give the hour, 2-3 the minute

    Returns a dict with the keys ``file_name``, ``distance``,
    ``vocalization_id``, ``nominal_distance`` and ``datetime`` (in that
    order). An ``IndexError`` or ``ValueError`` propagates when the name
    does not follow this layout.
    """
    file_name = os.path.basename(path)
    fields = file_name.split("_")

    meta = {
        "file_name": file_name,
        "distance": int(fields[0]),
        "vocalization_id": int(fields[1]),
        "nominal_distance": int(fields[2].split("-")[2]),
    }

    date_str = fields[3]
    # NOTE(review): the day is read from index 7 onwards, so the character at
    # index 6 is skipped -- confirm this matches the date layout of the files.
    year, month, day = int(date_str[:4]), int(date_str[4:6]), int(date_str[7:])

    time_str = fields[4]
    hour, minute = int(time_str[:2]), int(time_str[2:4])

    meta["datetime"] = datetime.datetime(year, month, day, hour, minute)
    return meta

# Parse the file-name metadata of every recording into a table with one row
# per file. `data` is kept as a list (not a one-shot iterator) so that it
# remains usable after the DataFrame has been built.
data = [parse_coni_knight(path) for path in paths]
df = pd.DataFrame(data)

# Distinct vocalization ids in ascending order; the counter additionally holds
# how many recordings exist for each id.
voc_ids_counter = collections.Counter(df["vocalization_id"])
voc_ids = sorted(voc_ids_counter)

# Map each vocalization id to the file names of its recordings, ordered by
# increasing nominal distance.
# (A former `voc_id = voc_ids[-3]` assignment was dropped here: the loop below
# overwrote it immediately, and it raised IndexError with fewer than 3 ids.)
names_dict = {}
for voc_id in voc_ids:
    filtered_df = df[df["vocalization_id"] == voc_id]
    sorted_df = filtered_df.sort_values(by=['nominal_distance'])
    names_dict[voc_id] = list(sorted_df["file_name"])


# Define PCEN settings.
# NOTE(review): these values repeat the `settings` dictionary defined near the
# top of the file; consider keeping a single definition.
settings = dict(
    # Mel filterbank frequency range.
    fmin=2000,
    fmax=11025,
    # Short-term Fourier transform.
    hop_length=32,
    n_fft=1024,
    n_mels=128,
    sr=22050,
    win_length=256,
    window="flattop",
    # PCEN: T -> time_constant, alpha -> gain, delta -> bias, r -> power.
    T=1.0,
    alpha=1.0,
    delta=0.0,
    r=1.0,
    eps=1e-6,
)


# Vocalization to display: the fifth entry from the end of the sorted id list.
voc_id = voc_ids[-5]
# Alternative: voc_id = 3634

# Full paths of that vocalization's recordings, in the order stored in
# names_dict (sorted by nominal distance). os.path.join replaces the former
# `work_dir + os.path.expanduser("/") + name` concatenation and yields the
# same strings.
wav_paths = [os.path.join(work_dir, name) for name in names_dict[voc_id]]

# Positions (within names_dict[voc_id]) of the recordings that are kept.
subset = range(11)
# Alternative: subset = [0, 1, 3, 7, 9, 10]
wav_paths = [wav_paths[i] for i in subset]

# Per-recording results, stored in the same order as `wav_paths`:
#   Es          -- mel-frequency spectrograms (unpadded)
#   PCENs       -- PCEN-normalized spectrograms (unpadded)
#   lms_ranges  -- range (max - min) of the log-mel spectral flux
#   pcen_ranges -- range (max - min) of the PCEN values
Es, PCENs = [], []
lms_ranges, pcen_ranges = [], []


for wav_path in wav_paths:
    # Load at the file's native sample rate, then resample to the analysis
    # rate. The rates are passed by keyword because librosa >= 0.10 no longer
    # accepts them positionally; older releases accept the keywords as well.
    waveform, sample_rate = librosa.load(wav_path, sr=None)
    waveform = librosa.resample(
        waveform, orig_sr=sample_rate, target_sr=settings["sr"])

    # Rescale by 2**31 (presumably to a 32-bit integer amplitude range --
    # TODO confirm), then pad: a copy of the signal is placed in front of it
    # and a time-reversed copy behind it.
    waveform = waveform * (2**31)
    waveform = np.concatenate((waveform, waveform, waveform[::-1]))

    # Compute short-term Fourier transform (STFT)
    stft = librosa.stft(
        waveform,
        n_fft=settings["n_fft"],
        win_length=settings["win_length"],
        hop_length=settings["hop_length"],
        window=settings["window"])

    # Compute the squared complex modulus ("abs2") of the STFT.
    abs2_stft = (stft.real*stft.real) + (stft.imag*stft.imag)

    # Transform the STFT into a mel-frequency spectrogram.
    E = librosa.feature.melspectrogram(
        y=None,
        S=abs2_stft,
        sr=settings["sr"],
        n_fft=settings["n_fft"],
        n_mels=settings["n_mels"],
        htk=True,
        fmin=settings["fmin"],
        fmax=settings["fmax"])

    # Apply per-channel energy normalization (PCEN).
    PCEN = librosa.pcen(
        S=E,
        time_constant=settings["T"],
        sr=settings["sr"],
        hop_length=settings["hop_length"],
        gain=settings["alpha"],
        power=settings["r"],
        bias=settings["delta"],
        eps=settings["eps"])

    # Unpad: drop the first third of the frames (the leading copy of the
    # signal), then append to the result lists.
    E = E[:, (E.shape[1]//3):]
    PCEN = PCEN[:, (PCEN.shape[1]//3):]
    Es.append(E)
    PCENs.append(PCEN)

    # The statistics below only look at frames 10 .. (n_frames//2 - 2) of the
    # unpadded spectrograms, i.e. they skip the first 10 frames and stop just
    # before the midpoint.
    E_excerpt = E[:, 10:(E.shape[1]//2 - 1)]
    PCEN_excerpt = PCEN[:, 10:(PCEN.shape[1]//2 - 1)]

    # Spectral flux of the log-mel spectrogram: frame-to-frame increases of
    # log-energy (decreases are clipped to zero), summed over the mel bands.
    spectral_flux = np.sum(
        np.maximum(0, np.diff(np.log(E_excerpt), axis=1)), axis=0)
    lms_range = np.max(spectral_flux) - np.min(spectral_flux)
    lms_ranges.append(lms_range)

    pcen_range = np.max(PCEN_excerpt) - np.min(PCEN_excerpt)
    pcen_ranges.append(pcen_range)


plt.figure(figsize=(5, 15))

# Number of leading frames dropped from every spectrogram before display.
start_col = 5
# Last usable column: half the frame count of the shortest spectrogram, so
# that every slice below has the same width and np.stack succeeds. (Taking the
# maximum, as before, is identical when all recordings have equal length and
# fails otherwise.)
max_length = min(E.shape[1]//2 for E in Es)

# Stack the spectrograms into (recording, band, frame) tensors, compress them
# with log(1 + x) and rescale each tensor globally to the [0, 1] interval.
logE_tensor = np.log1p(np.stack([E[:, start_col:max_length] for E in Es]))
logE_tensor = (logE_tensor - np.min(logE_tensor)) / (np.max(logE_tensor) - np.min(logE_tensor))

log1pPCEN_tensor = np.log1p(np.stack([PCEN[:, start_col:max_length] for PCEN in PCENs]))
log1pPCEN_tensor = (log1pPCEN_tensor - np.min(log1pPCEN_tensor)) / (np.max(log1pPCEN_tensor) - np.min(log1pPCEN_tensor))
cmap = "magma"

# Like in BirdVoxDetect, we trim the top 8 frequency bands to avoid the effect of the Nyquist anti-aliasing filter.
top_bin = 120

# Range of mel bands searched for the marker drawn on each panel.
lower_sf_bin = 15
upper_sf_bin = 90

# One row per recording: log-mel spectrogram on the left, PCEN on the right.
n_rows = len(wav_paths)

for i, (logE, log1pPCEN, file_index) in enumerate(
        zip(logE_tensor, log1pPCEN_tensor, subset)):

    # Row label: nominal distance parsed from the file name. The trailing
    # spaces are part of the label text (presumably to keep the horizontal
    # label clear of the axis -- TODO confirm).
    file_name = names_dict[voc_id][file_index]
    dist_str = str(int(file_name.split("_")[2].split("-")[2])) + " m" + " " * 11

    # Display logmelspec on left column.
    plt.subplot(n_rows, 2, 2*i+1)
    specshow(logE[:top_bin, :], cmap=cmap)
    # Mark the largest frame-to-frame increase inside the searched bands.
    # The flat argmax index has to be unravelled with the shape of the
    # differenced array itself: it has one column fewer than its input and
    # excludes the first `start_col` columns.
    flux = np.diff(logE[lower_sf_bin:upper_sf_bin, start_col:], axis=1)
    argmax_row, argmax_col = np.unravel_index(np.argmax(flux), flux.shape)
    plt.scatter([start_col+argmax_col], [lower_sf_bin+argmax_row], s=80, facecolors='white')
    plt.clim(0, 1)
    plt.ylabel(dist_str, rotation=0)
    if i == 0:
        plt.title("Pointwise logarithm")

    # Display PCEN (square root applied for display only).
    plt.subplot(n_rows, 2, 2*i+2)
    specshow(log1pPCEN[:top_bin, :]**(0.5), cmap=cmap)
    # Mark the largest PCEN value inside the searched bands.
    pcen_region = log1pPCEN[lower_sf_bin:upper_sf_bin, start_col:]
    argmax_row, argmax_col = np.unravel_index(
        np.argmax(pcen_region), pcen_region.shape)
    plt.scatter([start_col+argmax_col], [lower_sf_bin+argmax_row], s=80, facecolors='white')
    plt.clim(0, 1)
    if i == 0:
        plt.title("PCEN")


# Export the figure both as vector (EPS) and as high-resolution raster (PNG).
plt.savefig("lostanlen_waspaa2019_coni-pcengrams.eps", bbox_inches="tight")
plt.savefig("lostanlen_waspaa2019_coni-pcengrams.png", bbox_inches="tight", dpi=1000)