In [None]:
from interpretability_utilities import plot_frame_attributions, plot_audio_attributions
from interpretability_utilities import load_workspace_file, zero_crossing_rate

import pandas as pd
import pickle

import numpy as np
import librosa
import librosa.display
from scipy.stats import norm

import mlflow
import torch

import matplotlib.pyplot as plt
import seaborn as sns

from captum.attr import Deconvolution, GuidedBackprop, NeuronDeconvolution, NeuronGuidedBackprop

## Settings and Utils

In [None]:
# Adjust according to your experiment
# The empty strings below are placeholders that must be filled in before running.
ref_fold = "8"  # fold identifier handed to load_workspace_file
run_id = ""  # MLflow run whose logged model and tags are loaded below
tracking_server = ""  # tracking server address; ":5000" is appended to it below
workspace_file = ""
dataset_dir = ""
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the PyTorch model logged under the run's "models" artifact path and switch it
# to evaluation mode.
mlflow.set_tracking_uri(f"{tracking_server}:5000")
logged_model = mlflow.pytorch.load_model(f"runs:/{run_id}/models")
logged_model = logged_model.eval()


# Fetch the run metadata; its tags carry the audio parameters read below.
client = mlflow.MlflowClient()
run = client.get_run(run_id)
run_data = run.data
tags = run_data.tags

# Feature visualization
# Tags are parsed as integers: sample rate plus the window/hop sizes that are later
# passed to librosa.stft and used to slice frames for the zero-crossing rate.
sr = int(tags["sample_rate"])
window_size = int(tags["window_size"])
hop_size = int(tags["hop_size"])
cur_window = 0  # start offset of the current frame; advanced by the ZCR loop

# Accumulator filled by the ZCR loop, one entry per (frame, audio) pair.
zcr_audios = {"avgZcr": [], "label": [], "Período (ms)": []}

In [None]:
# Load the inputs and labels of the reference fold through the project helper.
(inp_data, indexes, labels, _, lb_to_idx, _) = load_workspace_file(
    workspace_file, ref_fold, dataset_dir, device
)

# Inverse lookup (class index -> label name) and the indices of every label whose
# name starts with "albilora".
idx_to_label = {class_idx: name for name, class_idx in lb_to_idx.items()}
target = [class_idx for name, class_idx in lb_to_idx.items() if name.startswith("albilora")]

# check inputs attribution
# Label name of each input, taken from the arg-max along axis 1 of `labels`.
str_labels = [
    idx_to_label[int(class_id.cpu().detach().numpy())]
    for class_id in np.argmax(labels, axis=1)
]

# Enable gradients on the inputs (in place); the attribution calls below use them.
inp_data.requires_grad_()

In [None]:
def layer_attribution(model, layer, layer_name, inp_data, neuron, algorithm="deconv", verbose=False):
    """Collect per-input mean neuron attributions over a grid of neurons of one layer.

    Every neuron index ``(channel, time_step, mel_bin)`` in
    ``range(neuron[0]) x range(neuron[1]) x range(neuron[2])`` is attributed with the
    selected Captum neuron algorithm, and the attribution is averaged over dimension 1,
    which yields one value per input.

    Args:
        model: Model handed to the Captum attribution class.
        layer: Layer of ``model`` whose neurons are attributed.
        layer_name: Text stored in the "layer" column of the result.
        inp_data: Batched input tensor; dimension 0 indexes the inputs.
        neuron: Triple ``(channels, time_steps, mel_bins)`` giving how many indices to
            visit along each neuron dimension (grid extents, not one neuron index).
        algorithm: "deconv" (NeuronDeconvolution) or "guided" (NeuronGuidedBackprop).
        verbose: When True, print a running progress counter.

    Returns:
        dict of equally long lists keyed "input", "layer", "channel", "time_steps",
        "mel_bins" and "layer_avg_attr", with one entry per (neuron, input) pair.
        For each neuron the inputs appear in order 0..n-1.

    Raises:
        ValueError: If ``algorithm`` is neither "deconv" nor "guided".
    """
    # Validate first so a bad argument fails before any attribution object is built.
    if algorithm not in ("deconv", "guided"):
        raise ValueError(f"Incorrect algorithm {algorithm}. Expected 'deconv' or 'guided'")

    attributor_cls = NeuronDeconvolution if algorithm == "deconv" else NeuronGuidedBackprop
    neuron_attributor = attributor_cls(model, layer)

    channels, time_steps, mel_bins = neuron[0], neuron[1], neuron[2]
    iterations = channels * time_steps * mel_bins
    tot = 0

    out_dict = {"input": [], "layer": [], "channel": [], "time_steps": [], "mel_bins": [], "layer_avg_attr": []}
    data_size = inp_data.size()[0]
    input_ids = list(range(data_size))
    # Decide from the tensor itself, not from a notebook-level `device` variable,
    # whether the CUDA cache needs to be released after each neuron.
    on_cuda = inp_data.is_cuda

    for channel in range(channels):
        for time_step in range(time_steps):
            for mel_bin in range(mel_bins):
                neuron_attr = neuron_attributor.attribute(inp_data, (channel, time_step, mel_bin))
                out_dict["layer_avg_attr"].extend(torch.mean(neuron_attr, dim=1).cpu().detach().numpy().tolist())

                if on_cuda:
                    # Free the attribution tensor right away to keep GPU memory bounded.
                    del neuron_attr
                    torch.cuda.empty_cache()

                out_dict["input"].extend(input_ids)
                out_dict["layer"].extend([layer_name]*data_size)
                out_dict["channel"].extend([channel]*data_size)
                out_dict["time_steps"].extend([time_step]*data_size)
                out_dict["mel_bins"].extend([mel_bin]*data_size)
                if verbose:
                    tot += 1
                    print(f"Progress: {tot}/{iterations}-----------{100*tot/iterations:.2f}%", end="\r")

    return out_dict

In [None]:
# Zero-crossing rate (ZCR), used to look for possibly percussive audios.
# Adapted from https://github.com/tyiannak/pyAudioAnalysis
# NOTE(review): this definition shadows the zero_crossing_rate imported from
# interpretability_utilities at the top of the notebook — keep only one of them.
def zero_crossing_rate(frame):
    """Return the fraction of consecutive sample pairs in ``frame`` whose sign differs.

    Args:
        frame: 1-D sequence or array of samples.

    Returns:
        numpy.float64: number of sign changes divided by ``len(frame) - 1``. A step
        to or from an exact zero counts as half a change. Frames with fewer than two
        samples have no sample pair and yield 0.0 (previously NaN or -0.0).
    """
    count = len(frame)
    if count < 2:
        return np.float64(0.0)
    # |diff(sign)| is 2 for a full sign flip, hence the division by 2.
    count_zero = np.sum(np.abs(np.diff(np.sign(frame)))) / 2
    return np.float64(count_zero) / np.float64(count - 1.0)

In [None]:
# Seeded generator shared by the neuron-grid draw and every bootstrap cell, so results
# are reproducible only when the cells run in notebook order.
rng = np.random.default_rng(135)

In [None]:
# Draw the extents of a neuron grid (channels x time_steps x mel_bins) holding roughly
# `samples` neurons; a perfect cube would have a side of 360 ** (1/3), about 7.11.
samples = 360 # Approximately
# rng.integers(high) draws from [0, high). A draw of 0 would divide by zero below (or
# give an empty grid), so both draws are clamped to at least 1. Non-zero draws, and
# therefore the seeded results, are unchanged.
channels = max(1, int(rng.integers(64)))
time_steps = max(1, int(rng.integers(np.floor(samples/channels))))
mel_bins = int(np.floor(samples / (channels * time_steps)))
# NOTE(review): nothing here bounds time_steps/mel_bins by the layer's output size —
# confirm the drawn extents fit every block they are used with.
print(channels * time_steps * mel_bins, channels, time_steps, mel_bins)

## Zeiler and Fergus (2014) - Deconvolution

### Model attribution

In [None]:
# Captum Deconvolution attributor built on the logged model.
deconv = Deconvolution(logged_model)

General attribution

In [None]:
# Attribute every input with respect to its own class index (arg-max of `labels` along axis 1).
gen_attribution = deconv.attribute(inp_data, target=np.argmax(labels, axis=1))

In [None]:
# Project plotting helper. The Portuguese title reads: "Mean model attribution for each
# frame of the validation set using Deconvolution".
plot_frame_attributions(gen_attribution, title="Atribuição média do modelo para cada frame do conjunto de validação usando Deconvolution")

In [None]:
# Project plotting helper. The Portuguese title reads: "Mean model attribution for each
# audio of the validation set using Deconvolution".
plot_audio_attributions(gen_attribution, "Atribuição média do modelo para cada áudio do conjunto de validação usando Deconvolution")

In [None]:
# Bar chart of the attribution at each position along axis 1, averaged over all inputs.
fig, ax = plt.subplots(figsize=(10, 6))

n_positions = gen_attribution.size()[1]
ax.bar(np.arange(n_positions), np.mean(gen_attribution.cpu().detach().numpy(), axis=0))

ax.set_xlabel("Tempo [s]")
ax.set_ylabel("Atribuição")

# One tick every int(sr/10) samples (about 0.1 s). The labels are computed from the
# tick positions, so their number always matches the ticks instead of relying on a
# hard-coded 0.75 s clip length.
tick_positions = np.arange(0, n_positions, int(sr/10))
ax.set_xticks(tick_positions)
ax.set_xticklabels([f"{position / sr:.1f}" for position in tick_positions])

ax.set_title("Atribuição média do modelo para cada frame do conjunto de validação usando Deconvolution")

# plt.show() rather than fig.show(): the latter emits a warning on non-GUI (inline) backends.
plt.show()

In [None]:
# Mean attribution of each input (average over axis 1), plotted per species label.
temp_df = pd.DataFrame({
    "Espécies": str_labels,
    "Atribuição": gen_attribution.cpu().detach().numpy().mean(axis=1),
})

sns.stripplot(data=temp_df, x="Atribuição", y="Espécies", hue="Espécies",
              palette="deep", legend=False)
# The frame is only needed for this plot.
del temp_df

### Neuron attribution

- Neuron's indices: (0..63, 0..37, 0..31) - (channels, time_steps or num_frames, mel_bins), i.e., the neuron's output dimensions
- the number of channels always doubles
- num_frames = 1 + ceil(len_y / hop_length) if center is True,
- else 1 + ceil((len_y - n_fft) / hop_length), where len_y is the length of the audio

First Convolutional Block

In [None]:
# Neuron-level Deconvolution attributor for the first convolutional block of the model.
neuron_deconv_conv1 = NeuronDeconvolution(logged_model, logged_model.base.conv_block1)

In [None]:
# Attribution of the neuron at index (channel 0, time step 37, mel bin 31).
neuron_ca_attributions = neuron_deconv_conv1.attribute(inp_data, (0, 37, 31))

In [None]:
# Plot the attributions of neuron (0, 37, 31) with the project helper.
plot_frame_attributions(neuron_ca_attributions, title="Average Frames importance for a Neuron on 1st Conv Block")

Again for the same block

In [None]:
# Attribution of a second neuron of the same block: (channel 33, time step 15, mel bin 12).
neuron_ca_attributions_2 = neuron_deconv_conv1.attribute(inp_data, (33, 15, 12))

In [None]:
# Plot the attributions of neuron (33, 15, 12); the title is the same as for the previous neuron.
plot_frame_attributions(neuron_ca_attributions_2, title="Average Frames importance for a Neuron on 1st Conv Block")

### Layer attribution

Running the algorithm for all 77824 neurons takes too much time. Instead, I will build a bootstrap distribution from a subset of neurons to speed up processing and minimize bias in the analysis.

#### First block

In [None]:
%%time
# Deconvolution (the default algorithm) over the sampled neuron grid of the first block.
block_attr_block_1_deconv_meta = layer_attribution(
    model=logged_model,
    layer=logged_model.base.conv_block1,
    layer_name="Conv 1 block",
    inp_data=inp_data,
    neuron=(channels, time_steps, mel_bins),
    verbose=True,
)

In [None]:
# Persist the raw per-neuron attributions of this block before resampling them.
df = pd.DataFrame(block_attr_block_1_deconv_meta)
df.to_csv("data/block_attr_block_1_deconv_meta.csv", index=None)
uniques = df["input"].unique()

# layer_attribution appends one run of len(uniques) rows per neuron (inputs 0..n-1 in
# order), so the attribution column reshapes to a (neurons, inputs) matrix.
n_bootstrap = 10000
n_inputs = len(uniques)
attr_by_neuron = df["layer_avg_attr"].to_numpy().reshape(-1, n_inputs)

# Bootstrap over neurons separately for each input: every replicate draws, with
# replacement, one neuron per input. Drawing from the pooled column and numbering the
# draws 0..n-1 afterwards would detach each value from the input it belongs to and
# turn every later groupby("input") statistic into noise.
neuron_draws = rng.integers(attr_by_neuron.shape[0], size=(n_bootstrap, n_inputs))
attr_mean_bootstrap = pd.DataFrame({
    "input": np.tile(np.arange(n_inputs), n_bootstrap),
    "attribution": attr_by_neuron[neuron_draws, np.arange(n_inputs)].ravel(),
})

# NOTE(review): this is the spread of the individual resampled values, not the standard
# deviation of bootstrap means — confirm it is the scale wanted for the interval below.
std_error = np.std(attr_mean_bootstrap["attribution"], ddof=1)
pop_std_error = std_error * np.sqrt(len(df))
print(std_error, pop_std_error)

In [None]:
# Mean resampled attribution of each input, drawn against the input index.
input_attr = attr_mean_bootstrap.groupby("input")["attribution"].mean()
input_positions = np.arange(len(uniques))
plt.figure(figsize=(10, 6))
plt.plot(input_positions, input_attr);

90% confidence interval

In [None]:
# Normal-approximation bounds (5th and 95th percentiles) centred on the mean of the
# resampled attributions, with `std_error` as the scale.
point_estimate = attr_mean_bootstrap["attribution"].mean()
lower, upper = norm.ppf([0.05, 0.95], loc=point_estimate, scale=std_error)
print(lower, upper)

In [None]:
# Save the bootstrap draws; the "Feature Visualizations" section reloads this file.
attr_mean_bootstrap.to_csv("data/block_bootstrap_attr_block_1_deconv_meta.csv", index=None)

#### Second block

In [None]:
%%time
# Deconvolution (the default algorithm) over the sampled neuron grid of the second block.
block_attr_block_2_deconv_meta = layer_attribution(
    model=logged_model,
    layer=logged_model.base.conv_block2,
    layer_name="Conv 2 block",
    inp_data=inp_data,
    neuron=(channels, time_steps, mel_bins),
    verbose=True,
)

In [None]:
# Persist the raw per-neuron attributions of this block before resampling them.
df = pd.DataFrame(block_attr_block_2_deconv_meta)
df.to_csv("data/block_attr_block_2_deconv_meta.csv", index=None)
uniques = df["input"].unique()

# layer_attribution appends one run of len(uniques) rows per neuron (inputs 0..n-1 in
# order), so the attribution column reshapes to a (neurons, inputs) matrix.
n_bootstrap = 10000
n_inputs = len(uniques)
attr_by_neuron = df["layer_avg_attr"].to_numpy().reshape(-1, n_inputs)

# Bootstrap over neurons separately for each input: every replicate draws, with
# replacement, one neuron per input. Drawing from the pooled column and numbering the
# draws 0..n-1 afterwards would detach each value from the input it belongs to and
# turn every later groupby("input") statistic into noise.
neuron_draws = rng.integers(attr_by_neuron.shape[0], size=(n_bootstrap, n_inputs))
attr_mean_bootstrap = pd.DataFrame({
    "input": np.tile(np.arange(n_inputs), n_bootstrap),
    "attribution": attr_by_neuron[neuron_draws, np.arange(n_inputs)].ravel(),
})

# NOTE(review): this is the spread of the individual resampled values, not the standard
# deviation of bootstrap means — confirm it is the scale wanted for the interval below.
std_error = np.std(attr_mean_bootstrap["attribution"], ddof=1)
pop_std_error = std_error * np.sqrt(len(df))
print(std_error, pop_std_error)

In [None]:
# Mean resampled attribution of each input, drawn against the input index.
input_attr = attr_mean_bootstrap.groupby("input")["attribution"].mean()
input_positions = np.arange(len(uniques))
plt.figure(figsize=(10, 6))
plt.plot(input_positions, input_attr);

90% confidence interval

In [None]:
# Normal-approximation bounds (5th and 95th percentiles) centred on the mean of the
# resampled attributions, with `std_error` as the scale.
point_estimate = attr_mean_bootstrap["attribution"].mean()
lower, upper = norm.ppf([0.05, 0.95], loc=point_estimate, scale=std_error)
print(lower, upper)

In [None]:
# Save the bootstrap draws; the "Feature Visualizations" section reloads this file.
attr_mean_bootstrap.to_csv("data/block_bootstrap_attr_block_2_deconv_meta.csv", index=None)

#### 5th block

In [None]:
# Fixed neuron-grid extents for the 5th block: 90 x 2 x 2 = 360 neuron indices, the
# same total as `samples` used for the randomly drawn grid of the earlier blocks.
time_steps_5th_layer = 2
mel_bins_5th_layer = 2
channels_5th_layer = 90

In [None]:
%%time
# Deconvolution (the default algorithm) over the fixed neuron grid of the fifth block.
block_attr_block_5_deconv_meta = layer_attribution(
    model=logged_model,
    layer=logged_model.base.conv_block5,
    layer_name="Conv 5 block",
    inp_data=inp_data,
    neuron=(channels_5th_layer, time_steps_5th_layer, mel_bins_5th_layer),
    verbose=True,
)

In [None]:
# Persist the raw per-neuron attributions of this block before resampling them.
df = pd.DataFrame(block_attr_block_5_deconv_meta)
df.to_csv("data/block_attr_block_5_deconv_meta.csv", index=None)
uniques = df["input"].unique()

# layer_attribution appends one run of len(uniques) rows per neuron (inputs 0..n-1 in
# order), so the attribution column reshapes to a (neurons, inputs) matrix.
n_bootstrap = 10000
n_inputs = len(uniques)
attr_by_neuron = df["layer_avg_attr"].to_numpy().reshape(-1, n_inputs)

# Bootstrap over neurons separately for each input: every replicate draws, with
# replacement, one neuron per input. Drawing from the pooled column and numbering the
# draws 0..n-1 afterwards would detach each value from the input it belongs to and
# turn every later groupby("input") statistic into noise.
neuron_draws = rng.integers(attr_by_neuron.shape[0], size=(n_bootstrap, n_inputs))
attr_mean_bootstrap = pd.DataFrame({
    "input": np.tile(np.arange(n_inputs), n_bootstrap),
    "attribution": attr_by_neuron[neuron_draws, np.arange(n_inputs)].ravel(),
})

# NOTE(review): this is the spread of the individual resampled values, not the standard
# deviation of bootstrap means — confirm it is the scale wanted for the interval below.
std_error = np.std(attr_mean_bootstrap["attribution"], ddof=1)
pop_std_error = std_error * np.sqrt(len(df))
print(std_error, pop_std_error)

In [None]:
# Mean resampled attribution of each input, drawn against the input index.
input_attr = attr_mean_bootstrap.groupby("input")["attribution"].mean()
input_positions = np.arange(len(uniques))
plt.figure(figsize=(10, 6))
plt.plot(input_positions, input_attr);

90% confidence interval

In [None]:
# Normal-approximation bounds (5th and 95th percentiles) centred on the mean of the
# resampled attributions, with `std_error` as the scale.
point_estimate = attr_mean_bootstrap["attribution"].mean()
lower, upper = norm.ppf([0.05, 0.95], loc=point_estimate, scale=std_error)
print(lower, upper)

In [None]:
# Save the bootstrap draws; the "Feature Visualizations" section reloads this file.
attr_mean_bootstrap.to_csv("data/block_bootstrap_attr_block_5_deconv_meta.csv", index=None)

## Springenberg et al. (2015) - Guided Backpropagation

### Model attribution

In [None]:
# Captum Guided Backpropagation attributor built on the logged model.
guided_backprop = GuidedBackprop(logged_model)

In [None]:
# Attribute every input with respect to its own class index (arg-max of `labels` along axis 1).
gen_attribution_guided = guided_backprop.attribute(inp_data, target=np.argmax(labels, axis=1))

In [None]:
# Project plotting helper. The Portuguese title reads: "Mean model attribution for each
# frame of the validation set using Guided BackPropagation".
plot_frame_attributions(gen_attribution_guided, title="Atribuição média do modelo para cada frame do conjunto de validação usando Guided BackPropagation")

In [None]:
# Project plotting helper. The Portuguese title reads: "Mean model attribution for each
# audio of the validation set using Guided BackPropagation".
plot_audio_attributions(gen_attribution_guided, "Atribuição média do modelo para cada áudio do conjunto de validação usando Guided BackPropagation")

In [None]:
# Bar chart of the attribution at each position along axis 1, averaged over all inputs.
fig, ax = plt.subplots(figsize=(10, 6))

n_positions = gen_attribution_guided.size()[1]
ax.bar(np.arange(n_positions), np.mean(gen_attribution_guided.cpu().detach().numpy(), axis=0))

ax.set_xlabel("Tempo [s]")
ax.set_ylabel("Atribuição")

# One tick every int(sr/10) samples (about 0.1 s). The labels are computed from the
# tick positions, so their number always matches the ticks instead of relying on a
# hard-coded 0.75 s clip length.
tick_positions = np.arange(0, n_positions, int(sr/10))
ax.set_xticks(tick_positions)
ax.set_xticklabels([f"{position / sr:.1f}" for position in tick_positions])

ax.set_title("Atribuição média do modelo para cada frame do conjunto de validação usando Guided BackPropagation")

# plt.show() rather than fig.show(): the latter emits a warning on non-GUI (inline) backends.
plt.show()

In [None]:
# Mean attribution of each input (average over axis 1), plotted per species label.
temp_df = pd.DataFrame({
    "Espécies": str_labels,
    "Atribuição": gen_attribution_guided.cpu().detach().numpy().mean(axis=1),
})

sns.stripplot(data=temp_df, x="Atribuição", y="Espécies", hue="Espécies",
              palette="deep", legend=False)
# The frame is only needed for this plot.
del temp_df

### Layer attribution

#### First block

In [None]:
%%time
# Guided Backpropagation over the sampled neuron grid of the first block.
block_attr_block_1_guided_meta = layer_attribution(
    model=logged_model,
    layer=logged_model.base.conv_block1,
    layer_name="Conv 1 block",
    inp_data=inp_data,
    neuron=(channels, time_steps, mel_bins),
    algorithm="guided",
    verbose=True,
)

In [None]:
# Persist the raw per-neuron attributions of this block before resampling them.
df = pd.DataFrame(block_attr_block_1_guided_meta)
df.to_csv("data/block_attr_block_1_guided_meta.csv", index=None)
uniques = df["input"].unique()

# layer_attribution appends one run of len(uniques) rows per neuron (inputs 0..n-1 in
# order), so the attribution column reshapes to a (neurons, inputs) matrix.
n_bootstrap = 10000
n_inputs = len(uniques)
attr_by_neuron = df["layer_avg_attr"].to_numpy().reshape(-1, n_inputs)

# Bootstrap over neurons separately for each input: every replicate draws, with
# replacement, one neuron per input. Drawing from the pooled column and numbering the
# draws 0..n-1 afterwards would detach each value from the input it belongs to and
# turn every later groupby("input") statistic into noise.
neuron_draws = rng.integers(attr_by_neuron.shape[0], size=(n_bootstrap, n_inputs))
attr_mean_bootstrap = pd.DataFrame({
    "input": np.tile(np.arange(n_inputs), n_bootstrap),
    "attribution": attr_by_neuron[neuron_draws, np.arange(n_inputs)].ravel(),
})

# NOTE(review): this is the spread of the individual resampled values, not the standard
# deviation of bootstrap means — confirm it is the scale wanted for the interval below.
std_error = np.std(attr_mean_bootstrap["attribution"], ddof=1)
pop_std_error = std_error * np.sqrt(len(df))
print(std_error, pop_std_error)

In [None]:
# Mean resampled attribution of each input, drawn against the input index.
input_attr = attr_mean_bootstrap.groupby("input")["attribution"].mean()
input_positions = np.arange(len(uniques))
plt.figure(figsize=(10, 6))
plt.plot(input_positions, input_attr);

90% confidence interval

In [None]:
# Normal-approximation bounds (5th and 95th percentiles) centred on the mean of the
# resampled attributions, with `std_error` as the scale.
point_estimate = attr_mean_bootstrap["attribution"].mean()
lower, upper = norm.ppf([0.05, 0.95], loc=point_estimate, scale=std_error)
print(lower, upper)

In [None]:
# Save the bootstrap draws; the "Feature Visualizations" section reloads this file.
attr_mean_bootstrap.to_csv("data/block_bootstrap_attr_block_1_guided_meta.csv", index=None)

#### Second block

In [None]:
%%time
# Guided Backpropagation over the sampled neuron grid of the second block.
block_attr_block_2_guided_meta = layer_attribution(
    model=logged_model,
    layer=logged_model.base.conv_block2,
    layer_name="Conv 2 block",
    inp_data=inp_data,
    neuron=(channels, time_steps, mel_bins),
    algorithm="guided",
    verbose=True,
)

In [None]:
# Persist the raw per-neuron attributions of this block before resampling them.
df = pd.DataFrame(block_attr_block_2_guided_meta)
df.to_csv("data/block_attr_block_2_guided_meta.csv", index=None)
uniques = df["input"].unique()

# layer_attribution appends one run of len(uniques) rows per neuron (inputs 0..n-1 in
# order), so the attribution column reshapes to a (neurons, inputs) matrix.
n_bootstrap = 10000
n_inputs = len(uniques)
attr_by_neuron = df["layer_avg_attr"].to_numpy().reshape(-1, n_inputs)

# Bootstrap over neurons separately for each input: every replicate draws, with
# replacement, one neuron per input. Drawing from the pooled column and numbering the
# draws 0..n-1 afterwards would detach each value from the input it belongs to and
# turn every later groupby("input") statistic into noise.
neuron_draws = rng.integers(attr_by_neuron.shape[0], size=(n_bootstrap, n_inputs))
attr_mean_bootstrap = pd.DataFrame({
    "input": np.tile(np.arange(n_inputs), n_bootstrap),
    "attribution": attr_by_neuron[neuron_draws, np.arange(n_inputs)].ravel(),
})

# NOTE(review): this is the spread of the individual resampled values, not the standard
# deviation of bootstrap means — confirm it is the scale wanted for the interval below.
std_error = np.std(attr_mean_bootstrap["attribution"], ddof=1)
pop_std_error = std_error * np.sqrt(len(df))
print(std_error, pop_std_error)

In [None]:
# Mean resampled attribution of each input, drawn against the input index.
input_attr = attr_mean_bootstrap.groupby("input")["attribution"].mean()
input_positions = np.arange(len(uniques))
plt.figure(figsize=(10, 6))
plt.plot(input_positions, input_attr);

90% confidence interval

In [None]:
# Normal-approximation bounds (5th and 95th percentiles) centred on the mean of the
# resampled attributions, with `std_error` as the scale.
point_estimate = attr_mean_bootstrap["attribution"].mean()
lower, upper = norm.ppf([0.05, 0.95], loc=point_estimate, scale=std_error)
print(lower, upper)

In [None]:
# Save the bootstrap draws; the "Feature Visualizations" section reloads this file.
attr_mean_bootstrap.to_csv("data/block_bootstrap_attr_block_2_guided_meta.csv", index=None)

#### 5th block

In [None]:
# Fixed neuron-grid extents for the 5th block (90 x 2 x 2 = 360 neuron indices).
# These repeat the values assigned in the Deconvolution section.
time_steps_5th_layer = 2
mel_bins_5th_layer = 2
channels_5th_layer = 90

In [None]:
%%time
# Guided Backpropagation over the fixed neuron grid of the fifth block.
block_attr_block_5_guided_meta = layer_attribution(
    model=logged_model,
    layer=logged_model.base.conv_block5,
    layer_name="Conv 5 block",
    inp_data=inp_data,
    neuron=(channels_5th_layer, time_steps_5th_layer, mel_bins_5th_layer),
    algorithm="guided",
    verbose=True,
)

In [None]:
# Persist the raw per-neuron attributions of this block before resampling them.
df = pd.DataFrame(block_attr_block_5_guided_meta)
df.to_csv("data/block_attr_block_5_guided_meta.csv", index=None)
uniques = df["input"].unique()

# layer_attribution appends one run of len(uniques) rows per neuron (inputs 0..n-1 in
# order), so the attribution column reshapes to a (neurons, inputs) matrix.
n_bootstrap = 10000
n_inputs = len(uniques)
attr_by_neuron = df["layer_avg_attr"].to_numpy().reshape(-1, n_inputs)

# Bootstrap over neurons separately for each input: every replicate draws, with
# replacement, one neuron per input. Drawing from the pooled column and numbering the
# draws 0..n-1 afterwards would detach each value from the input it belongs to and
# turn every later groupby("input") statistic into noise.
neuron_draws = rng.integers(attr_by_neuron.shape[0], size=(n_bootstrap, n_inputs))
attr_mean_bootstrap = pd.DataFrame({
    "input": np.tile(np.arange(n_inputs), n_bootstrap),
    "attribution": attr_by_neuron[neuron_draws, np.arange(n_inputs)].ravel(),
})

# NOTE(review): this is the spread of the individual resampled values, not the standard
# deviation of bootstrap means — confirm it is the scale wanted for the interval below.
std_error = np.std(attr_mean_bootstrap["attribution"], ddof=1)
pop_std_error = std_error * np.sqrt(len(df))
print(std_error, pop_std_error)

In [None]:
# Mean resampled attribution of each input, drawn against the input index.
input_attr = attr_mean_bootstrap.groupby("input")["attribution"].mean()
input_positions = np.arange(len(uniques))
plt.figure(figsize=(10, 6))
plt.plot(input_positions, input_attr);

90% confidence interval

In [None]:
# Normal-approximation bounds (5th and 95th percentiles) centred on the mean of the
# resampled attributions, with `std_error` as the scale.
point_estimate = attr_mean_bootstrap["attribution"].mean()
lower, upper = norm.ppf([0.05, 0.95], loc=point_estimate, scale=std_error)
print(lower, upper)

In [None]:
# Save the bootstrap draws; the "Feature Visualizations" section reloads this file.
attr_mean_bootstrap.to_csv("data/block_bootstrap_attr_block_5_guided_meta.csv", index=None)

## Feature Visualizations

In [None]:
# Reload the bootstrap draws written by the layer-attribution sections above
# (columns "input" and "attribution"), one frame per block and method.
# Deconvolution
deconv_1st_block = pd.read_csv("data/block_bootstrap_attr_block_1_deconv_meta.csv")
deconv_2nd_block = pd.read_csv("data/block_bootstrap_attr_block_2_deconv_meta.csv")
deconv_5th_block = pd.read_csv("data/block_bootstrap_attr_block_5_deconv_meta.csv")

# Guided Backpropagation
guided_1st_block = pd.read_csv("data/block_bootstrap_attr_block_1_guided_meta.csv")
guided_2nd_block = pd.read_csv("data/block_bootstrap_attr_block_2_guided_meta.csv")
guided_5th_block = pd.read_csv("data/block_bootstrap_attr_block_5_guided_meta.csv")


1st block

In [None]:
# Per-input mean of the bootstrapped attributions of the 1st block, one strip plot per
# attribution method, with the species label of each input on the y axis.
grouped_1st_deconv_block = deconv_1st_block.groupby("input")
grouped_1st_guided_block = guided_1st_block.groupby("input")

for method_name, grouped_block in (
    ("Deconvolution", grouped_1st_deconv_block),
    ("Guided Backpropagation", grouped_1st_guided_block),
):
    _, axis = plt.subplots()

    sns.stripplot(x=grouped_block["attribution"].mean(), y=str_labels,
            palette="deep", hue=str_labels, ax=axis, legend=False)

    # Title added so the two otherwise identical-looking figures can be told apart.
    axis.set_title(method_name)
    axis.set_xlabel("Atribuição")
    axis.set_ylabel("Espécies")
    plt.show()

2nd block

In [None]:
# Per-input mean of the bootstrapped attributions of the 2nd block, one strip plot per
# attribution method, with the species label of each input on the y axis.
grouped_2nd_deconv_block = deconv_2nd_block.groupby("input")
grouped_2nd_guided_block = guided_2nd_block.groupby("input")

for method_name, grouped_block in (
    ("Deconvolution", grouped_2nd_deconv_block),
    ("Guided Backpropagation", grouped_2nd_guided_block),
):
    _, axis = plt.subplots()

    sns.stripplot(x=grouped_block["attribution"].mean(), y=str_labels,
            palette="deep", hue=str_labels, ax=axis, legend=False)

    # Title added so the two otherwise identical-looking figures can be told apart.
    axis.set_title(method_name)
    axis.set_xlabel("Atribuição")
    axis.set_ylabel("Espécies")
    plt.show()

5th block

In [None]:
# Per-input mean of the bootstrapped attributions of the 5th block, one strip plot per
# attribution method, with the species label of each input on the y axis.
grouped_5th_deconv_block = deconv_5th_block.groupby("input")
grouped_5th_guided_block = guided_5th_block.groupby("input")

for method_name, grouped_block in (
    ("Deconvolution", grouped_5th_deconv_block),
    ("Guided Backpropagation", grouped_5th_guided_block),
):
    _, axis = plt.subplots()

    sns.stripplot(x=grouped_block["attribution"].mean(), y=str_labels,
            palette="deep", hue=str_labels, ax=axis, legend=False)

    # Title added so the two otherwise identical-looking figures can be told apart.
    axis.set_title(method_name)
    axis.set_xlabel("Atribuição")
    axis.set_ylabel("Espécies")
    plt.show()

Largest attributions

In [None]:
# Inputs with the largest summed bootstrap attribution under Deconvolution:
# top 3 for blocks 1 and 2, top 4 for block 5.
largest_attr_1st_block_deconv = (
    deconv_1st_block.groupby("input")["attribution"].sum().nlargest(3)
)

largest_attr_2nd_block_deconv = (
    deconv_2nd_block.groupby("input")["attribution"].sum().nlargest(3)
)

# largest mean scale (1e-8)
largest_attr_5th_block_deconv = (
    deconv_5th_block.groupby("input")["attribution"].sum().nlargest(4)
)

print(largest_attr_1st_block_deconv, largest_attr_2nd_block_deconv, largest_attr_5th_block_deconv)

In [None]:
# Inputs with the largest summed bootstrap attribution under Guided Backpropagation:
# top 3 for blocks 1 and 2, top 4 for block 5.
largest_attr_1st_block_guided = (
    guided_1st_block.groupby("input")["attribution"].sum().nlargest(3)
)

largest_attr_2nd_block_guided = (
    guided_2nd_block.groupby("input")["attribution"].sum().nlargest(3)
)

# greatest number of impactful attributions
largest_attr_5th_block_guided = (
    guided_5th_block.groupby("input")["attribution"].sum().nlargest(4)
)

print(largest_attr_1st_block_guided, largest_attr_2nd_block_guided, largest_attr_5th_block_guided)

In [None]:
# Union of the input indices selected above, across both methods and all three blocks.
unique_inputs = largest_attr_1st_block_deconv.index
for top_attr in (
    largest_attr_2nd_block_deconv,
    largest_attr_5th_block_deconv,
    largest_attr_1st_block_guided,
    largest_attr_2nd_block_guided,
    largest_attr_5th_block_guided,
):
    unique_inputs = unique_inputs.union(top_attr.index)

# Show the selected indices and how many there are.
unique_inputs, len(unique_inputs)

Iterate over each audio considering the parameters for spectrogram generation

In [None]:
# Frame-wise zero-crossing rate of every audio, sliced with the spectrogram window and
# hop sizes. The accumulators are (re)initialised here so the cell can be re-run on its
# own without inheriting a consumed `cur_window` or an already converted `zcr_audios`.
cur_window = 0
num_frame = 1
num_audios = len(inp_data)
zcr_audios = {"avgZcr": [], "label": [], "Período (ms)": []}

# Move the waveforms to NumPy once instead of once per frame and audio.
audio_np = inp_data.cpu().detach().numpy()

while cur_window + window_size - 1 < audio_np.shape[1]:

    for i in range(num_audios):
        frame = audio_np[i][cur_window:cur_window+window_size]

        zcr_audios["avgZcr"].append(zero_crossing_rate(frame))
        # Label of audio i. The previous arg-max over the whole `labels` tensor gave
        # every row the same (flattened) index instead of the class of this audio.
        zcr_audios["label"].append(str_labels[i])
        # NOTE(review): the stored value is the 1-based frame number, although the
        # column name says "(ms)".
        zcr_audios["Período (ms)"].append(num_frame)

    num_frame += 1
    cur_window += hop_size

In [None]:
# Rebind the accumulator dict as a DataFrame (one row per frame and audio) and display it.
zcr_audios = pd.DataFrame(zcr_audios)
zcr_audios

In [None]:
# Step histogram of the zero-crossing rate over all frames and audios.
_, ax = plt.subplots()
sns.histplot(x="avgZcr", data=zcr_audios, element="step", fill=False, ax=ax);

In [None]:
# Split the ZCR rows into four consecutive frame-number ranges: 1-18, 19-36, 37-54, 55+.
frame_number = zcr_audios["Período (ms)"]

first_18_frames_zcr_audios = zcr_audios[frame_number <= 18]
frames_18_36_zcr_audios = zcr_audios[(frame_number > 18) & (frame_number <= 36)]
frames_36_54_zcr_audios = zcr_audios[(frame_number > 36) & (frame_number <= 54)]
frames_54_above_zcr_audios = zcr_audios[frame_number > 54]

In [None]:
# ZCR histograms for frames up to 18, one step curve per frame number.
_, ax = plt.subplots()
sns.histplot(data=first_18_frames_zcr_audios, x="avgZcr", hue="Período (ms)",
             element="step", fill=False, ax=ax)
ax.set(xlabel="Taxa média de cruzamento de zero", ylabel="Quantidade");

In [None]:
# ZCR histograms for frames 19 to 36, one step curve per frame number.
_, ax = plt.subplots()
sns.histplot(data=frames_18_36_zcr_audios, x="avgZcr", hue="Período (ms)",
             element="step", fill=False, ax=ax)
ax.set(xlabel="Taxa média de cruzamento de zero", ylabel="Quantidade");

In [None]:
# ZCR histograms for frames 37 to 54, one step curve per frame number.
_, ax = plt.subplots()
sns.histplot(data=frames_36_54_zcr_audios, x="avgZcr", hue="Período (ms)",
             element="step", fill=False, ax=ax)
ax.set(xlabel="Taxa média de cruzamento de zero", ylabel="Quantidade");

In [None]:
# ZCR histograms for frames above 54, one step curve per frame number.
_, ax = plt.subplots()
sns.histplot(data=frames_54_above_zcr_audios, x="avgZcr", hue="Período (ms)",
             element="step", fill=False, ax=ax)
ax.set(xlabel="Taxa média de cruzamento de zero", ylabel="Quantidade");

Check spectrograms with largest attribution

In [None]:
# One magnitude spectrogram per selected input, using the window and hop sizes read
# from the run tags.
axes = []

for audio_ind in unique_inputs:
    _, axis = plt.subplots(figsize=(6, 3))
    axes.append(axis)

    waveform = inp_data[audio_ind].cpu().detach().numpy()
    magnitude = np.abs(
        librosa.stft(waveform, n_fft=window_size, win_length=window_size,
                     hop_length=hop_size, center=True)
    )
    librosa.display.specshow(
        magnitude,
        sr=sr, x_axis="time", y_axis="linear", hop_length=hop_size,
        fmin=int(tags["fmin"]), fmax=int(tags["fmax"]), ax=axis
    )

    # Class name of this input; the 'others' class is shown as 'OtherBirds' in the title.
    label = idx_to_label[np.argmax(labels[audio_ind].cpu().detach().numpy())]
    axis.set_title(f"Espectrograma- {'OtherBirds' if label == 'others' else label}",
                   {'fontsize': 11})

    axis.set_xlabel("Tempo [s]")

plt.show()