# Difference Confusion Matrices

## 1. Loading Python packages

In [14]:
# When using Google Colab, some programs need to be installed every runtime:
%%capture
!pip install dcase_util

In [15]:
# Importing needed packages:
import numpy as np
import dcase_util
import pandas as pd
import seaborn as sns
import os
import torchaudio
import torch
import torch.nn.functional as F
from matplotlib import pyplot as plt
from IPython.display import Audio, display

## 2. Model 1

In [16]:
# Read the raw confusion matrices stored for the linear and mel spectrogram runs of model 1:
confusion_matrix_linear = np.load('.....model 1/results/1 LINEAR/1_LINEAR_confusion_matrix_raw.npy')
confusion_matrix_mel = np.load('.....model 1/results/1 MEL/1_MEL_confusion_matrix_raw.npy')

# Element-wise MEL minus LINEAR; an entry is positive where the mel matrix holds the larger value:
difference_matrix = np.subtract(confusion_matrix_mel, confusion_matrix_linear)

In [17]:
# Read the tab-separated, header-less species order file; its first column is used as the class labels:
classes_dict = pd.read_csv('......./66_species_order.csv', sep='\t', header=None)
labels = classes_dict.iloc[:, 0].tolist()

In [18]:
def calculate_percentage(difference_matrix, matrix):
    """Express each difference as a rounded percentage of its row total in ``matrix``.

    Parameters:
        difference_matrix: 2-D array-like of differences (one row per true class).
        matrix: 2-D array-like with the same shape; each row of ``difference_matrix``
            is divided by the sum of the matching row of ``matrix``.

    Returns:
        A float array with the shape of ``difference_matrix`` holding
        ``round(difference / row_total * 100)``. Rows whose total is zero are
        returned as 0 instead of NaN/inf, so they neither trigger a
        divide-by-zero warning nor break the downstream plot.
    """
    differences = np.asarray(difference_matrix, dtype=float)
    row_totals = np.sum(matrix, axis=1, keepdims=True)

    # Only divide where the row has support; all other cells keep the 0 they start with.
    ratios = np.divide(differences, row_totals, out=np.zeros_like(differences), where=row_totals != 0)
    return np.round(ratios * 100)

# Express every MEL-minus-LINEAR difference as a percentage of the row total of the MEL matrix:
percentage_matrix = calculate_percentage(difference_matrix, confusion_matrix_mel)

# A species row is highlighted when its diagonal percentage reaches either bound:
percentage_threshold_max = 50
percentage_threshold_min = -50

# Row sums of the MEL confusion matrix, shown as the support column next to the heatmap:
total_files_per_species = np.sum(confusion_matrix_mel, axis=1)

n_species = len(labels)

# Indices of the species whose diagonal percentage is at or beyond one of the thresholds:
highlighted_rows = [
    row for row in range(n_species)
    if percentage_matrix[row, row] >= percentage_threshold_max
    or percentage_matrix[row, row] <= percentage_threshold_min
]

# Create the figure:
fig, ax = plt.subplots(figsize=(24, 20), dpi=300)

# Coloured background for the highlighted rows, added before the heatmap is drawn:
for row in highlighted_rows:
    ax.add_patch(plt.Rectangle((-0.015, row), n_species, 1, fill=True, color='lightcoral'))

# Annotated heatmap of the percentages; cells equal to zero are masked out:
sns.heatmap(
    percentage_matrix,
    mask=percentage_matrix == 0,
    annot=True, fmt='g', cmap='magma', vmin=-60, vmax=60,
    xticklabels=labels, yticklabels=labels,
    linewidths=0.1, linecolor='lightgrey',
    cbar=False, ax=ax,
)

# Paint a diagonal cell black where the two matrices hold the same value:
for cell in range(n_species):
    if difference_matrix[cell, cell] == 0:
        ax.add_patch(plt.Rectangle((cell, cell), 1, 1, fill=True, color='black'))

# Two thin black frames around the matrix, the second one slightly offset:
for corner in ((0, 0), (-0.05, -0.05)):
    ax.add_patch(plt.Rectangle(corner, n_species, n_species, fill=False, edgecolor='black', lw=1))

# Red outline around the highlighted rows:
for row in highlighted_rows:
    ax.add_patch(plt.Rectangle((-0.015, row), n_species, 1, fill=False, edgecolor='red', lw=2))

# Support per species, written as white-on-black text to the right of the matrix:
for row, count in enumerate(total_files_per_species):
    ax.text(n_species + 1.5, row + 0.50, int(count), va='center', ha='left', fontsize=11, color='white',
            bbox=dict(boxstyle='square,pad=0.25', edgecolor=None, facecolor='black'))

# Solid black strip further to the right:
for row in range(n_species):
    ax.add_patch(plt.Rectangle((n_species + 2, row), 0.5, 1, fill=True, color='black'))

# Shrink the axes area and extend the x-range so the extra column fits:
fig.subplots_adjust(right=0.85)
ax.set_xlim(0, n_species + 4)

# Vertical caption for the support column:
ax.text(n_species + 3.9, n_species / 2, 'Support Per Species', rotation=90, va='center', ha='center', fontsize=18, color='black')

# Titles, axis labels and tick orientation, then render the figure:
ax.set_title('Difference Confusion Matrix Model 1', fontsize=20)
ax.set_xlabel('Predicted', fontsize=18)
ax.set_ylabel('True', fontsize=18)
ax.xaxis.set_label_position('bottom')
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()

Output hidden; open in https://colab.research.google.com to view.

In [19]:
# Look at and listen to recordings of a species of interest.

# Directory that holds the test audio files:
audio_dir = '........baseline 1/test'

# Every entry found in that directory:
files = os.listdir(audio_dir)

# Keep only the entries whose name contains the chosen species
# (candidates: 'Stenobothrusstigmaticus', 'Atrapsaltacollina', 'Gomphocerippusrufus', 'Ephippigerdiurnus'):
filtered_files = list(filter(lambda name: 'Stenobothrusstigmaticus' in name, files))

def generate_spectrogram(audio_path):
    """Plot a dB-scaled spectrogram of one recording and show an audio player for it.

    The file at ``audio_path`` is loaded through dcase_util with ``fs=44100``, turned
    into a normalized power spectrogram (n_fft 2048, hop of half a window), converted
    to dB and resized along the frequency axis to 128 rows with bicubic interpolation.
    Nothing is returned; the figure and the audio widget are displayed as side effects.
    """
    sample_rate = 44100
    n_fft = 2048
    n_rows = 128

    # Load the waveform:
    waveform = dcase_util.containers.AudioContainer().load(filename=audio_path, fs=sample_rate).data

    # Power spectrogram followed by conversion to decibels:
    to_spectrogram = torchaudio.transforms.Spectrogram(
        n_fft=n_fft, hop_length=round(n_fft / 2), win_length=n_fft, power=2, normalized=True)
    to_db = torchaudio.transforms.AmplitudeToDB(top_db=80)
    spec_db = to_db(to_spectrogram(torch.tensor(waveform)))

    # Add two leading axes for F.interpolate, resize the frequency axis only
    # (the time axis keeps its length), then drop the two axes again:
    spec_4d = spec_db[None, None]
    resized = F.interpolate(spec_4d, size=(n_rows, spec_4d.shape[3]), mode='bicubic',
                            align_corners=False, antialias=True)
    spec_image = resized[0, 0]

    # Draw the spectrogram with the lowest frequency row at the bottom:
    plt.figure(figsize=(10, 4))
    plt.imshow(spec_image.numpy(), origin='lower', aspect='auto')
    plt.colorbar(format='%+2.0f dB')
    plt.yticks(np.arange(0, n_rows, 10))
    plt.title('Linear Spectrogram')
    plt.xlabel('Time Bins')
    plt.ylabel('Frequency Bins')
    plt.show()

    # Audio player for the same recording:
    display(Audio(waveform, rate=sample_rate))

# Number of recordings to inspect (fewer are shown when the species has fewer files):
number = 3

# Pick distinct recordings. A truncated random permutation never repeats an index and
# gives an empty selection when no file matched, whereas np.random.randint(0, 0, ...)
# raises and can also draw the same recording several times.
index = np.random.permutation(len(filtered_files))[:number]

for idx in index:
    generate_spectrogram(os.path.join(audio_dir, filtered_files[idx]))

Output hidden; open in https://colab.research.google.com to view.

## 3. Model 2

In [20]:
# Read the raw confusion matrices stored for the linear and mel spectrogram runs of model 2:
confusion_matrix_linear = np.load('.....model 2/results/1 LINEAR/1_LINEAR_confusion_matrix_raw.npy')
confusion_matrix_mel = np.load('.....model 2/results/1 MEL/1_MEL_confusion_matrix_raw.npy')

# Element-wise MEL minus LINEAR; an entry is positive where the mel matrix holds the larger value:
difference_matrix = np.subtract(confusion_matrix_mel, confusion_matrix_linear)

In [21]:
# Read the tab-separated, header-less species order file; its first column is used as the class labels:
classes_dict = pd.read_csv('.......66_species_order.csv', sep='\t', header=None)
labels = classes_dict.iloc[:, 0].tolist()

In [22]:
def calculate_percentage(difference_matrix, matrix):
    """Express each difference as a rounded percentage of its row total in ``matrix``.

    Parameters:
        difference_matrix: 2-D array-like of differences (one row per true class).
        matrix: 2-D array-like with the same shape; each row of ``difference_matrix``
            is divided by the sum of the matching row of ``matrix``.

    Returns:
        A float array with the shape of ``difference_matrix`` holding
        ``round(difference / row_total * 100)``. Rows whose total is zero are
        returned as 0 instead of NaN/inf, so they neither trigger a
        divide-by-zero warning nor break the downstream plot.
    """
    differences = np.asarray(difference_matrix, dtype=float)
    row_totals = np.sum(matrix, axis=1, keepdims=True)

    # Only divide where the row has support; all other cells keep the 0 they start with.
    ratios = np.divide(differences, row_totals, out=np.zeros_like(differences), where=row_totals != 0)
    return np.round(ratios * 100)

# Express every MEL-minus-LINEAR difference as a percentage of the row total of the MEL matrix:
percentage_matrix = calculate_percentage(difference_matrix, confusion_matrix_mel)

# A species row is highlighted when its diagonal percentage reaches either bound:
percentage_threshold_max = 50
percentage_threshold_min = -50

# Row sums of the MEL confusion matrix, shown as the support column next to the heatmap:
total_files_per_species = np.sum(confusion_matrix_mel, axis=1)

n_species = len(labels)

# Indices of the species whose diagonal percentage is at or beyond one of the thresholds:
highlighted_rows = [
    row for row in range(n_species)
    if percentage_matrix[row, row] >= percentage_threshold_max
    or percentage_matrix[row, row] <= percentage_threshold_min
]

# Create the figure:
fig, ax = plt.subplots(figsize=(24, 20), dpi=300)

# Coloured background for the highlighted rows, added before the heatmap is drawn:
for row in highlighted_rows:
    ax.add_patch(plt.Rectangle((-0.015, row), n_species, 1, fill=True, color='lightcoral'))

# Annotated heatmap of the percentages; cells equal to zero are masked out:
sns.heatmap(
    percentage_matrix,
    mask=percentage_matrix == 0,
    annot=True, fmt='g', cmap='magma', vmin=-60, vmax=60,
    xticklabels=labels, yticklabels=labels,
    linewidths=0.1, linecolor='lightgrey',
    cbar=False, ax=ax,
)

# Paint a diagonal cell black where the two matrices hold the same value:
for cell in range(n_species):
    if difference_matrix[cell, cell] == 0:
        ax.add_patch(plt.Rectangle((cell, cell), 1, 1, fill=True, color='black'))

# Two thin black frames around the matrix, the second one slightly offset:
for corner in ((0, 0), (-0.05, -0.05)):
    ax.add_patch(plt.Rectangle(corner, n_species, n_species, fill=False, edgecolor='black', lw=1))

# Red outline around the highlighted rows:
for row in highlighted_rows:
    ax.add_patch(plt.Rectangle((-0.015, row), n_species, 1, fill=False, edgecolor='red', lw=2))

# Support per species, written as white-on-black text to the right of the matrix:
for row, count in enumerate(total_files_per_species):
    ax.text(n_species + 1.5, row + 0.50, int(count), va='center', ha='left', fontsize=11, color='white',
            bbox=dict(boxstyle='square,pad=0.25', edgecolor=None, facecolor='black'))

# Solid black strip further to the right:
for row in range(n_species):
    ax.add_patch(plt.Rectangle((n_species + 2, row), 0.5, 1, fill=True, color='black'))

# Shrink the axes area and extend the x-range so the extra column fits:
fig.subplots_adjust(right=0.85)
ax.set_xlim(0, n_species + 4)

# Vertical caption for the support column:
ax.text(n_species + 3.9, n_species / 2, 'Support Per Species', rotation=90, va='center', ha='center', fontsize=18, color='black')

# Titles, axis labels and tick orientation, then render the figure:
ax.set_title('Difference Confusion Matrix Model 2', fontsize=20)
ax.set_xlabel('Predicted', fontsize=18)
ax.set_ylabel('True', fontsize=18)
ax.xaxis.set_label_position('bottom')
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()

Output hidden; open in https://colab.research.google.com to view.

In [23]:
# Look at and listen to recordings of a species of interest.

# Directory that holds the test audio files:
audio_dir = '.........baseline 2/test'

# Every entry found in that directory:
files = os.listdir(audio_dir)

# Keep only the entries whose name contains the chosen species
# (candidates: 'Atrapsaltacollina', 'Cyclochilaaustralasiae', 'Yoyettacelis', 'Omocestusrufipes',
#  'Grylluscampestris', 'Eupholidopteraschmidti', 'Tettigoniaviridissima'):
filtered_files = list(filter(lambda name: 'Atrapsaltacollina' in name, files))

def generate_spectrogram(audio_path):
    """Plot a dB-scaled spectrogram of one recording and show an audio player for it.

    The file at ``audio_path`` is loaded through dcase_util with ``fs=44100``, turned
    into a spectrogram (n_fft 1000, hop of ``int(44100 * 5 / 1500)`` samples, window of
    twice the hop), converted to dB and resized along the frequency axis to 64 rows
    with bicubic interpolation. Nothing is returned; the figure and the audio widget
    are displayed as side effects.
    """
    sample_rate = 44100
    n_rows = 64
    hop = int((sample_rate * 5) / 1500)

    # Load the waveform:
    waveform = dcase_util.containers.AudioContainer().load(filename=audio_path, fs=sample_rate).data

    # Spectrogram followed by conversion to decibels:
    to_spectrogram = torchaudio.transforms.Spectrogram(n_fft=1000, hop_length=hop, win_length=(hop * 2))
    to_db = torchaudio.transforms.AmplitudeToDB(top_db=80)
    spec_db = to_db(to_spectrogram(torch.tensor(waveform)))

    # Add two leading axes for F.interpolate, resize the frequency axis only
    # (the time axis keeps its length), then drop the two axes again:
    spec_4d = spec_db[None, None]
    resized = F.interpolate(spec_4d, size=(n_rows, spec_4d.shape[3]), mode='bicubic',
                            align_corners=False, antialias=True)
    spec_image = resized[0, 0]

    # Draw the spectrogram with the lowest frequency row at the bottom:
    plt.figure(figsize=(10, 4))
    plt.imshow(spec_image.numpy(), origin='lower', aspect='auto')
    plt.colorbar(format='%+2.0f dB')
    plt.yticks(np.arange(0, n_rows, 10))
    plt.title('Linear Spectrogram')
    plt.xlabel('Time Bins')
    plt.ylabel('Frequency Bins')
    plt.show()

    # Audio player for the same recording:
    display(Audio(waveform, rate=sample_rate))

# Number of recordings to inspect (fewer are shown when the species has fewer files):
number = 3

# Pick distinct recordings. A truncated random permutation never repeats an index and
# gives an empty selection when no file matched, whereas np.random.randint(0, 0, ...)
# raises and can also draw the same recording several times.
index = np.random.permutation(len(filtered_files))[:number]

for idx in index:
    generate_spectrogram(os.path.join(audio_dir, filtered_files[idx]))

Output hidden; open in https://colab.research.google.com to view.