In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from birdCLEFloaddata import load_audiofile,load_metadata,get_melspectrogram
from birdCLEFFunctions import Dynamic_CNN, Dynamic_CNN2
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import Resize
from torch.utils.data import DataLoader, TensorDataset
from efficientnet_pytorch import EfficientNet
from torchvision.transforms import Grayscale, ToPILImage
import torchvision
import soundfile as sf
import random
import math
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

# Notebook-wide plotting config: every figure created after this cell uses 600 DPI.
plt.rcParams.update({'figure.dpi': 600})

In [2]:
# Mount Google Drive at /content/drive; later cells read the dataset archive,
# the project code directory and the saved plot data from /content/drive/MyDrive.
# NOTE(review): `files` is imported but not referenced in the cells visible here.
from google.colab import drive, files
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip /content/drive/MyDrive/APP_ML/APPML-BirdCLEF/data2022/BirdCLEF2023.zip -d birdclef2023

In [None]:
import zipfile

In [None]:
import sys

# Directory on Drive that is added to the module search path.
# NOTE(review): the first cell already imports birdCLEFloaddata / birdCLEFFunctions;
# if those modules live in this directory, the Drive mount and this cell have to run
# before that import on a fresh kernel — confirm and reorder the cells if so.
CODE_DIR = '/content/drive/MyDrive/APP_ML/APPML-BirdCLEF/CodeEffecientnet'

# Guarded so that re-running the cell does not keep appending duplicate entries.
if CODE_DIR not in sys.path:
    sys.path.append(CODE_DIR)

In [None]:
def load_metadata(directory, datadir, trim=False):
    """Read the training metadata CSV and return the columns used by this notebook.

    Note: this definition replaces the `load_metadata` imported from
    `birdCLEFloaddata` at the top of the notebook.

    Parameters
    ----------
    directory : str
        Folder that contains `train_metadata.csv` / `train_metadata_trim.csv`.
    datadir : str
        Folder that contains the `train_audio` directory; it is prefixed to every
        entry of the `filename` column.
    trim : bool, default False
        When true, read `train_metadata_trim.csv` instead of `train_metadata.csv`.

    Returns
    -------
    pandas.DataFrame
        Columns `latitude`, `longitude`, `common_name`, `rating`, `filename`,
        with `filename` rewritten to `<datadir>/train_audio/<original filename>`.
    """
    csv_suffix = '/train_metadata_trim.csv' if trim else '/train_metadata.csv'
    metadata = pd.read_csv(directory + csv_suffix)

    # Turn the relative file names from the CSV into paths below the audio folder.
    metadata['filename'] = datadir + "/train_audio/" + metadata['filename']

    return metadata[['latitude', 'longitude', 'common_name', 'rating', 'filename']]

# Root folder of the extracted dataset; it is passed both as the location of the
# metadata CSV and as the parent of the train_audio folder.
path = r"/content/birdclef2023"
# trim=True selects train_metadata_trim.csv. This calls the load_metadata defined
# in this notebook, not the one imported from birdCLEFloaddata.
meta_data = load_metadata(path,path,trim=True)

In [None]:
# Build the training / validation sets of [mel_spectrogram, label_index] pairs.
#
# For every bird with more than one clip, one randomly chosen clip is held out for
# validation and up to `max_files_per_bird` of the remaining clips are used for
# training. Birds with exactly one clip are handled after the main loop.

# Define the duration of each segment in seconds
segment_duration = 15
# Maximum number of training clips loaded per bird
max_files_per_bird = 50


def _split_into_segments(audio, sr, segment_duration):
    """Cut `audio` into consecutive, non-overlapping segments of `segment_duration` seconds.

    A clip shorter than one segment is extended to exactly one segment by repeating
    its own samples (np.pad with mode='wrap'); the trailing remainder of a longer
    clip is dropped. An empty clip yields no segments, because wrap-padding an
    empty array is not possible.

    Assumes `audio` is a 1-D sample array and `sr` an integer sample rate —
    TODO confirm against load_audiofile.
    """
    segment_len = segment_duration * sr
    n_samples = len(audio)
    if n_samples == 0:
        return []
    num_segments = n_samples // segment_len
    if num_segments == 0:
        return [np.pad(audio, (0, segment_len - n_samples), mode='wrap')]
    return [audio[k * segment_len:(k + 1) * segment_len] for k in range(num_segments)]


def _clip_to_samples(clip_path, bird):
    """Load one audio file and return a `[mel_spectrogram, bird]` pair per segment."""
    audio, sr = load_audiofile(clip_path)
    return [[get_melspectrogram(segment), bird]
            for segment in _split_into_segments(audio, sr, segment_duration)]


def _as_object_pairs(samples):
    """Pack `[spectrogram, label]` pairs into an (N, 2) object array.

    The array is filled element by element because the rows are ragged (an array
    next to a scalar label); `np.asarray` raises ValueError for such input on
    NumPy >= 1.24 unless dtype=object is requested.
    """
    pairs = np.empty((len(samples), 2), dtype=object)
    for row, (spectrogram, label) in enumerate(samples):
        pairs[row, 0] = spectrogram
        pairs[row, 1] = label
    return pairs


# Generate audio data
training_data = []
validation_data = []
birds_with_single_clip = []
random.seed(42)

# Iterate through each unique bird
unique_birds = meta_data['common_name'].unique()
for i, bird in enumerate(unique_birds):
    print(i+1, "/", len(unique_birds))
    # Get all audio clips for the bird
    bird_clips = meta_data[meta_data['common_name'] == bird]['filename'].tolist()

    # Birds with a single clip are collected and processed after this loop.
    # Skipping the rest of the iteration is essential: otherwise the clips of the
    # previously processed bird would be loaded again under this bird's label.
    if len(bird_clips) == 1:
        birds_with_single_clip.append(bird)
        continue

    # Randomly select one clip for validation and the rest for training
    random.shuffle(bird_clips)
    validation_clip = bird_clips[0]
    training_clip = bird_clips[1:]

    # Load and process at most `max_files_per_bird` training clips
    for clip in training_clip[:max_files_per_bird]:
        training_data.extend(_clip_to_samples(clip, bird))

    # Load and process the validation clip
    validation_data.extend(_clip_to_samples(validation_clip, bird))

# Shuffle the single-clip birds (kept so the sequence of random draws after
# random.seed(42) is unchanged for any later use of `random`).
random.shuffle(birds_with_single_clip)

# Append the single clips to the training and validation data.
# NOTE(review): as in the original code, every segment of a single-clip bird goes
# into BOTH sets, so these birds are validated on data seen during training —
# confirm this is intended.
for bird in birds_with_single_clip:
    clip = meta_data[meta_data['common_name'] == bird]['filename'].tolist()[0]
    audio, sr = load_audiofile(clip)
    for segment_audio in _split_into_segments(audio, sr, segment_duration):
        # The spectrogram is computed separately for each list so the two
        # datasets never share the same underlying object.
        validation_data.append([get_melspectrogram(segment_audio), bird])
        training_data.append([get_melspectrogram(segment_audio), bird])

# Convert to numpy object arrays of shape (N, 2): column 0 spectrogram, column 1 label
training_data = _as_object_pairs(training_data)
validation_data = _as_object_pairs(validation_data)

# Map labels to indices. The names are sorted so the mapping is identical on every
# run; iterating a set of strings gives an order that can change between
# interpreter sessions. Indices are shifted by +1, so unknown labels map to 0.
label_mapping = {label: index for index, label in enumerate(sorted(unique_birds))}
training_data[:, 1] = [label_mapping.get(label, -1) + 1 for label in training_data[:, 1]]
validation_data[:, 1] = [label_mapping.get(label, -1) + 1 for label in validation_data[:, 1]]

# Clear temporary data and variables
birds_with_single_clip = None
training_single_clips = None
validation_single_clips = None

In [None]:
def _load_saved_array(path):
    """Load a saved NumPy array from `path`, falling back to `path + '.npy'`.

    NumPy has no `np.read`; `np.load` is the reader for arrays written by `np.save`.
    `np.save` appends '.npy' to names that have no extension, so the file on disk may
    carry that suffix even though the path used here does not.
    Presumably the files were written with np.save — confirm against the cell that
    produced them.
    """
    try:
        return np.load(path)
    except FileNotFoundError:
        return np.load(path + '.npy')


# Model outputs stored by an earlier run: per-class probabilities and the true labels.
val_probabilities = _load_saved_array('/content/drive/MyDrive/APP_ML/APPML-BirdCLEF/plot_data/val_probabilities')
val_true_labels = _load_saved_array('/content/drive/MyDrive/APP_ML/APPML-BirdCLEF/plot_data/val_true_labels')

In [None]:
# Evaluate the stored validation outputs: confusion matrix, macro-averaged ROC
# curve over all classes, and a heatmap of the confusion matrix.

# Predicted class = column with the highest probability
val_predictions = np.argmax(val_probabilities, axis=1)

# Compute and print the confusion matrix
cm = confusion_matrix(val_true_labels, val_predictions)
print("Confusion Matrix:")
print(cm)

# Class ids present in the validation set (label indices written by the data cell).
# NOTE(review): these ids are used directly as column indices into
# val_probabilities below — confirm the model's output columns use the same
# (+1 shifted) numbering.
unique_classes = np.unique(validation_data[:, 1])
target_names = unique_classes

# Compute ROC curves and AUC for each class (one-vs-rest)
fpr = dict()
tpr = dict()
roc_auc = dict()

for class_id in unique_classes:
    # Create a binary target for the current class
    binary_labels = np.where(val_true_labels == class_id, 1, 0)

    # Calculate the ROC curve and AUC for the current class
    fpr[class_id], tpr[class_id], _ = roc_curve(binary_labels, val_probabilities[:, class_id])
    roc_auc[class_id] = auc(fpr[class_id], tpr[class_id])

# Compute the mean TPR by interpolating every class curve onto a common FPR grid
mean_fpr = np.linspace(0, 1, 100)
mean_tpr = np.zeros_like(mean_fpr)
for class_id in unique_classes:
    mean_tpr += np.interp(mean_fpr, fpr[class_id], tpr[class_id])

mean_tpr /= len(unique_classes)
mean_auc = auc(mean_fpr, mean_tpr)

# Plot the aggregated ROC curve
fig_roc, ax_roc = plt.subplots()
ax_roc.plot(mean_fpr, mean_tpr, lw=2, label='Aggregate ROC curve (AUC = %0.2f)' % mean_auc)
ax_roc.plot([0, 1], [0, 1], 'k--', lw=2)  # chance level
ax_roc.set_xlim([0.0, 1.0])
ax_roc.set_ylim([0.0, 1.05])
ax_roc.set_xlabel('False Positive Rate')
ax_roc.set_ylabel('True Positive Rate')
ax_roc.set_title('ROC Curve (Aggregated)')
ax_roc.legend(loc="lower right")
plt.show()

# Plot the confusion matrix
fig_cm, ax = plt.subplots()
sns.heatmap(cm, annot=False, fmt="d", cmap="YlGnBu", ax=ax)
ax.set_title('Confusion Matrix')

# Label every `tick_frequency`-th class. Heatmap cell k spans [k, k + 1], so the
# tick is placed at k + 0.5 to sit on the centre of the cell instead of its edge.
tick_frequency = 10
tick_locs = np.arange(len(unique_classes))[::tick_frequency] + 0.5
tick_labels = unique_classes[::tick_frequency]
ax.set_xticks(tick_locs)
ax.set_xticklabels(tick_labels, fontsize=8, rotation=90)
ax.set_yticks(tick_locs)
ax.set_yticklabels(tick_labels, fontsize=8)

ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()