In [None]:
%rm -r /kaggle/working/*

In [None]:
%pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
%pip install noisereduce numpy tqdm librosa optuna timm scikit-metrics pandas pillow

In [None]:
# This creates a small subset of the training data for quick testing

import os
import shutil
import sys

def copy_files(src_root, dest_root, max_dirs=10, max_files=5):
    """Copy a small, reproducible sample of a folder-per-class tree.

    The first ``max_dirs`` sub-directories of ``src_root`` (in sorted name
    order) are mirrored under ``dest_root``, each receiving the first
    ``max_files`` regular files (again in sorted name order) of its source
    directory.

    Args:
        src_root: Directory whose immediate sub-directories are sampled.
        dest_root: Destination directory; created if it does not exist.
        max_dirs: Maximum number of sub-directories to copy (``None`` = all).
        max_files: Maximum number of files per sub-directory (``None`` = all).
    """
    os.makedirs(dest_root, exist_ok=True)

    # os.listdir() returns entries in arbitrary order, so sort before slicing;
    # otherwise "the first N" differs between machines and runs.
    subdir_names = sorted(
        name for name in os.listdir(src_root)
        if os.path.isdir(os.path.join(src_root, name))
    )[:max_dirs]

    for subdir_name in subdir_names:
        src_subdir = os.path.join(src_root, subdir_name)
        file_names = sorted(
            name for name in os.listdir(src_subdir)
            if os.path.isfile(os.path.join(src_subdir, name))
        )[:max_files]

        # Mirror the class folder name in the destination.
        dest_subdir = os.path.join(dest_root, subdir_name)
        os.makedirs(dest_subdir, exist_ok=True)

        for file_name in file_names:
            shutil.copy(os.path.join(src_subdir, file_name), dest_subdir)

    print("Copying completed.")

# Build the miniature training set inside the writable working directory.
src_root, dest_root = (
    '/kaggle/input/birdclef-2024/train_audio',
    '/kaggle/working/train_audio_subset',
)

copy_files(src_root=src_root, dest_root=dest_root)


In [None]:
import multiprocessing
import os
import time
from bisect import bisect_right
from io import BytesIO
from itertools import accumulate
from pathlib import Path

import librosa
import librosa.display
import matplotlib.pyplot as plt
import noisereduce as nr
import numpy as np
import optuna
import pandas as pd
import timm
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from sklearn.metrics import roc_auc_score
from torch.utils.data import DataLoader, random_split
from torchvision import transforms, datasets
from tqdm import tqdm

# --- Paths and global settings ---------------------------------------------
_full_input_folder = "/kaggle/input/birdclef-2024/train_audio"
_subset_input_folder = '/kaggle/working/train_audio_subset'
# True trains on the small subset created earlier in the notebook;
# set to False to train on the full competition dataset.
_use_subset = True
_input_folder = _subset_input_folder if _use_subset else _full_input_folder
_test_folder = "/kaggle/input/birdclef-2024/test_soundscapes"
_output_model_folder = "/kaggle/working/models"
_output_log_folder = "/kaggle/working/logs"
_output_submission_folder = "/kaggle/working"
# Every recording is resampled to this rate (Hz) when loaded.
sample_rate = 16000

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def load_audio(filename):
    """Load ``filename`` with librosa at the module-level ``sample_rate``.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load``.
    """
    waveform, rate = librosa.load(filename, sr=sample_rate)
    return waveform, rate

def segment_audio(segment, segment_length=5, sr=sample_rate):
    """Split a signal into consecutive chunks of ``segment_length * sr`` samples.

    Chunks are taken back to back from the start of ``segment``; the final
    chunk holds whatever samples remain and may therefore be shorter than
    the others.

    Returns:
        A list of slices of ``segment`` (empty when ``segment`` is empty).
    """
    chunk_size = segment_length * sr
    return [
        segment[offset:offset + chunk_size]
        for offset in range(0, len(segment), chunk_size)
    ]

def generate_square_spectrogram(audio, sr, size=224, fmin=2000, fmax=8000):
    """Render a grayscale mel spectrogram of ``audio`` as an RGB PIL image.

    A 128-band mel spectrogram limited to ``[fmin, fmax]`` is converted to dB
    (relative to its maximum), drawn without axes on a square matplotlib
    figure of ``size / 100`` inches at 100 dpi, and returned as a PIL image
    decoded from an in-memory PNG.

    Args:
        audio: Audio samples passed to ``librosa.feature.melspectrogram``.
        sr: Sample rate of ``audio``.
        size: Nominal side of the figure in pixels. The saved PNG is cropped
            with ``bbox_inches='tight'``, so the exact pixel size is not
            guaranteed here.
        fmin: Lowest frequency included in the mel filter bank.
        fmax: Highest frequency included in the mel filter bank.

    Returns:
        ``PIL.Image.Image`` in ``'RGB'`` mode.
    """
    s = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128, fmax=fmax, fmin=fmin)
    s_dB = librosa.power_to_db(s, ref=np.max)

    fig, ax = plt.subplots(figsize=(size / 100, size / 100), dpi=100)
    try:
        librosa.display.specshow(
            s_dB, sr=sr, x_axis='time', y_axis='mel',
            fmin=fmin, fmax=fmax, cmap='gray', ax=ax,
        )
        ax.axis('off')
        # Use the figure's own method rather than pyplot's "current figure" state.
        fig.tight_layout(pad=0)
        with BytesIO() as buf:
            fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
            buf.seek(0)
            # convert() fully decodes the PNG into a new image, so the result
            # stays valid after the buffer is closed.
            image = Image.open(buf).convert('RGB')
    finally:
        # This runs once per dataset item; always release the figure, even if
        # rendering fails, so figures do not pile up in pyplot's registry.
        plt.close(fig)
    return image

class CustomDataset(torch.utils.data.Dataset):
    """Dataset yielding one ``(spectrogram image, label)`` pair per audio segment.

    ``file_pairs`` is a sequence of ``(path, label, segments)`` tuples. Items
    are numbered across all files in order: first every segment of the first
    file, then every segment of the second file, and so on. Spectrograms are
    rendered on demand in ``__getitem__``.

    The per-file segment counts are read once at construction time, so
    ``file_pairs`` must not be modified afterwards.
    """

    def __init__(self, file_pairs, transform=None):
        """Store the inputs and pre-compute the global segment offsets.

        Args:
            file_pairs: Sequence of ``(path, label, segments)`` tuples.
            transform: Optional callable applied to each spectrogram image.
        """
        self.file_pairs = file_pairs
        self.transform = transform
        # _segment_ends[i] == total number of segments in files 0..i (inclusive).
        self._segment_ends = list(
            accumulate(len(segments) for _, _, segments in file_pairs)
        )

    def __len__(self):
        """Return the total number of segments across all files."""
        return self._segment_ends[-1] if self._segment_ends else 0

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the segment with global index ``idx``.

        Negative indices count from the end, as for Python sequences.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        total = len(self)
        if idx < 0:
            idx += total
        if not 0 <= idx < total:
            raise IndexError("Index out of range")

        # First file whose cumulative end lies beyond idx; files with no
        # segments share their predecessor's end and are skipped naturally.
        file_idx = bisect_right(self._segment_ends, idx)
        first_idx = self._segment_ends[file_idx - 1] if file_idx else 0

        _, label, segments = self.file_pairs[file_idx]
        image = generate_square_spectrogram(segments[idx - first_idx], sample_rate)
        if self.transform:
            image = self.transform(image)
        return image, label

def prepare_file_pairs(input_folder):
    """Load, denoise and segment every ``.ogg`` file under ``input_folder``.

    Class names are the immediate sub-directories of ``input_folder``, sorted
    by name; a file's label is the index of its parent directory in that
    list. Files whose parent directory is not one of those class folders
    (for example a flat folder of unlabeled recordings) get the label ``-1``.

    Args:
        input_folder: Root directory to scan recursively for ``*.ogg`` files.

    Returns:
        ``(file_pairs, num_classes)`` where ``file_pairs`` is a list of
        ``(path, label, segments)`` tuples in sorted path order and
        ``num_classes`` is the number of class sub-directories.
    """
    input_folder = Path(input_folder)
    class_names = sorted(f.name for f in input_folder.iterdir() if f.is_dir())
    class_to_idx = {class_name: idx for idx, class_name in enumerate(class_names)}

    file_pairs = []
    # rglob() yields files in filesystem order; sort so that item indices
    # are stable across runs and machines.
    for input_path in sorted(input_folder.rglob('*.ogg')):
        # .get(): unlabeled files must not raise KeyError.
        label = class_to_idx.get(input_path.parent.name, -1)
        audio, sr = load_audio(input_path)
        audio = nr.reduce_noise(audio, sr)
        segments = segment_audio(audio, segment_length=5, sr=sr)
        file_pairs.append((input_path, label, segments))
    return file_pairs, len(class_names)

def objective(trial):
    """Optuna objective: fine-tune a pretrained ResNeXt-50 on spectrogram segments.

    For each trial this function
      * builds the dataset from ``_input_folder`` and splits it 80/20 at random
        into training and validation sets,
      * samples ``batch_size`` from ``[16, 32, 64]`` and ``lr`` log-uniformly
        from ``[1e-5, 1e-1]``,
      * trains for 5 epochs with Adam and cross-entropy loss, reporting the
        validation accuracy after every epoch so Optuna can prune the trial,
      * stores the per-epoch metrics on the trial as the ``"history"`` user
        attribute, and
      * saves the final weights to
        ``{_output_model_folder}/model_best_resnext_trial_{trial.number}.pth``.

    Args:
        trial: The ``optuna`` trial supplying hyper-parameters.

    Returns:
        Validation accuracy (in percent) of the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: When the pruner stops the trial early.
    """
    model = timm.create_model('resnext50_32x4d', pretrained=True)

    transformation = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    file_pairs, num_classes = prepare_file_pairs(_input_folder)
    dataset = CustomDataset(file_pairs, transform=transformation)

    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

    # Swap the classification head for one sized to this dataset.
    model.fc = nn.Linear(model.fc.in_features, num_classes)

    model.to(device)

    criterion = nn.CrossEntropyLoss()
    # suggest_float(..., log=True) is the current spelling of the deprecated
    # suggest_loguniform(); it samples the same log-uniform range.
    learning_rate = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    num_epochs = 5
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    val_roc_aucs = []

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        train_loader_tqdm = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")

        for inputs, labels in train_loader_tqdm:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # loss is a batch mean; weight by batch size for a per-sample average.
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

            train_loader_tqdm.set_postfix(loss=loss.item())

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_accuracy = 100 * correct_train / total_train
        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_accuracy)

        # ---- validation pass ----
        val_loss = 0.0
        correct_val = 0
        total_val = 0
        all_labels = []
        all_probs = []

        model.eval()
        val_loader_tqdm = tqdm(val_loader, desc="Validating")

        with torch.no_grad():
            for inputs, labels in val_loader_tqdm:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

                all_labels.extend(labels.cpu().numpy())
                all_probs.extend(torch.nn.functional.softmax(outputs, dim=1).cpu().numpy())

                val_loader_tqdm.set_postfix(val_loss=loss.item())

        epoch_val_loss = val_loss / len(val_loader.dataset)
        epoch_val_accuracy = 100 * correct_val / total_val
        val_losses.append(epoch_val_loss)
        val_accuracies.append(epoch_val_accuracy)

        # roc_auc_score raises ValueError when the validation split does not
        # contain every class (likely with a small random split). ROC AUC is
        # only diagnostic here, so record NaN instead of failing the trial.
        try:
            roc_auc = roc_auc_score(all_labels, all_probs, multi_class='ovr')
        except ValueError:
            roc_auc = float('nan')
        val_roc_aucs.append(roc_auc)

        # Keep the metric history on the trial so it is still available
        # after the study finishes, including for pruned trials.
        trial.set_user_attr("history", {
            "train_loss": list(train_losses),
            "train_accuracy": list(train_accuracies),
            "val_loss": list(val_losses),
            "val_accuracy": list(val_accuracies),
            "val_roc_auc": list(val_roc_aucs),
        })

        trial.report(epoch_val_accuracy, epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    # Save the model after tuning
    os.makedirs(_output_model_folder, exist_ok=True)
    model_save_path = f"{_output_model_folder}/model_best_resnext_trial_{trial.number}.pth"
    torch.save(model.state_dict(), model_save_path)

    return val_accuracies[-1]

# Logging: messages are echoed to stdout and appended to this file.
log_file = f"{_output_log_folder}/training_log.txt"
os.makedirs(_output_log_folder, exist_ok=True)

def log_message(message):
    """Print ``message`` and append it, followed by a newline, to ``log_file``."""
    print(message)
    with open(log_file, 'a') as log_handle:
        entry = message + '\n'
        log_handle.write(entry)

# Set up the Optuna study
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=3, timeout=600)

log_message(f"Number of finished trials: {len(study.trials)}")
log_message("Best trial:")
trial = study.best_trial

log_message(f"  Value: {trial.value}")
log_message("  Params: ")
for key, value in trial.params.items():
    log_message(f"    {key}: {value}")

# Save the best model
# NOTE: this reloads and re-processes the whole training set just to obtain
# num_classes, which is expensive on the full dataset.
file_pairs, num_classes = prepare_file_pairs(_input_folder)
# pretrained=False: every weight is overwritten by the checkpoint loaded
# below, so fetching the ImageNet weights again would be wasted work.
best_model = timm.create_model('resnext50_32x4d', pretrained=False)
best_model.fc = nn.Linear(best_model.fc.in_features, num_classes)
best_trial_checkpoint = f"{_output_model_folder}/model_best_resnext_trial_{trial.number}.pth"
# map_location="cpu": the model is still on the CPU here, and this also lets
# a checkpoint written on a GPU be loaded in a CPU-only session.
best_model.load_state_dict(torch.load(best_trial_checkpoint, map_location="cpu"))
best_model_save_path = f"{_output_model_folder}/model_best_resnext.pth"
torch.save(best_model.state_dict(), best_model_save_path)

log_message(f"Best model saved to {best_model_save_path}")

# Test the best model (its weights are already in memory, so no reload is needed)
best_model.eval()
best_model.to(device)

test_transformation = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_file_pairs, _ = prepare_file_pairs(_test_folder)
test_dataset = CustomDataset(test_file_pairs, transform=test_transformation)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# The loader yields one item per *segment*, while test_file_pairs has one
# entry per *file*, so build the row ids per segment in dataset order
# (all segments of file 0, then file 1, ...). Each id is the file stem plus
# the segment's end time in seconds.
# NOTE(review): confirm the exact row_id and column naming against the
# competition's sample submission.
segment_seconds = 5  # must match the segment_length used in prepare_file_pairs
row_ids = [
    f"{os.path.basename(path).replace('.ogg', '')}_{(segment_idx + 1) * segment_seconds}"
    for path, _, segments in test_file_pairs
    for segment_idx in range(len(segments))
]

submission = []

with torch.no_grad():
    for idx, (inputs, _) in enumerate(tqdm(test_loader, desc="Testing")):
        inputs = inputs.to(device)
        outputs = best_model(inputs)
        probs = torch.nn.functional.softmax(outputs, dim=1).cpu().numpy().flatten()

        # batch_size=1 and shuffle=False, so idx is the global segment index.
        submission.append([row_ids[idx]] + probs.tolist())

# Create submission DataFrame
column_names = ['row_id'] + [f'label_{i}' for i in range(num_classes)]
submission_df = pd.DataFrame(submission, columns=column_names)

# Save to CSV
submission_csv_path = f"{_output_submission_folder}/Submission.csv"
submission_df.to_csv(submission_csv_path, index=False)
log_message(f"Submission file saved to {submission_csv_path}")
