In [None]:
# !pip install efficientnet_pytorch

In [None]:
import librosa
import numpy as np
import torch
import torch.nn as nn
from efficientnet_pytorch import EfficientNet
from torch.utils.data import Dataset, DataLoader
from audiomentations import Compose, AddBackgroundNoise, TimeMask, FrequencyMask
from multiprocessing import Pool
import os

# Constants
SR = 32000  # Sample rate in Hz used when loading audio and computing mel spectrograms
N_MELS = 128  # Number of mel bands requested from the mel-spectrogram transform
HOP_LENGTH = 512  # Hop length (in samples) between successive spectrogram frames
NUM_CLASSES = 182  # Adjust based on BirdCLEF 2025 species count
BATCH_SIZE = 32  # Batch size used for both the train and test DataLoader in main()
EPOCHS = 10  # Default number of epochs for train_model()

# Data Preprocessing
def audio_to_mel_spectrogram(audio_path, sr=SR, n_mels=N_MELS, hop_length=HOP_LENGTH):
    """Load an audio file and return its mel spectrogram in decibels.

    Args:
        audio_path: Path of the audio file to load.
        sr: Sample rate the audio is loaded at (passed to ``librosa.load``).
        n_mels: Number of mel bands.
        hop_length: Hop length in samples between frames.

    Returns:
        The dB-scaled mel spectrogram produced by ``librosa.power_to_db``,
        referenced to the clip's own maximum power (``ref=np.max``).
    """
    # The sample rate returned by librosa equals ``sr``, so it is discarded.
    waveform, _loaded_sr = librosa.load(audio_path, sr=sr)
    mel_power = librosa.feature.melspectrogram(
        y=waveform,
        sr=sr,
        n_mels=n_mels,
        hop_length=hop_length,
    )
    return librosa.power_to_db(mel_power, ref=np.max)

# Directory handed to AddBackgroundNoise as its ``sounds_path``.
NOISE_PATH = "/kaggle/input/audio-noise-dataset"

# Data Augmentation
# Pipeline passed to BirdCLEFDataset in main(); the dataset calls it as
# ``augment(samples=..., sample_rate=SR)`` and only when labels are present.
# NOTE(review): the keyword names used below (min_snr/max_snr,
# min_band_part/max_band_part, min_frequency_band/max_frequency_band) and the
# FrequencyMask transform itself presumably depend on the installed
# audiomentations version -- TODO confirm against that version's API.
# NOTE(review): these look like waveform-domain transforms -- confirm that the
# array they receive is raw audio rather than a spectrogram.
augment = Compose([
    AddBackgroundNoise(sounds_path=NOISE_PATH, min_snr=0.0, max_snr=2.0, p=0.5),
    TimeMask(min_band_part=0.0, max_band_part=0.2, p=0.5),
    FrequencyMask(min_frequency_band=0.0, max_frequency_band=0.2, p=0.5),
])

# Custom Dataset
class BirdCLEFDataset(Dataset):
    """Dataset that turns audio files into single-channel dB mel spectrograms.

    Args:
        audio_paths: Sequence of audio file paths.
        labels: Optional sequence of per-file label vectors. When ``None`` the
            dataset is treated as unlabeled (inference) data and items are
            returned without a label.
        augment: Optional callable invoked as
            ``augment(samples=waveform, sample_rate=SR)``. It is applied only
            when ``labels`` is provided, i.e. never on inference data.

    NOTE(review): no padding or cropping is done here, so files of different
    durations presumably produce tensors of different widths -- confirm that
    clips are equal length before batching with the default collate function.
    """

    def __init__(self, audio_paths, labels=None, augment=None):
        self.audio_paths = audio_paths
        self.labels = labels
        self.augment = augment

    def __len__(self):
        """Return the number of audio files."""
        return len(self.audio_paths)

    def __getitem__(self, idx):
        """Return ``spec`` or ``(spec, label)`` for the file at ``idx``.

        ``spec`` is a float32 tensor with a leading channel dimension of 1;
        ``label`` is the float32 tensor built from ``labels[idx]``.
        """
        waveform, _ = librosa.load(self.audio_paths[idx], sr=SR)

        # Augment the raw waveform *before* the mel transform: the pipeline is
        # called with ``samples``/``sample_rate``, which describes audio, not a
        # dB spectrogram. Augmenting the spectrogram would mix noise samples
        # into dB values and "mask" with 0 dB, the loudest value under
        # ``ref=np.max``.
        if self.augment is not None and self.labels is not None:
            waveform = self.augment(samples=waveform, sample_rate=SR)

        mel_power = librosa.feature.melspectrogram(
            y=waveform, sr=SR, n_mels=N_MELS, hop_length=HOP_LENGTH
        )
        spec = librosa.power_to_db(mel_power, ref=np.max)
        spec = torch.tensor(spec, dtype=torch.float32).unsqueeze(0)  # Add channel dim

        if self.labels is None:
            return spec
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return spec, label

# Weighted Loss Function
class WeightedBCELoss(nn.Module):
    """Binary cross-entropy on logits with a per-class positive weight.

    Args:
        weight: Per-class ``pos_weight`` for
            ``binary_cross_entropy_with_logits``. May be a tensor, any
            array-like convertible by ``torch.as_tensor``, or ``None`` for an
            unweighted loss.

    The weight is registered as a non-persistent buffer so that
    ``module.to(device)`` moves it together with the module, while the
    ``state_dict`` layout stays unchanged (no extra key).
    """

    def __init__(self, weight):
        super().__init__()
        if weight is not None and not isinstance(weight, torch.Tensor):
            # Lists / ndarrays are converted to float32, the dtype the logits use.
            weight = torch.as_tensor(weight, dtype=torch.float32)
        self.register_buffer("weight", weight, persistent=False)

    def forward(self, input, target):
        """Return the mean weighted BCE-with-logits loss of ``input`` vs ``target``."""
        return nn.functional.binary_cross_entropy_with_logits(
            input, target, pos_weight=self.weight
        )

# Model Setup
def get_model(num_classes=NUM_CLASSES, in_channels=1):
    """Build a pretrained EfficientNet-B0 with a fresh classification head.

    Args:
        num_classes: Number of output logits of the replaced ``_fc`` layer.
        in_channels: Number of input channels the network accepts. Defaults to
            1 because the dataset in this file yields single-channel
            spectrograms (``unsqueeze(0)``) and the ONNX dummy input is also
            single-channel; the library default of 3 would reject them.

    Returns:
        The EfficientNet model with ``_fc`` replaced by a new ``nn.Linear``.

    NOTE(review): with ``in_channels != 3`` the stem convolution presumably
    cannot reuse the pretrained RGB weights -- confirm how the installed
    efficientnet_pytorch version initialises it.
    """
    model = EfficientNet.from_pretrained('efficientnet-b0', in_channels=in_channels)
    model._fc = nn.Linear(model._fc.in_features, num_classes)
    return model

# Training Loop
def train_model(model, train_loader, device, optimizer, criterion, epochs=EPOCHS):
    """Train ``model`` in place and print the mean batch loss after each epoch.

    Args:
        model: Module to optimise; switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.
        optimizer: Optimizer stepping the model's parameters.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        epochs: Number of passes over ``train_loader``.
    """
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        # Count batches while iterating instead of calling len(train_loader):
        # len() is unavailable for loaders without __len__ and is zero for an
        # empty loader, which previously raised ZeroDivisionError.
        num_batches = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # An epoch without batches has no meaningful loss; report NaN.
        mean_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}")

# Inference
def predict(model, test_loader, device):
    """Return sigmoid probabilities for every sample yielded by ``test_loader``.

    Args:
        model: Module producing logits; switched to evaluation mode.
        test_loader: Iterable yielding input batches (no labels).
        device: Device each batch is moved to before the forward pass.

    Returns:
        A NumPy array stacking the per-sample probability rows of all batches,
        in loader order.
    """
    model.eval()
    with torch.no_grad():
        # One NumPy array of probabilities per batch.
        batch_probs = [
            torch.sigmoid(model(batch.to(device))).cpu().numpy()
            for batch in test_loader
        ]
    # Flatten batch-wise arrays into one list of per-sample rows.
    return np.array([row for probs in batch_probs for row in probs])

# Parallel Spectrogram Computation
def compute_spectrogram(audio_path):
    """Return the mel spectrogram of ``audio_path`` using the default settings.

    Thin single-argument wrapper around ``audio_to_mel_spectrogram``; it is
    the function mapped over paths by ``preprocess_in_parallel``.
    """
    return audio_to_mel_spectrogram(audio_path)

def preprocess_in_parallel(audio_paths, num_workers=4):
    """Compute spectrograms for ``audio_paths`` using a pool of worker processes.

    Args:
        audio_paths: Iterable of audio file paths.
        num_workers: Number of worker processes in the pool.

    Returns:
        A list of spectrograms, one per path, in the same order as
        ``audio_paths``.
    """
    worker_pool = Pool(processes=num_workers)
    with worker_pool:
        # map() blocks until every path is processed, so the pool can be
        # torn down as soon as the ``with`` block exits.
        results = worker_pool.map(compute_spectrogram, audio_paths)
    return results

# Main Execution
def main():
    """Run the demo pipeline: build loaders, train, predict, export to ONNX.

    The audio paths and labels below are placeholders and must be replaced
    with real data before this can run end to end.
    """
    # Example paths and labels (replace with actual data)
    train_audio_paths = ["path/to/train/audio1.wav", "path/to/train/audio2.wav"]
    train_labels = np.random.randint(0, 2, (len(train_audio_paths), NUM_CLASSES))  # Dummy labels
    test_audio_paths = ["path/to/test/audio1.wav", "path/to/test/audio2.wav"]

    # Datasets and Loaders
    train_dataset = BirdCLEFDataset(train_audio_paths, train_labels, augment=augment)
    test_dataset = BirdCLEFDataset(test_audio_paths)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # Model, Optimizer, Loss
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = get_model().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    class_weights = torch.tensor([1.0] * NUM_CLASSES).to(device)  # Adjust weights as needed
    criterion = WeightedBCELoss(weight=class_weights)

    # Train
    train_model(model, train_loader, device, optimizer, criterion)

    # Predict
    predictions = predict(model, test_loader, device)
    print("Predictions:", predictions)

    # Save model to ONNX (optional)
    # Export in inference mode so dropout/batch-norm are traced as they run
    # at prediction time.
    model.eval()
    # efficientnet_pytorch defaults to a memory-efficient swish built on a
    # custom autograd function; its README says to switch to the standard
    # implementation before ONNX export.
    if hasattr(model, "set_swish"):
        model.set_swish(memory_efficient=False)
    # NOTE(review): 313 frames presumably corresponds to a 5 s clip at
    # SR=32000 with HOP_LENGTH=512 -- confirm against the real clip length.
    dummy_input = torch.randn(1, 1, N_MELS, 313).to(device)  # Adjust shape as needed
    torch.onnx.export(model, dummy_input, "birdclef_model.onnx")

# Run the demo pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()