In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import soundfile as sf
from tqdm import tqdm
from sklearn.utils import resample
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, roc_auc_score
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings('ignore')

# Settings
AUDIO_BASE = "/kaggle/input/birdclef-2025/train_audio/"
df = pd.read_csv("/kaggle/input/birdclef-2025/train.csv")
top10 = ['grekis', 'compau', 'trokin', 'roahaw', 'banana', 'whtdov', 'socfly1', 'yeofly1', 'bobfly1', 'wbwwre1']
target_birds = ['grekis', 'compau', 'trokin']
models = {}

# Feature / training constants (previously inlined magic numbers).
CHUNK_SECONDS = 5                                # each clip is cut into 5 s chunks
N_FFT = 1024
HOP_LENGTH = 512
N_BINS = 3200                                    # length of the binary feature vector
BIN_EDGES = np.linspace(0, 16000, N_BINS + 1)    # 5 Hz wide bands covering 0-16 kHz
THRESHOLD_PERCENTILE = 76.7                      # a band is "active" above this percentile
N_SPLITS = 5
SEED = 42


def binary_band_vector(chunk, sr):
    """Turn one mono audio chunk into a 0/1 vector over the BIN_EDGES frequency bands.

    For every STFT frequency row the peak dB level over time is taken. A band is
    set to 1 when an STFT frequency falls inside it and that frequency's peak
    level is strictly above the THRESHOLD_PERCENTILE percentile of all peak
    levels in the chunk. Bands that contain no STFT frequency stay 0.

    Args:
        chunk: 1-D array of audio samples.
        sr: sample rate of ``chunk`` in Hz.

    Returns:
        Integer array of length N_BINS containing only 0 and 1.
    """
    spec = librosa.stft(chunk, n_fft=N_FFT, hop_length=HOP_LENGTH)
    spec_db = librosa.power_to_db(np.abs(spec) ** 2, ref=np.max)
    energy_per_freq = spec_db.max(axis=1)
    threshold = np.percentile(energy_per_freq, THRESHOLD_PERCENTILE)

    # Map each STFT frequency to the band j with BIN_EDGES[j] <= f < BIN_EDGES[j+1].
    # searchsorted performs the same half-open comparisons as an explicit mask per
    # band, so the result matches a band-by-band loop without 3200 Python iterations.
    freqs = librosa.fft_frequencies(sr=sr, n_fft=N_FFT)
    band_idx = np.searchsorted(BIN_EDGES, freqs, side='right') - 1
    # Frequencies at or above the last edge (e.g. Nyquist at 16 kHz) belong to no band.
    active = (band_idx >= 0) & (band_idx < N_BINS) & (energy_per_freq > threshold)

    vector = np.zeros(N_BINS, dtype=int)
    vector[band_idx[active]] = 1
    return vector


def make_classifier(y_fit):
    """Build an unfitted XGBClassifier whose positive-class weight balances ``y_fit``.

    Args:
        y_fit: 1-D integer array of 0/1 labels the classifier will be fitted on;
            must contain at least one positive.
    """
    n_pos = int(y_fit.sum())
    n_neg = len(y_fit) - n_pos
    return XGBClassifier(
        n_estimators=500,
        scale_pos_weight=n_neg / n_pos,
        use_label_encoder=False,
        eval_metric='logloss',
        random_state=SEED
    )


# Main loop: one binary (bird vs. other top-10 species) classifier per target bird
for bird in target_birds:
    print(f"\n==================== 🐦 Training for '{bird}' ====================")

    # Create labels: recordings of this bird are positives, other top-10 birds negatives
    pos_df = df[df['primary_label'] == bird].assign(label=1)
    neg_df = df[df['primary_label'].isin(top10) & (df['primary_label'] != bird)].assign(label=0)

    # Balance: downsample negatives to the number of positives, then shuffle
    neg_df_balanced = resample(neg_df, replace=False, n_samples=min(len(pos_df), len(neg_df)), random_state=SEED)
    combined_df = pd.concat([pos_df, neg_df_balanced]).sample(frac=1, random_state=SEED)

    feature_rows, label_rows = [], []

    for _, row in tqdm(combined_df.iterrows(), total=len(combined_df), desc=f"Extracting {bird}"):
        full_path = os.path.join(AUDIO_BASE, row['filename'])
        try:
            y_raw, sr = sf.read(full_path)
            # soundfile returns (frames, channels) for multichannel files; the
            # chunking and STFT below expect a 1-D signal, so downmix to mono.
            if y_raw.ndim > 1:
                y_raw = y_raw.mean(axis=1)

            samples_per_chunk = sr * CHUNK_SECONDS
            num_chunks = len(y_raw) // samples_per_chunk
            if num_chunks == 0:
                continue  # clip shorter than one chunk: nothing to extract

            chunk_features = [
                binary_band_vector(y_raw[i * samples_per_chunk:(i + 1) * samples_per_chunk], sr)
                for i in range(num_chunks)
            ]

            # Aggregate chunks: a band is kept when it is active in more than half of them
            median_vector = np.median(np.array(chunk_features), axis=0)
            feature_rows.append((median_vector > 0.5).astype(int))
            label_rows.append(row['label'])

        except Exception as e:
            # Best effort: report the unreadable/unprocessable file and move on
            print(f"❌ {row['filename']} failed: {e}")

    # Final dataset
    y = np.array(label_rows, dtype=int)
    class_counts = np.bincount(y, minlength=2)
    if class_counts.min() < N_SPLITS:
        # Stacking an empty list, stratified splitting and AUC all fail without
        # enough examples of both classes; skip this bird instead of aborting the run.
        print(f"⚠️ {bird}: only {class_counts[1]} positive / {class_counts[0]} negative usable clips, "
              f"need at least {N_SPLITS} of each — skipping")
        continue
    X = np.stack(feature_rows)

    # Evaluate with Stratified 5-Fold CV
    f1s, aucs = [], []
    skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        clf = make_classifier(y_train)
        clf.fit(X_train, y_train)
        y_proba = clf.predict_proba(X_val)[:, 1]
        y_pred = (y_proba > 0.5).astype(int)

        f1s.append(f1_score(y_val, y_pred))
        aucs.append(roc_auc_score(y_val, y_proba))

    print(f"📊 {bird} | Avg F1 Score: {np.mean(f1s):.4f} | Avg AUC-ROC: {np.mean(aucs):.4f}")

    # The fold models only exist to estimate performance. The model kept for
    # inference is refit on every available example rather than being whichever
    # classifier the last fold happened to produce (trained on 80% of the data).
    clf = make_classifier(y)
    clf.fit(X, y)
    models[bird] = clf

In [None]:
import os
import librosa
import numpy as np
import pandas as pd

# Paths
AUDIO_BASE_TEST = '/kaggle/input/birdclef-2025/test_soundscapes/'
class_labels = sorted(os.listdir('/kaggle/input/birdclef-2025/train_audio/'))  # All 206 birds

# Inference settings
TEST_SR = 32000        # every soundscape is loaded at this rate
DEFAULT_PROB = 0.001   # probability written for birds without a trained model
samples_per_chunk = TEST_SR * 5

# The sample rate is fixed, so the STFT-frequency -> 5 Hz band mapping is the same
# for every chunk and is computed once. Band j covers bin_edges[j] <= f < bin_edges[j+1];
# searchsorted applies exactly those half-open comparisons.
bin_edges = np.linspace(0, 16000, 3201)
freqs = librosa.fft_frequencies(sr=TEST_SR, n_fft=1024)
freq_band = np.searchsorted(bin_edges, freqs, side='right') - 1
# Frequencies at or above the last edge (Nyquist) fall into no band.
band_in_range = (freq_band >= 0) & (freq_band < 3200)

# Only these columns need an actual model call; everything else gets DEFAULT_PROB.
trained_birds = [bird for bird in class_labels if bird in models]

# Submission storage
submission_rows = []

# Loop over test soundscapes
test_files = sorted([f for f in os.listdir(AUDIO_BASE_TEST) if f.endswith('.ogg')])
for file in test_files:
    full_path = os.path.join(AUDIO_BASE_TEST, file)
    signal, sr = librosa.load(full_path, sr=TEST_SR)
    file_stem = os.path.splitext(file)[0]  # strip only the extension

    for i in range(0, len(signal), samples_per_chunk):
        chunk = signal[i:i+samples_per_chunk]
        if len(chunk) < samples_per_chunk:
            continue  # skip short ones

        # row_id is "<soundscape>_<end second of the chunk>"
        row_id = f"{file_stem}_{(i//samples_per_chunk+1)*5}"
        bird_probs = {}

        try:
            # Binary feature extraction: a band is 1 when an STFT frequency inside it
            # peaks above the 76.7th percentile of per-frequency peak levels.
            S = librosa.stft(chunk, n_fft=1024, hop_length=512)
            S_power = np.abs(S) ** 2
            S_db = librosa.power_to_db(S_power, ref=np.max)
            energy_per_freq = S_db.max(axis=1)
            adaptive_threshold = np.percentile(energy_per_freq, 76.7)

            binary_vector = np.zeros(3200, dtype=int)
            binary_vector[freq_band[band_in_range & (energy_per_freq > adaptive_threshold)]] = 1

            # Predict with models
            features = binary_vector.reshape(1, -1)
            for bird in trained_birds:
                bird_probs[bird] = models[bird].predict_proba(features)[:, 1][0]

        except Exception as e:
            print(f"❌ Error processing chunk from {file}: {e}")
            # Fall back to defaults for the whole row: a chunk that is missing from
            # the submission is worse than an uninformative prediction.
            bird_probs = {}

        row = [row_id] + [bird_probs.get(bird, DEFAULT_PROB) for bird in class_labels]
        submission_rows.append(row)

# Save submission
submission_df = pd.DataFrame(submission_rows, columns=['row_id'] + class_labels)
submission_df.to_csv("submission.csv", index=False)