Getting data from `ml_model_gpux2_predictions`

In [1]:
import os
import torch
import torchaudio
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from tqdm import tqdm
import torchaudio.transforms as T

In [2]:
GITHUB_DIR = r'/kaggle/working/BirdCLEF_2025_KaggleCompetition'
BASE_DIR = r'/kaggle/input/birdclef-2025/'

In [3]:
# Clone the repo normally (default branch is probably main)
!git clone https://Sai-Sam-N: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@github.com/Sai-Sam-N/BirdCLEF_2025_KaggleCompetition.git $GITHUB_DIR

# Go into the repo
%cd $GITHUB_DIR

# Checkout the correct branch
!git checkout ml_model_gpux2_predictions

Cloning into '/kaggle/working/BirdCLEF_2025_KaggleCompetition'...
remote: Enumerating objects: 41896, done.[K
remote: Counting objects: 100% (5/5), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remote: Total 41896 (delta 0), reused 0 (delta 0), pack-reused 41891 (from 2)[K
Receiving objects: 100% (41896/41896), 3.85 GiB | 34.94 MiB/s, done.
Resolving deltas: 100% (8531/8531), done.
Updating files: 100% (28572/28572), done.
/kaggle/working/BirdCLEF_2025_KaggleCompetition
Branch 'ml_model_gpux2_predictions' set up to track remote branch 'ml_model_gpux2_predictions' from 'origin'.
Switched to a new branch 'ml_model_gpux2_predictions'


In [4]:
MODEL_PATH = os.path.join(GITHUB_DIR, "baseline_model.pt")
TEST_DIR = "/kaggle/input/birdclef-2025/test_soundscapes"
SAMPLE_SUBMISSION_PATH = os.path.join(BASE_DIR, "sample_submission.csv")

In [5]:
import os
import torchaudio
import pandas as pd
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# Model - redefining

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import models

In [7]:
# ========== MODEL ==========
class BirdCLEFModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.base = models.resnet18(pretrained=True)
        self.base.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.base.fc = nn.Linear(self.base.fc.in_features, NUM_CLASSES)

    def forward(self, x):
        return self.base(x)

In [8]:
# ========== CONFIG ==========
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SAMPLE_RATE = 32000
DURATION = 5
NUM_SAMPLES = SAMPLE_RATE * DURATION
N_MELS = 128
TEST_AUDIO_DIR = "/kaggle/input/birdclef-2025/test_soundscapes"
MODEL_PATH = os.path.join(GITHUB_DIR, "baseline_model.pt")
LABELS_CSV = os.path.join(GITHUB_DIR, "mels_tracker_updated.csv")
SUBMISSION_PATH = os.path.join(r'/kaggle/working/', "submission.csv")
NUM_CLASSES = 206

In [9]:
# Instantiate the model
model = BirdCLEFModel().to(DEVICE)

# Load the trained weights
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))

# Set model to evaluation mode
model.eval()

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 103MB/s]


BirdCLEFModel(
  (base): ResNet(
    (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_

# Predictions

In [10]:
# --- Load taxonomy to get species list (column order for submission) ---
taxonomy_df = pd.read_csv(os.path.join(BASE_DIR, "taxonomy.csv"))
species_list = taxonomy_df['primary_label'].unique().tolist()  # all species labels

In [11]:
# --- Reconstruct label encoder (primary_label <-> class_index) from training labels ---
labels_df = pd.read_csv(os.path.join(GITHUB_DIR, "mels_tracker_updated.csv"))
labels_df['class_index'] = labels_df['primary_label'].astype('category').cat.codes
class_map = labels_df[['primary_label', 'class_index']].drop_duplicates().sort_values('class_index')
idx_to_label = class_map['primary_label'].tolist()

In [12]:
# ========== MEL TRANSFORMS ==========
mel_transform = T.MelSpectrogram(
    sample_rate=SAMPLE_RATE,
    n_fft=1024,
    hop_length=512,
    n_mels=N_MELS
)

In [13]:
amplitude_to_db = T.AmplitudeToDB()

In [14]:
def preprocess_waveform(waveform, sr):
    if sr != SAMPLE_RATE:
        resampler = T.Resample(sr, SAMPLE_RATE)
        waveform = resampler(waveform)
    if waveform.ndim > 1:
        waveform = waveform.mean(dim=0)
    return waveform

def waveform_to_logmel(waveform):
    mel_spec = mel_transform(waveform)
    logmel = amplitude_to_db(mel_spec)
    logmel = (logmel - logmel.mean()) / logmel.std()
    return logmel.unsqueeze(0)  # shape: [1, 1, n_mels, time]

In [15]:
# ========== GET CLASS MAPPING ==========
# df = pd.read_csv(LABELS_CSV)
# class_map = df[['primary_label', 'class_index']].drop_duplicates().sort_values('class_index')
# idx_to_label = class_map['primary_label'].tolist()
num_classes = len(idx_to_label)

In [16]:
# # ========== LOAD MODEL ==========
# model = BirdCLEFModel()
# model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
# model.to(DEVICE)
# model.eval()

In [17]:
# ========== INFERENCE ==========
submission_rows = []
segment_duration = 5  # seconds
stride = 5  # non-overlapping

for file in tqdm(os.listdir(TEST_AUDIO_DIR)):
    if not file.endswith(".ogg"):
        continue

    path = os.path.join(TEST_AUDIO_DIR, file)
    waveform, sr = torchaudio.load(path)
    waveform = preprocess_waveform(waveform, sr)

    total_samples = waveform.shape[0]
    num_segments = total_samples // NUM_SAMPLES

    for i in range(num_segments):
        start_sample = i * NUM_SAMPLES
        segment = waveform[start_sample:start_sample + NUM_SAMPLES]

        if segment.shape[0] < NUM_SAMPLES:
            pad = NUM_SAMPLES - segment.shape[0]
            segment = torch.nn.functional.pad(segment, (0, pad))

        logmel = waveform_to_logmel(segment)
        logmel = logmel.to(DEVICE)

        with torch.no_grad():
            preds = model(logmel).sigmoid().cpu().numpy()[0]

        row_id = f"{file.replace('.ogg','')}_{(i+1)*5}"
        row = [row_id] + preds.tolist()
        submission_rows.append(row)

100%|██████████| 1/1 [00:00<00:00, 6393.76it/s]


In [18]:
# ========== FORMAT SUBMISSION ==========
sample_sub = pd.read_csv("/kaggle/input/birdclef-2025/sample_submission.csv")
species_cols = sample_sub.columns[1:]  # skip 'row_id'
submission_df = pd.DataFrame(submission_rows, columns=['row_id'] + idx_to_label)

In [19]:
submission_rows

[]

In [20]:
# Reorder columns to match sample_submission.csv
submission_df = submission_df[['row_id'] + list(species_cols)]

In [21]:
print(submission_df.head())

Empty DataFrame
Columns: [row_id, 1139490, 1192948, 1194042, 126247, 1346504, 134933, 135045, 1462711, 1462737, 1564122, 21038, 21116, 21211, 22333, 22973, 22976, 24272, 24292, 24322, 41663, 41778, 41970, 42007, 42087, 42113, 46010, 47067, 476537, 476538, 48124, 50186, 517119, 523060, 528041, 52884, 548639, 555086, 555142, 566513, 64862, 65336, 65344, 65349, 65373, 65419, 65448, 65547, 65962, 66016, 66531, 66578, 66893, 67082, 67252, 714022, 715170, 787625, 81930, 868458, 963335, amakin1, amekes, ampkin1, anhing, babwar, bafibi1, banana, baymac, bbwduc, bicwre1, bkcdon, bkmtou1, blbgra1, blbwre1, blcant4, blchaw1, blcjay1, blctit1, blhpar1, blkvul, bobfly1, bobher1, brtpar1, bubcur1, bubwre1, bucmot3, bugtan, butsal1, cargra1, cattyr, chbant1, chfmac1, cinbec1, cocher1, cocwoo1, colara1, colcha1, compau, compot1, ...]
Index: []

[0 rows x 207 columns]


In [22]:
submission_df.to_csv(SUBMISSION_PATH, index=False) # making submission
submission_df.to_csv(os.path.join(GITHUB_DIR, 'submission.csv'), index=False) # also adding to github

In [23]:
print("Submission file saved to :", SUBMISSION_PATH)

Submission file saved to : /kaggle/working/submission.csv


In [24]:
SUBMISSION_PATH

'/kaggle/working/submission.csv'

In [25]:
%cd $GITHUB_DIR
!git add .
!git commit -m "Add inference results and submission from test run"
!git push origin ml_model_gpux2_predictions

/kaggle/working/BirdCLEF_2025_KaggleCompetition
Author identity unknown

*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
Omit --global to set the identity only in this repository.

fatal: unable to auto-detect email address (got 'root@5a5434e62718.(none)')
Everything up-to-date
