In [None]:
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
from tqdm import tqdm
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import random


# Root directory that is walked recursively for the `.ogg` training recordings.
input_dir = '/kaggle/input/birdclef-2025/train_audio'

In [None]:
def audio_to_melspectrogram(file_path, save_path):
    """Render an audio file as a mel-spectrogram image and write it to disk.

    The audio is loaded at its native sampling rate (``sr=None``), converted
    to a 128-band mel power spectrogram, expressed in decibels relative to the
    spectrogram's own maximum, and drawn without axes using the ``magma``
    colormap.

    Args:
        file_path: Path of the audio file to read.
        save_path: Destination path of the rendered image.

    Returns:
        None. The image is written to ``save_path`` as a side effect.
    """
    y, sr = librosa.load(file_path, sr=None)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    S_DB = librosa.power_to_db(S, ref=np.max)

    # 2.56 in at 100 dpi is a nominal 256x256 px canvas; bbox_inches='tight'
    # may trim it, so the saved size is not guaranteed to be exactly that.
    fig, ax = plt.subplots(figsize=(2.56, 2.56), dpi=100)
    try:
        librosa.display.specshow(S_DB, sr=sr, cmap='magma', ax=ax)
        ax.axis('off')
        fig.tight_layout(pad=0)
        fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure: this function is called once per
        # recording, so a figure leaked on error would accumulate in memory.
        plt.close(fig)

# Collect every `.ogg` recording below `input_dir`. os.walk yields entries in
# arbitrary filesystem order, so the list is sorted to make the processing
# order reproducible between runs.
file_list = sorted(
    os.path.join(root, file)
    for root, _, files in os.walk(input_dir)
    for file in files
    if file.endswith('.ogg')
)


output_dir = '/kaggle/working/train_images'
os.makedirs(output_dir, exist_ok=True)

# NOTE(review): images are written flat into `output_dir` keyed by basename
# only, so two recordings with the same file name in different sub-folders
# would map to the same PNG — confirm names are unique across sub-folders.
for input_path in tqdm(file_list):
    # splitext swaps only the trailing extension; str.replace would also
    # rewrite any earlier '.ogg' occurrence inside the file name.
    stem = os.path.splitext(os.path.basename(input_path))[0]
    output_path = os.path.join(output_dir, stem + '.png')
    # Skip images that already exist so an interrupted run can be resumed.
    if not os.path.exists(output_path):
        audio_to_melspectrogram(input_path, output_path)

  5%|▌         | 1555/28564 [15:52<2:57:58,  2.53it/s] 

In [None]:
# Preprocessing applied to each spectrogram image: resize to 224x224, then
# convert the PIL image to a tensor.
transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

# Define a dummy dataset using a small random subset of the images
class SpectrogramDataset(Dataset):
    """Demonstration dataset over a seeded random subset of image files.

    Every item is paired with the same constant label ``[1.0]``; the labels
    carry no real information and exist only to exercise the training loop.

    Args:
        image_dir: Directory whose entries are treated as image files.
        transform: Optional callable applied to each loaded RGB image.
        num_samples: Maximum number of files to keep (default 10). When the
            directory holds fewer files, all of them are used. ``None`` keeps
            every file.
        seed: Seed of the private random generator that picks the subset.
    """

    def __init__(self, image_dir, transform=None, num_samples=10, seed=42):
        self.image_dir = image_dir
        # os.listdir order is arbitrary; sorting first makes the seeded
        # sample identical from run to run.
        available = sorted(os.listdir(image_dir))
        if num_samples is None:
            self.image_files = available
        else:
            # A private generator avoids reseeding the global `random` state,
            # and capping the size avoids ValueError on small directories.
            rng = random.Random(seed)
            self.image_files = rng.sample(
                available, min(num_samples, len(available))
            )
        self.transform = transform

    def __len__(self):
        """Return the number of selected image files."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the file at position ``idx``."""
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        # convert() returns an independent image, so the source file can be
        # closed as soon as the conversion is done.
        with Image.open(img_path) as handle:
            image = handle.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        label = torch.tensor([1.0])  # dummy binary label for demonstration
        return image, label

# Seed torch so that weight initialisation and DataLoader shuffling repeat
# from run to run (GPU kernels may still introduce small nondeterminism).
torch.manual_seed(42)

dataset = SpectrogramDataset('/kaggle/working/train_images', transform=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Build ResNet18 with randomly initialised weights (weights=None loads no
# pretrained checkpoint) and replace the classifier head with a single logit.
model = models.resnet18(weights=None)
model.fc = nn.Linear(model.fc.in_features, 1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Binary cross-entropy on raw logits, optimised with Adam
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Training loop (10 epochs)
model.train()
for epoch in range(10):
    # Sum of the per-batch losses for this epoch (not an average).
    total_loss = 0.0
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

In [None]:
import pandas as pd

# Load sample submission format
sub = pd.read_csv('/kaggle/input/birdclef-2025/sample_submission.csv')

# Pick one spectrogram deterministically: os.listdir order is arbitrary, so
# take the first name in sorted order and fail clearly if there is none.
image_dir = '/kaggle/working/train_images'
candidates = sorted(os.listdir(image_dir))
if not candidates:
    raise FileNotFoundError(f"No spectrogram images found in {image_dir}")
img_path = os.path.join(image_dir, candidates[0])
with Image.open(img_path) as handle:
    img = handle.convert('RGB')
# Add a leading batch dimension before moving the tensor to the model's device.
img_tensor = transform(img).unsqueeze(0).to(device)

model.eval()
with torch.no_grad():
    output = model(img_tensor)
    prob = torch.sigmoid(output).item()

print(f"Predicted probability used for submission: {prob:.4f}")

# Fill every column after the first with the same probability; this is a
# placeholder submission, not a per-class prediction.
for col in sub.columns[1:]:
    sub[col] = prob

sub.to_csv('/kaggle/working/submission.csv', index=False)