In [23]:
import os
import random
import pandas as pd
from sklearn.model_selection import train_test_split

# Build a per-species capped index of spectrogram PNGs, then write the full
# index and a stratified train/validation split to CSV.
spectrogram_root = "E:/birdclef-2024/spectrograms"
samples_per_class = 250  # upper bound on the number of PNGs kept per species

# Dedicated seeded generator: the per-species subsample (and therefore every
# CSV written below) is the same after a kernel restart, and the global
# `random` state is left untouched.
sampler = random.Random(42)

data = []
# os.listdir returns entries in arbitrary order; sorting makes the seeded
# sampling see the same sequence on every run.
species_dirs = sorted(os.listdir(spectrogram_root))

for species in species_dirs:
    species_dir = os.path.join(spectrogram_root, species)
    if not os.path.isdir(species_dir):
        continue

    all_files = sorted(f for f in os.listdir(species_dir) if f.endswith(".png"))
    # Species at or below the cap keep every file; larger ones are subsampled.
    if len(all_files) <= samples_per_class:
        selected_files = all_files
    else:
        selected_files = sampler.sample(all_files, samples_per_class)

    for f in selected_files:
        data.append({
            "filepath": os.path.join(species_dir, f),
            "label": species
        })

# Create DataFrame and shuffle (fixed seed so the row order is reproducible)
df = pd.DataFrame(data)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
df.to_csv("balanced_dataset_inc.csv", index=False)

# Stratified 80/20 train/validation split on the species label.
# NOTE(review): stratification needs at least two rows per label; a species
# folder with a single PNG would make this call fail - confirm none exist.
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["label"], random_state=42)
train_df.to_csv("train_split_inc.csv", index=False)
val_df.to_csv("val_split_inc.csv", index=False)

print(f"Species included: {df['label'].nunique()}")
print(f"Total: {len(df)}, Train: {len(train_df)}, Val: {len(val_df)}")


Species included: 181
Total: 40335, Train: 32268, Val: 8067


In [25]:
import pandas as pd
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import torch
import os

# Load the train/validation splits that were exported to CSV.
train_df, val_df = (
    pd.read_csv(csv_name)
    for csv_name in ("train_split_inc.csv", "val_split_inc.csv")
)

# Give every label found in the training split an integer id, assigned in
# sorted label order starting from 0.
label2idx = dict(
    (label, idx) for idx, label in enumerate(sorted(train_df['label'].unique()))
)

# Preprocessing pipeline: resize to 300x300, convert to a tensor, then
# normalise the three channels with mean 0.5 and std 0.5.
image_transforms = transforms.Compose([
    transforms.Resize((300, 300)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # Adjust if needed
])


In [27]:
class BirdCLEFDataset(Dataset):
    """Image dataset backed by a DataFrame of file paths and labels.

    Args:
        df: DataFrame with a ``filepath`` column (path of an image file) and a
            ``label`` column. The index is reset, so items are addressed by
            position 0..len(df)-1.
        transform: Optional callable applied to the loaded RGB PIL image.
        label2idx: Optional mapping from label value to integer class id.
            When omitted, a mapping is built from the sorted unique labels of
            ``df``. Pass one shared mapping to every split so that class ids
            agree between them.

    Each item is a ``(image, class_id)`` pair.
    """

    def __init__(self, df, transform=None, label2idx=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform
        if label2idx is None:
            # Previously a missing mapping was stored as None and only failed
            # on the first __getitem__; derive a usable default instead.
            label2idx = {
                label: idx
                for idx, label in enumerate(sorted(self.df['label'].unique()))
            }
        self.label2idx = label2idx

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at row ``idx`` and return ``(image, class_id)``.

        Raises:
            KeyError: if ``idx`` is not a row index or the row's label is
                missing from ``label2idx``.
        """
        row = self.df.loc[idx]
        # convert() returns a new, fully loaded image, so the file handle can
        # be closed as soon as the conversion is done.
        with Image.open(row['filepath']) as source:
            image = source.convert("RGB")
        label = self.label2idx[row['label']]
        if self.transform:
            image = self.transform(image)
        return image, label


In [29]:
# Both splits go through the same preprocessing and label -> index mapping.
train_dataset = BirdCLEFDataset(
    df=train_df,
    transform=image_transforms,
    label2idx=label2idx,
)
val_dataset = BirdCLEFDataset(
    df=val_df,
    transform=image_transforms,
    label2idx=label2idx,
)

# Batches of 32 samples; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)


In [31]:
import timm
import torch.nn as nn
import torch

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Size the classifier head from the label mapping rather than a hard-coded
# 181, so the number of outputs always matches the class ids the datasets
# produce.
model = timm.create_model(
    "efficientnet_b3",
    pretrained=True,
    num_classes=len(label2idx),
)
model = model.to(device)


In [33]:
import torch.optim as optim

# Classification loss applied to the model outputs and integer class ids.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

# Plateau scheduler in 'min' mode: scales the learning rate by 0.5 when the
# value passed to scheduler.step() stops decreasing (patience of 2).
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer,
    mode='min',
    factor=0.5,
    patience=2,
)

In [35]:
def evaluate(model, loader, device=None):
    """Return the top-1 accuracy of ``model`` over ``loader`` as a percentage.

    Args:
        model: Module called on each batch of images; it is switched to eval
            mode and left in that mode.
        loader: Iterable yielding ``(images, labels)`` tensor pairs.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters).

    Returns:
        ``100 * correct / total`` as a float, or ``0.0`` when ``loader``
        yields no samples (instead of raising ZeroDivisionError).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest output along dim 1.
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        return 0.0
    return 100 * correct / total


In [None]:
from tqdm import tqdm
import gc
import torch

# Training driver: trains for up to `num_epochs`, checkpoints every epoch,
# keeps the best-validation-accuracy weights separately, and stops early when
# validation accuracy has not improved for `patience` consecutive epochs.
best_val_acc = 0
patience = 3   # epochs without a validation-accuracy gain before stopping
counter = 0    # consecutive epochs without improvement so far
num_epochs = 6
early_stop = False

for epoch in range(num_epochs):
    if early_stop:
        print("⏹️ Early stopping triggered.")
        break

    model.train()
    running_loss = 0
    correct = 0
    total = 0

    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)

    for images, labels in loop:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar loss once and reuse it for the running total and
        # the progress bar.
        batch_loss = loss.item()
        running_loss += batch_loss
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

        loop.set_postfix(loss=batch_loss)

        # Drop per-batch references. torch.cuda.empty_cache() is deliberately
        # NOT called here: emptying the allocator cache after every batch
        # makes the next batch re-allocate its memory and slows training;
        # the cache is emptied once per epoch below instead.
        del images, labels, outputs

    # Epoch-level metrics
    avg_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total
    val_acc = evaluate(model, val_loader)

    # Optional scheduler: stepped on the average training loss when a
    # `scheduler` object has been defined.
    if "scheduler" in locals():
        scheduler.step(avg_loss)

    print(f"Epoch {epoch+1} | Loss: {avg_loss:.4f} | Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")

    # Save model after every epoch
    torch.save(model.state_dict(), f"efficientnet_b3_epoch{epoch+1}.pth")
    print(f"💾 Saved model after epoch {epoch+1} as efficientnet_b3_epoch{epoch+1}.pth")

    # Save best separately
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_efficientnet_b3_model.pth")
        print(f"🏅 Saved best model at epoch {epoch+1} with Val Acc: {val_acc:.2f}%")
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            early_stop = True
            # Report the configured patience rather than a hard-coded "3".
            print(f"🛑 No improvement for {patience} epochs. Early stopping.")

    gc.collect()
    torch.cuda.empty_cache()


                                                                            

Epoch 1 | Loss: 3.5246 | Train Acc: 28.18% | Val Acc: 53.03%
💾 Saved model after epoch 1 as efficientnet_b3_epoch1.pth
🏅 Saved best model at epoch 1 with Val Acc: 53.03%


                                                                            

Epoch 2 | Loss: 1.5879 | Train Acc: 65.10% | Val Acc: 68.67%
💾 Saved model after epoch 2 as efficientnet_b3_epoch2.pth
🏅 Saved best model at epoch 2 with Val Acc: 68.67%


                                                                            

Epoch 3 | Loss: 0.8213 | Train Acc: 82.03% | Val Acc: 73.83%
💾 Saved model after epoch 3 as efficientnet_b3_epoch3.pth
🏅 Saved best model at epoch 3 with Val Acc: 73.83%


                                                                            

Epoch 4 | Loss: 0.3841 | Train Acc: 92.13% | Val Acc: 75.33%
💾 Saved model after epoch 4 as efficientnet_b3_epoch4.pth
🏅 Saved best model at epoch 4 with Val Acc: 75.33%


Epoch 5/6:  86%|████████▌ | 866/1009 [1:19:26<13:01,  5.46s/it, loss=0.112] 

In [None]:
# Restore the weights saved after epoch 4, mapping tensors onto `device`,
# then make sure the model is on `device` and in evaluation mode.
model.load_state_dict(
    state_dict=torch.load(f="efficientnet_b3_epoch4.pth", map_location=device)
)
model.to(device).eval()
