In [None]:
import os
import random
import pandas as pd
from sklearn.model_selection import train_test_split

# Path to your spectrograms folder
spectrogram_root = "E:/birdclef-2024/spectrograms"
samples_per_class = 50  # Max number of samples per species
random_seed = 42  # Drives both the per-species sampling and the train/val split

# Dedicated, seeded generator: the per-species sampling becomes reproducible
# without touching the global `random` state used elsewhere.
rng = random.Random(random_seed)

data = []
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore the seeded sampling and split) identical between runs.
species_dirs = sorted(os.listdir(spectrogram_root))

for species in species_dirs:
    species_dir = os.path.join(spectrogram_root, species)
    if not os.path.isdir(species_dir):
        # Stray files next to the species folders are not classes
        continue

    all_files = sorted(f for f in os.listdir(species_dir) if f.endswith(".png"))

    # Take everything when the species has at most `samples_per_class` files,
    # otherwise draw exactly `samples_per_class` without replacement.
    if len(all_files) <= samples_per_class:
        selected_files = all_files
    else:
        selected_files = rng.sample(all_files, samples_per_class)

    for f in selected_files:
        data.append({
            "filepath": os.path.join(species_dir, f),
            "label": species
        })

# Create DataFrame and save full balanced dataset
df = pd.DataFrame(data)
df.to_csv("balanced_dataset.csv", index=False)

# Stratified Train/Validation Split (80/20, class proportions preserved)
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["label"], random_state=random_seed)
train_df.to_csv("train_split.csv", index=False)
val_df.to_csv("val_split.csv", index=False)

print(f"Species included: {df['label'].nunique()}")
print(f"Total: {len(df)}, Train: {len(train_df)}, Val: {len(val_df)}")


In [None]:
import pandas as pd
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import torch
import os

# Load the train/validation splits from their CSV files
train_df = pd.read_csv("train_split.csv")
val_df = pd.read_csv("val_split.csv")

# Class index of each label = its rank among the sorted training labels
label2idx = {
    name: position
    for position, name in enumerate(sorted(train_df['label'].unique()))
}

# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalize each of the 3 channels with mean 0.5 and std 0.5
image_transforms = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])


In [None]:
class BirdCLEFDataset(Dataset):
    """Dataset over a table of image paths and their labels.

    Each item is an ``(image, class_index)`` pair built from one row of the
    table: the file at ``filepath`` is opened as RGB and ``label`` is
    translated through ``label2idx``.
    """

    def __init__(self, df, transform=None, label2idx=None):
        """Store the table and the optional transform / label mapping.

        Args:
            df: DataFrame with ``filepath`` and ``label`` columns.
            transform: Optional callable applied to the loaded image.
            label2idx: Mapping from label value to class index; it is
                indexed on every item access.
        """
        self.transform = transform
        self.label2idx = label2idx
        # Re-index to 0..n-1 so integer positions are valid `.loc` keys
        self.df = df.reset_index(drop=True)

    def __len__(self):
        """Return the number of rows in the table."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Load the image and class index for row ``idx``."""
        record = self.df.loc[idx]
        picture = Image.open(record['filepath']).convert("RGB")
        target = self.label2idx[record['label']]

        if not self.transform:
            return picture, target
        return self.transform(picture), target


In [None]:
# Both splits share the same preprocessing pipeline and label mapping
train_dataset = BirdCLEFDataset(
    df=train_df,
    transform=image_transforms,
    label2idx=label2idx,
)
val_dataset = BirdCLEFDataset(
    df=val_df,
    transform=image_transforms,
    label2idx=label2idx,
)

# Dataloaders: training batches are reshuffled, validation keeps file order
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)


In [None]:
import torchvision.models as models
import torch.nn as nn

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Size the classification head from the label mapping instead of a hard-coded
# count, so the model always agrees with the labels actually present in the
# training split (a mismatch breaks the loss or the per-class metrics).
num_classes = len(label2idx)

# NOTE(review): `pretrained=True` is the legacy torchvision spelling; newer
# releases prefer the `weights=` argument. Kept as-is for compatibility with
# the installed version - confirm before upgrading torchvision.
model = models.efficientnet_b0(pretrained=True)
# Swap the final linear layer of the classifier for one with `num_classes` outputs
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
model = model.to(device)


In [None]:
import torch.optim as optim

# Cross-entropy loss between the model outputs and the integer class labels
criterion = nn.CrossEntropyLoss()
# Adam over every parameter of the model, learning rate 1e-4
optimizer = optim.Adam(params=model.parameters(), lr=0.0001)


In [None]:
for epoch in range(10):  # You can increase this
    # Put the model in training mode before each pass over the data
    model.train()
    total_loss = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # One optimization step: clear gradients, forward, backward, update
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        # Accumulate the scalar batch loss; the reported figure is the SUM
        # over batches, not the mean
        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Training Loss: {total_loss:.4f}")


In [None]:
def evaluate(model, loader):
    """Compute and print the top-1 accuracy of ``model`` over ``loader``.

    Batches are moved to the module-level ``device`` before the forward pass,
    and the model is switched to eval mode (it is not switched back).

    Args:
        model: Callable module returning one row of class scores per sample.
        loader: Iterable yielding ``(images, labels)`` tensor batches.

    Returns:
        The accuracy as a percentage (0-100), or ``nan`` when the loader
        yields no samples.
    """
    model.eval()
    correct = 0
    total = 0

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            preds = model(images).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    # An empty loader would otherwise raise ZeroDivisionError below
    if total == 0:
        print("Validation Accuracy: n/a (no samples)")
        return float("nan")

    accuracy = 100 * correct / total
    print(f"Validation Accuracy: {accuracy:.2f}%")
    return accuracy

# Report accuracy of the trained model on the validation loader
evaluate(model=model, loader=val_loader)


In [None]:
# Persist only the weights (state dict), not the full module object
torch.save(obj=model.state_dict(), f="birdclef_efficientnet_b0.pth")


In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, top_k_accuracy_score
import numpy as np

def get_validation_metrics(model, loader, label_names):
    """Print a classification report, confusion matrix and top-3 accuracy.

    The class set is passed explicitly to every scikit-learn metric, so the
    report still works when the evaluated split does not contain every class
    (otherwise ``classification_report`` and ``top_k_accuracy_score`` raise
    ``ValueError`` on the size mismatch).

    Args:
        model: Callable module returning one row of class scores per sample.
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        label_names: Class names; position ``i`` names class index ``i``.

    Returns:
        Tuple ``(all_preds, all_labels)`` of per-sample predicted and true
        class indices, in loader order.
    """
    model.eval()
    all_preds = []
    all_probs = []
    all_labels = []

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)

            outputs = model(images)
            probs = torch.softmax(outputs, dim=1)
            preds = torch.argmax(probs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
            # Labels are only compared on the host; no device round-trip needed
            all_labels.extend(labels.cpu().numpy())

    # Nothing to score: the metric functions cannot handle empty input
    if not all_labels:
        print("No samples to evaluate.")
        return all_preds, all_labels

    # Explicit class ids keep every metric aligned with `label_names`
    class_ids = list(range(len(label_names)))

    # Metrics
    print("🔍 Classification Report:\n")
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=label_names,
        zero_division=0,
    ))

    print("🔍 Confusion Matrix:\n")
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    print(cm)

    # Optional: Top-3 Accuracy. `labels` must name one class per score column
    # whenever the true labels do not cover all classes.
    prob_matrix = np.array(all_probs)
    top3 = top_k_accuracy_score(
        all_labels,
        prob_matrix,
        k=3,
        labels=np.arange(prob_matrix.shape[1]),
    )
    print(f"\n🎯 Top-3 Accuracy: {top3 * 100:.2f}%")

    return all_preds, all_labels



In [None]:
# Class names in mapping (insertion) order, so position i names class index i
label_names = list(train_dataset.label2idx)


In [None]:
# Bind the returned lists to names: as a bare last expression the cell would
# echo every prediction and label after the printed report.
val_preds, val_labels = get_validation_metrics(model, val_loader, label_names)


In [None]:
def collect_predictions(model, loader, label_names, dataframe):
    """Build a per-sample table of true and predicted labels.

    Rows of ``dataframe`` are paired with samples purely by position, so the
    loader must iterate the data in the same order as ``dataframe``. A running
    offset is used instead of ``batch_index * loader.batch_size``, which keeps
    the pairing correct for loaders without a fixed ``batch_size``.

    Args:
        model: Callable module returning one row of class scores per sample.
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        label_names: Class names; position ``i`` names class index ``i``.
        dataframe: Table with a ``filepath`` column, in loader order.

    Returns:
        DataFrame with columns ``filepath``, ``true_label``, ``pred_label``
        and ``correct``, one row per evaluated sample.

    Raises:
        ValueError: If the loader shuffles its samples, because file paths
            could then not be matched to predictions.
        IndexError: If the loader yields more samples than ``dataframe`` has rows.
    """
    # A shuffling loader would silently attach the wrong path to each prediction
    if isinstance(getattr(loader, "sampler", None), torch.utils.data.RandomSampler):
        raise ValueError(
            "collect_predictions needs a non-shuffling loader: rows of "
            "`dataframe` are matched to samples by position."
        )

    # Hoisted out of the loop: one plain list instead of a row lookup per sample
    filepaths = dataframe["filepath"].tolist()

    model.eval()
    results = []
    offset = 0  # Position of the next sample within `dataframe`

    with torch.no_grad():
        for images, labels in loader:
            outputs = model(images.to(device))
            # Convert each batch once rather than calling .item() per element
            pred_ids = torch.argmax(outputs, dim=1).tolist()
            true_ids = labels.tolist()

            for true_id, pred_id in zip(true_ids, pred_ids):
                results.append({
                    "filepath": filepaths[offset],
                    "true_label": label_names[true_id],
                    "pred_label": label_names[pred_id],
                    "correct": true_id == pred_id
                })
                offset += 1

    return pd.DataFrame(results)


In [None]:
# Score the validation loader and persist the per-file outcomes
prediction_df = collect_predictions(
    model=model,
    loader=val_loader,
    label_names=label_names,
    dataframe=val_df,
)
prediction_df.to_csv(path_or_buf="val_predictions.csv", index=False)

# Preview the first five rows
print(prediction_df.head(n=5))


In [None]:
# Partition the prediction table by the value of its `correct` flag
correct_preds = prediction_df.loc[prediction_df["correct"].eq(True)]
incorrect_preds = prediction_df.loc[prediction_df["correct"].eq(False)]

# Summary counts, one per line
print(
    f"✅ Correct Predictions: {len(correct_preds)}",
    f"❌ Incorrect Predictions: {len(incorrect_preds)}",
    f"🔢 Total Samples Evaluated: {len(prediction_df)}",
    sep="\n",
)


In [None]:
from PIL import Image
import matplotlib.pyplot as plt

def _show_prediction_samples(predictions, max_samples, marker):
    """Plot up to ``max_samples`` randomly chosen rows of ``predictions``.

    Args:
        predictions: DataFrame with ``filepath``, ``pred_label`` and
            ``true_label`` columns.
        max_samples: Upper bound on the number of images shown.
        marker: Prefix placed at the start of each plot title.
    """
    # DataFrame.sample raises ValueError when asked for more rows than exist,
    # so cap the request at the number of available rows.
    n_samples = min(max_samples, len(predictions))
    if n_samples == 0:
        return

    for _, row in predictions.sample(n_samples).iterrows():
        # Context manager closes the image file once it has been drawn
        with Image.open(row['filepath']) as img:
            plt.imshow(img)
        plt.title(f"{marker} Predicted: {row['pred_label']} | True: {row['true_label']}")
        plt.axis("off")
        plt.show()


# Show up to 5 correct predictions
_show_prediction_samples(correct_preds, 5, "✅")

# Show up to 2 incorrect predictions
_show_prediction_samples(incorrect_preds, 2, "❌")
