In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import import_ipynb
# from data_processing import BirdClefDataset
# from data_processing import get_training_data
import data_processing
import numpy as np
from torch import optim
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score
import csv

# Prefer the GPU when CUDA is usable in this environment; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


Total classes: 206


In [2]:
class BirdClefModel(nn.Module):
    """Small three-stage CNN that maps a single-channel 2-D input to per-label logits.

    Each stage is Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU -> MaxPool2d(2, 2),
    so both spatial dimensions are floor-halved three times before the
    fully-connected head.

    Args:
        labels: Number of output units (one logit per label).
        bins: Size of the first spatial dimension of the input.
        frames: Size of the second spatial dimension of the input.
    """

    def __init__(self, labels, bins, frames):
        super().__init__()
        self.labels, self.bins, self.frames = labels, bins, frames
        self.modelSetup()

    def modelSetup(self):
        """Create every layer; attribute names double as the state_dict keys."""
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)

        self.conv1 = nn.Conv2d(1, 16, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, **conv_kwargs)
        self.bn3 = nn.BatchNorm2d(64)

        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.2)

        # Three 2x2 pools shrink each spatial dimension to size // 8;
        # the last conv stage emits 64 channels.
        flat_features = 64 * (self.bins // 8) * (self.frames // 8)
        self.fc1 = nn.Linear(flat_features, 128)
        self.fc2 = nn.Linear(128, self.labels)

    def forward(self, x):
        """Return raw (pre-sigmoid) logits of shape (batch, labels).

        ``x`` must be laid out as (batch, 1, bins, frames) so that the flattened
        feature size matches ``fc1``.
        """
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = self.pool(F.relu(norm(conv(x))))

        flat = x.view(x.size(0), -1)
        hidden = self.dropout(F.relu(self.fc1(flat)))
        return self.fc2(hidden)
        
        
        

In [3]:
def create_batch(melspecs):
    """Stack a sequence of equally-shaped NumPy arrays into one tensor on ``device``.

    Per the original author's note, each element is expected to be shaped
    [1, n_mels, time_frames], which yields a [batch, 1, n_mels, time_frames]
    tensor. The new leading axis indexes the elements of ``melspecs``.
    """
    stacked = np.stack(melspecs)
    return torch.from_numpy(stacked).to(device)

In [4]:
# Metadata CSV handed to the project-local loader factory.
# NOTE(review): this is an absolute, machine-specific path — consider making it configurable.
TRAIN_CSV_PATH = '/Users/rohitbogulla/Desktop/Sem 2/ML/Project/BirdClef/birdclef-2025/train.csv'

# get_training_data returns a (train, validation) pair; presumably DataLoaders — verify in data_processing.
train_loader, val_loader = data_processing.get_training_data('melspec', TRAIN_CSV_PATH)

In [5]:
# Run configuration: output size and the input spatial dimensions the model is sized for.
NUM_CLASSES = 206
N_BINS, N_FRAMES = 128, 309
LEARNING_RATE = 1e-4

model = BirdClefModel(NUM_CLASSES, bins=N_BINS, frames=N_FRAMES).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# The model emits raw logits, so the sigmoid is folded into the loss.
loss_fn = nn.BCEWithLogitsLoss()

In [6]:
def train_one_epoch(model, train_loader, optimizer, loss_fn, device, epoch):
    """Run one optimisation pass over ``train_loader`` and return the mean batch loss.

    Args:
        model: Module to train; switched to training mode before the pass.
        train_loader: Sized iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer stepping the model's parameters.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar tensor.
        device: Device each batch is moved to before the forward pass.
        epoch: Epoch number, used only in the progress-bar label.

    Returns:
        Sum of the per-batch loss values divided by ``len(train_loader)``
        (every batch is weighted equally, regardless of its size).
    """
    model.train()
    progress = tqdm(train_loader, desc=f"[Epoch {epoch}] Train", leave=False)

    loss_total = 0.0
    for inputs, targets in progress:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        batch_loss = loss_fn(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        loss_total += batch_loss.item()

    n_batches = len(train_loader)
    return loss_total / n_batches

In [9]:
def safe_macro_auc(y_true, y_pred):
    """Compute macro-averaged ROC AUC, skipping classes that cannot be scored.

    Args:
        y_true: 2-D array of shape (samples, classes) holding 0/1 targets.
        y_pred: 2-D array of the same shape holding predicted scores.

    Returns:
        Mean of the per-class ROC AUC values over every class that has at
        least one positive and one negative target and that ``roc_auc_score``
        accepts; ``float("nan")`` when no class qualifies.

    Only ``ValueError`` (what scikit-learn raises for unusable labels) is
    treated as "skip this class". Anything else, including KeyboardInterrupt,
    propagates instead of being silently swallowed.
    """
    scores = []
    for i in range(y_true.shape[1]):
        y_col = y_true[:, i]
        p_col = y_pred[:, i]

        # ROC AUC is undefined unless both classes are present in the targets.
        if not ((y_col == 1).any() and (y_col == 0).any()):
            continue

        try:
            scores.append(roc_auc_score(y_col, p_col))
        except ValueError:
            # e.g. targets that are not strictly binary; keep best-effort behaviour.
            continue

    return np.mean(scores) if scores else float("nan")

def validate_one_epoch(model, dataloader, loss_fn, device, epoch):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Args:
        model: Module to evaluate; switched to eval mode before the pass.
        dataloader: Sized iterable yielding ``(inputs, targets)`` batches.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar tensor.
        device: Device each batch is moved to before the forward pass.
        epoch: Epoch number, used only in the progress-bar label.

    Returns:
        Tuple ``(mean_loss, val_auc)``: the summed per-batch loss divided by
        ``len(dataloader)``, and the result of ``safe_macro_auc`` on the
        stacked targets and sigmoid probabilities.
    """
    model.eval()

    loss_total = 0.0
    target_chunks, prob_chunks = [], []

    with torch.no_grad():
        progress = tqdm(dataloader, desc=f"[Epoch {epoch}] Val", leave=False)
        for inputs, targets in progress:
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)
            batch_loss = loss_fn(logits, targets)

            # Logits -> probabilities, stored on the CPU as NumPy for scoring.
            prob_chunks.append(torch.sigmoid(logits).cpu().numpy())
            target_chunks.append(targets.cpu().numpy())
            loss_total += batch_loss.item()

    stacked_targets = np.vstack(target_chunks)
    stacked_probs = np.vstack(prob_chunks)
    val_auc = safe_macro_auc(stacked_targets, stacked_probs)
    return loss_total / len(dataloader), val_auc

In [11]:
# Run configuration for the training loop.
NUM_EPOCHS = 5
LOG_PATH = '/Users/rohitbogulla/Desktop/Sem 2/ML/Project/BirdClef/birdclef-2025/train_LOGFILE.csv'

# Start below any attainable AUC so the first valid epoch always checkpoints.
best_auc = -1.0

for epoch in range(1, NUM_EPOCHS + 1):
    train_loss = train_one_epoch(model, train_loader, optimizer, loss_fn, device, epoch)
    val_loss, val_auc = validate_one_epoch(model, val_loader, loss_fn, device, epoch)

    print(f"[Epoch {epoch}] Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val ROC AUC: {val_auc:.5f}")

    # Append this epoch's metrics to the CSV log file (no header row is written).
    with open(LOG_PATH, "a", newline="") as log_file:
        csv.writer(log_file).writerow([epoch, train_loss, val_loss, val_auc])

    # Save the model only if validation AUC improved; a NaN AUC never compares greater.
    if not val_auc > best_auc:
        continue

    best_auc = val_auc
    model_filename = f"cnn_model_{epoch}_auc{val_auc:.5f}.pth"
    torch.save(model.state_dict(), model_filename)
    print(f"✅ New best AUC: {best_auc:.5f} — saved as {model_filename}")

                                                                                

[Epoch 1] Train Loss: 0.0271 | Val Loss: 0.0248 | Val ROC AUC: 0.72606
✅ New best AUC: 0.72606 — saved as cnn_model_1_auc0.72606.pth


                                                                                

[Epoch 2] Train Loss: 0.0258 | Val Loss: 0.0236 | Val ROC AUC: 0.79394
✅ New best AUC: 0.79394 — saved as cnn_model_2_auc0.79394.pth


                                                                                

[Epoch 3] Train Loss: 0.0246 | Val Loss: 0.0229 | Val ROC AUC: 0.81257
✅ New best AUC: 0.81257 — saved as cnn_model_3_auc0.81257.pth


                                                                                

[Epoch 4] Train Loss: 0.0240 | Val Loss: 0.0225 | Val ROC AUC: 0.83450
✅ New best AUC: 0.83450 — saved as cnn_model_4_auc0.83450.pth


                                                                                

[Epoch 5] Train Loss: 0.0235 | Val Loss: 0.0221 | Val ROC AUC: 0.84153
✅ New best AUC: 0.84153 — saved as cnn_model_5_auc0.84153.pth
