# 🐦 Nestkast Model Training - EMSN2
**Colab Pro+ - H100 GPU MAX SPEED**

In [None]:
# Mount Google Drive, then mirror the dataset onto the runtime's local disk so
# that training reads images locally instead of through the Drive mount.
from google.colab import drive
drive.mount('/content/drive')

import shutil
import os

DRIVE_DATA = '/content/drive/MyDrive/EMSN/nestbox-training/data'
LOCAL_DATA = '/content/data'
OUTPUT_DIR = '/content/drive/MyDrive/EMSN/nestbox-training/models'

# Copy only when the local directory does not exist yet; an existing
# directory is reused as-is.
if not os.path.exists(LOCAL_DATA):
    print("‚ö° Kopi√´ren naar lokale SSD...")
    shutil.copytree(DRIVE_DATA, LOCAL_DATA)

# Report how many directory entries each class folder holds.
leeg_count = len(os.listdir(os.path.join(LOCAL_DATA, 'leeg')))
bezet_count = len(os.listdir(os.path.join(LOCAL_DATA, 'bezet')))
print(f"‚úÖ Data klaar: {leeg_count} leeg, {bezet_count} bezet")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms, models
from PIL import Image
from pathlib import Path
import numpy as np

# Select the CUDA device, report which GPU is in use, and switch on the
# throughput-oriented backend settings.
device = torch.device('cuda')
gpu_name = torch.cuda.get_device_name(0)
print(f"üöÄ GPU: {gpu_name}")

# Let cuDNN benchmark its convolution algorithms and pick the fastest one.
torch.backends.cudnn.benchmark = True
# 'high' permits TensorFloat-32 for float32 matmuls (intended for the H100).
torch.set_float32_matmul_precision('high')

In [None]:
# === H100 CONFIG ===
BATCH_SIZE = 128     # the H100 has 80 GB of VRAM
NUM_EPOCHS = 25
LR = 0.002           # higher learning rate to go with the larger batch
NUM_WORKERS = 8      # more loader workers for the H100

# Per-channel mean / std applied by both pipelines after ToTensor().
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize to 256x256, take a random 224x224 crop, then apply
# random flip / rotation / colour perturbations before tensor conversion.
train_tf = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

# Validation pipeline: deterministic resize straight to 224x224, no augmentation.
val_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

In [None]:
# Dataset
class NestboxDataset(Dataset):
    """Two-class image dataset read from ``<root>/leeg`` and ``<root>/bezet``.

    Every ``*.jpg`` file directly inside ``<root>/leeg`` gets label 0 and every
    ``*.jpg`` file directly inside ``<root>/bezet`` gets label 1.

    Args:
        root: Directory containing the ``leeg`` and ``bezet`` sub-folders.
        transform: Callable applied to each RGB ``PIL.Image`` before it is
            returned from ``__getitem__``.

    Attributes:
        samples: List of ``(path, label)`` tuples, ordered by class and then
            by sorted path.
    """

    def __init__(self, root, transform):
        self.transform = transform
        self.samples = []
        for label, cls in enumerate(['leeg', 'bezet']):
            # glob() yields files in filesystem-dependent order; sorting makes
            # the sample indices stable so that a seeded split of this dataset
            # selects the same files on every run.
            class_files = sorted(Path(root).glob(f'{cls}/*.jpg'))
            self.samples.extend((path, label) for path in class_files)

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.samples)

    def __getitem__(self, i):
        """Return ``(transformed_image, label)`` for sample index ``i``."""
        path, label = self.samples[i]
        # convert() loads the pixel data into a new image, so the file handle
        # can be released as soon as the conversion is done.
        with Image.open(path) as img:
            rgb = img.convert('RGB')
        return self.transform(rgb), label

# Split: 80% train / 20% validation, with a fixed seed for the permutation.
import copy

full = NestboxDataset(LOCAL_DATA, train_tf)
n_train = int(0.8 * len(full))
n_val = len(full) - n_train
train_ds, val_ds = random_split(
    full, [n_train, n_val], generator=torch.Generator().manual_seed(42)
)

# random_split returns two Subset objects that both point at the SAME `full`
# dataset. Assigning `val_ds.dataset.transform` directly would therefore also
# replace the training transform and disable augmentation. Give the validation
# subset its own shallow copy (sharing the sample list, so the indices still
# line up) and set the validation transform on that copy only.
val_ds.dataset = copy.copy(full)
val_ds.dataset.transform = val_tf

# Only the training loader shuffles; both keep their worker processes alive
# between epochs and use pinned host memory.
train_loader = DataLoader(
    train_ds, BATCH_SIZE, shuffle=True,
    num_workers=NUM_WORKERS, pin_memory=True, persistent_workers=True,
)
val_loader = DataLoader(
    val_ds, BATCH_SIZE,
    num_workers=NUM_WORKERS, pin_memory=True, persistent_workers=True,
)
print(f"üìä Train: {len(train_ds)}, Val: {len(val_ds)}")

In [None]:
# Model: MobileNetV2 with ImageNet weights and a new 2-class head, moved to
# the GPU and wrapped with torch.compile (PyTorch 2.x compiler).
backbone = models.mobilenet_v2(weights='IMAGENET1K_V1')
head = nn.Sequential(
    nn.Dropout(p=0.3),
    nn.Linear(in_features=1280, out_features=2),
)
backbone.classifier = head
model = torch.compile(backbone.cuda())

# Loss, optimizer and gradient scaler used by the training loop.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=LR)
scaler = torch.cuda.amp.GradScaler()
print("‚úÖ Model compiled & klaar")

In [None]:
# === TRAINING ===
# Train for NUM_EPOCHS epochs, evaluate on the validation loader after each
# one, and keep a CPU copy of the weights from the best-scoring epoch.
best_acc = 0
best_state = None

# state_dict keys of a torch.compile()-wrapped module carry this prefix.
_COMPILED_PREFIX = '_orig_mod.'


def _snapshot_weights(module):
    """Return a CPU copy of ``module.state_dict()`` with plain parameter names.

    The compile-wrapper prefix is stripped from every key so the snapshot can
    be loaded into an ordinary, uncompiled model of the same architecture.
    """
    snapshot = {}
    for key, tensor in module.state_dict().items():
        if key.startswith(_COMPILED_PREFIX):
            key = key[len(_COMPILED_PREFIX):]
        snapshot[key] = tensor.cpu().clone()
    return snapshot


for epoch in range(NUM_EPOCHS):
    # --- Train ---
    model.train()
    train_correct, train_total = 0, 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.cuda(non_blocking=True), labels.cuda(non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(dtype=torch.bfloat16):  # bfloat16 for the H100
            out = model(imgs)
            loss = criterion(out, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        # argmax over dim 1 picks the predicted class for each sample.
        train_correct += (out.argmax(1) == labels).sum().item()
        train_total += imgs.size(0)

    # --- Validate ---
    model.eval()
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.cuda(non_blocking=True), labels.cuda(non_blocking=True)
            with torch.cuda.amp.autocast(dtype=torch.bfloat16):
                out = model(imgs)
            val_correct += (out.argmax(1) == labels).sum().item()
            val_total += imgs.size(0)

    train_acc = 100 * train_correct / train_total
    val_acc = 100 * val_correct / val_total

    # `best_state is None` ensures the first epoch is always captured, even if
    # its validation accuracy is 0, so a checkpoint exists after training.
    if best_state is None or val_acc > best_acc:
        best_acc = val_acc
        best_state = _snapshot_weights(model)
        mark = ' ‚≠ê'
    else:
        mark = ''

    print(f"Epoch {epoch+1:2d}/{NUM_EPOCHS} | Train: {train_acc:.1f}% | Val: {val_acc:.1f}%{mark}")

print(f"\nüèÜ Beste accuracy: {best_acc:.1f}%")

In [None]:
# === SAVE ===
# Write the best weights, the class names and the best validation accuracy to
# OUTPUT_DIR, then print the rclone command for copying the file to the Pi.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Refuse to write a checkpoint that contains no weights.
if best_state is None:
    raise RuntimeError(
        "best_state is None: no model weights were captured; "
        "run the training cell before saving."
    )

# Weights taken from a torch.compile()-wrapped model have keys prefixed with
# '_orig_mod.'. Strip that prefix so the checkpoint loads into a plain
# (uncompiled) model; keys without the prefix pass through unchanged.
_prefix = '_orig_mod.'
portable_state = {
    (key[len(_prefix):] if key.startswith(_prefix) else key): tensor
    for key, tensor in best_state.items()
}

save_path = f'{OUTPUT_DIR}/nestbox_model_latest.pt'
torch.save({
    'model_state_dict': portable_state,
    'classes': ['leeg', 'bezet'],
    'accuracy': best_acc
}, save_path)

print(f"‚úÖ Model opgeslagen: {save_path}")
print("\nüì• Kopieer naar Pi met:")
print("rclone copy gdrive:EMSN/nestbox-training/models/nestbox_model_latest.pt /mnt/nas-birdnet-archive/nestbox/models/")