# Nestkast Bezetting Detector - EMSN

Training van een CNN-model om te detecteren of een nestkast bezet is (vogel aanwezig) of leeg.

**Data:**
- `bezet/` - Screenshots van nestkast midden (Koolmees slaapt daar)
- `leeg/` - Screenshots van nestkast voor en achter (geen vogels)

**Model:** MobileNetV2 met transfer learning (snel, werkt goed met weinig data)

## 1. Setup en Data Upload

In [None]:
# Installeer benodigde packages
!pip install -q torch torchvision pillow matplotlib scikit-learn

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms, models
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import os
from pathlib import Path
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
from google.colab import files
import zipfile
import io

# Pick the compute device: CUDA when a GPU is available, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device: {device}')
if device.type == 'cuda':
    print(f'GPU: {torch.cuda.get_device_name(0)}')

In [None]:
# Load the data from Google Drive
# The archive is expected at: EMSN/training_data/nestbox_data.zip

from google.colab import drive
import zipfile

# Mount Google Drive
print("Google Drive mounten...")
drive.mount('/content/drive')

# Path to the zipped dataset
zip_path = '/content/drive/MyDrive/EMSN/training_data/nestbox_data.zip'

# Extract the archive into /content/
print(f'Uitpakken van {zip_path}...')
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall('/content/')

# Check the extracted structure and count the .jpg files per class
!ls -la /content/nestbox_data/
print("\nAantal bestanden per klasse:")
!echo "bezet: $(ls /content/nestbox_data/bezet/*.jpg 2>/dev/null | wc -l)"
!echo "leeg: $(ls /content/nestbox_data/leeg/*.jpg 2>/dev/null | wc -l)"

## 2. Dataset en DataLoader

In [None]:
class NestboxDataset(Dataset):
    """Image dataset for nest-box occupancy detection.

    ``data_dir`` is expected to contain one sub-directory per class, named
    ``leeg`` and ``bezet``. Label 0 is "leeg" (empty) and label 1 is "bezet"
    (occupied). A missing class directory is skipped silently.

    Args:
        data_dir: Directory holding the class sub-directories.
        transform: Optional callable applied to each PIL image on access.
        patterns: Glob patterns selecting the image files inside each class
            directory. Defaults to ``('*.jpg',)``.
    """

    def __init__(self, data_dir, transform=None, patterns=('*.jpg',)):
        self.data_dir = Path(data_dir)
        self.transform = transform
        self.classes = ['leeg', 'bezet']  # 0 = leeg, 1 = bezet

        self.images = []
        self.labels = []

        # Collect all image paths, class by class
        for class_idx, class_name in enumerate(self.classes):
            class_dir = self.data_dir / class_name
            if not class_dir.exists():
                continue
            # glob() yields entries in filesystem order, which is not stable
            # across machines. Sorting makes index-based (seeded) splits
            # reproducible; the set removes duplicates from overlapping
            # patterns.
            matches = sorted(
                {str(path) for pattern in patterns for path in class_dir.glob(pattern)}
            )
            self.images.extend(matches)
            self.labels.extend([class_idx] * len(matches))

        print(f"Geladen: {len(self.images)} afbeeldingen")
        print(f"  Leeg: {self.labels.count(0)}")
        print(f"  Bezet: {self.labels.count(1)}")

    def __len__(self):
        """Return the number of images found."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The image is loaded as RGB and passed through ``self.transform`` when
        one is set.
        """
        # The context manager releases the file handle once the pixel data
        # has been copied into the converted image.
        with Image.open(self.images[idx]) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
# Data transforms
# Per-channel normalisation statistics shared by both pipelines
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training: resize, light augmentation for extra variety, then tensor + normalise
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Validation: no augmentation, only resize, tensor conversion and normalisation
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Load the complete dataset without a transform; it is only used as the
# source of paths and labels for the split
full_dataset = NestboxDataset('/content/nestbox_data', transform=None)

In [None]:
# Split the dataset: 80% train, 20% validation
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size

# Seeded generator so the random split can be repeated
generator = torch.Generator()
generator.manual_seed(42)

# Split index positions rather than samples; transforms are attached later
train_indices, val_indices = random_split(
    dataset=range(len(full_dataset)),
    lengths=[train_size, val_size],
    generator=generator,
)

print(f"Train set: {len(train_indices)} afbeeldingen")
print(f"Validatie set: {len(val_indices)} afbeeldingen")

In [None]:
# Subset wrapper that lets every split use its own transform
class SubsetDataset(Dataset):
    """View on selected items of ``dataset`` with a split-specific transform.

    Args:
        dataset: Object exposing ``images`` (paths) and ``labels`` sequences.
        indices: Iterable of positions into ``dataset`` that belong to this subset.
        transform: Callable applied to each loaded PIL image, or a falsy
            value to return the PIL image unchanged.
    """

    def __init__(self, dataset, indices, transform):
        self.dataset = dataset
        # Materialise the indices so they support len() and repeated indexing
        self.indices = list(indices)
        self.transform = transform

    def __len__(self):
        """Return the number of items in this subset."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for subset position ``idx``."""
        # Map the subset position back to the position in the wrapped dataset
        source_idx = self.indices[idx]
        sample = Image.open(self.dataset.images[source_idx]).convert('RGB')
        target = self.dataset.labels[source_idx]

        if not self.transform:
            return sample, target
        return self.transform(sample), target

# Build the train and validation datasets, each with its own transform
train_dataset = SubsetDataset(
    dataset=full_dataset,
    indices=train_indices,
    transform=train_transform,
)
val_dataset = SubsetDataset(
    dataset=full_dataset,
    indices=val_indices,
    transform=val_transform,
)

# DataLoaders: only the training data is shuffled
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=2,
)

print(f"Batches per epoch: {len(train_loader)} train, {len(val_loader)} val")

## 3. Bekijk Voorbeelden

In [None]:
# Show a few examples
def show_samples(dataset, num_samples=8):
    """Plot randomly chosen samples from ``dataset`` with their class label.

    Args:
        dataset: Indexable dataset yielding ``(image, label)`` pairs, where
            ``image`` is a 3-channel, channel-first tensor normalised with the
            mean/std used below.
        num_samples: Number of samples to draw without replacement. It is
            capped at ``len(dataset)``.

    Samples with label 1 are titled "BEZET" in green, all others "LEEG" in red.
    """
    # np.random.choice(..., replace=False) raises when asked for more items
    # than exist, so cap the request.
    num_samples = min(num_samples, len(dataset))
    if num_samples <= 0:
        print("Geen afbeeldingen om te tonen")
        return

    # Lay the grid out from the sample count (8 samples -> 2 rows x 4 columns)
    cols = min(4, num_samples)
    rows = (num_samples + cols - 1) // cols
    # squeeze=False keeps `axes` two-dimensional even for a single row/column
    fig, axes = plt.subplots(rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False)

    # Statistics used to undo the normalisation, broadcast over H and W
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    indices = np.random.choice(len(dataset), num_samples, replace=False)

    for i, idx in enumerate(indices):
        image, label = dataset[idx]

        # Denormalise, move channels last for imshow and clamp to [0, 1]
        image = image * std + mean
        image = image.permute(1, 2, 0).numpy()
        image = np.clip(image, 0, 1)

        ax = axes[i // cols, i % cols]
        ax.imshow(image)
        ax.set_title(f"{'BEZET' if label == 1 else 'LEEG'}",
                     color='green' if label == 1 else 'red',
                     fontsize=14, fontweight='bold')
        ax.axis('off')

    # Hide grid cells left over when the last row is not full
    for ax in axes.flat[num_samples:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

print("Voorbeelden uit training set:")
show_samples(train_dataset)

## 4. Model Definitie (MobileNetV2 Transfer Learning)

In [None]:
def create_model(num_classes=2, freeze_backbone=True, pretrained=True):
    """Build a MobileNetV2 with a custom classification head.

    Args:
        num_classes: Number of output units of the final linear layer.
        freeze_backbone: When True, all parameters in ``model.features`` get
            ``requires_grad = False`` so only the new head is trained.
        pretrained: When True, initialise with the ``IMAGENET1K_V1`` weights.
            Pass False when a trained state dict is loaded right afterwards,
            which avoids fetching the pretrained weights.

    Returns:
        The model. It is not moved to any device here.
    """
    weights = models.MobileNet_V2_Weights.IMAGENET1K_V1 if pretrained else None
    model = models.mobilenet_v2(weights=weights)

    # Freeze the backbone (optional)
    if freeze_backbone:
        for param in model.features.parameters():
            param.requires_grad = False

    # Replace the classifier; the input width is read from the stock head's
    # linear layer
    num_features = model.classifier[1].in_features
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2),
        nn.Linear(num_features, 128),
        nn.ReLU(),
        nn.Dropout(p=0.2),
        nn.Linear(128, num_classes)
    )

    return model

# Create the model
model = create_model(num_classes=2, freeze_backbone=True)
model = model.to(device)

# Count the trainable parameters
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print(f"Trainbare parameters: {trainable_params:,} / {total_params:,} ({100*trainable_params/total_params:.1f}%)")

## 5. Training

In [None]:
# Loss and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
# Halve the learning rate when the monitored value stops decreasing (patience 3)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
)

# Training function
def train_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Module to train; it is switched to training mode.
        loader: Iterable of ``(images, labels)`` tensor batches.
        criterion: Loss callable ``criterion(outputs, labels)``.
        optimizer: Optimiser stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy_percent)`` over all samples seen.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        batch_size = labels.size(0)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Assumes `criterion` returns the batch mean (the default reduction).
        # Weighting by batch size gives a true per-sample average, so a
        # smaller final batch does not skew the epoch loss.
        loss_sum += loss.item() * batch_size
        total += batch_size
        correct += outputs.argmax(dim=1).eq(labels).sum().item()

    if total == 0:
        raise ValueError("train_epoch: loader yielded no samples")

    return loss_sum / total, 100. * correct / total

# Validation function
def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without computing gradients.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        loader: Iterable of ``(images, labels)`` tensor batches.
        criterion: Loss callable ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy_percent, all_preds, all_labels)`` where the
        last two are flat lists of predicted and true class indices (plain
        ints), in loader order.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            batch_size = labels.size(0)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # Assumes `criterion` returns the batch mean (the default
            # reduction). Weighting by batch size gives a per-sample average
            # that is independent of how the data is batched.
            loss_sum += loss.item() * batch_size
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()

            all_preds.extend(predicted.tolist())
            all_labels.extend(labels.tolist())

    if total == 0:
        raise ValueError("validate: loader yielded no samples")

    return loss_sum / total, 100. * correct / total, all_preds, all_labels

In [None]:
# Training loop
num_epochs = 20
best_val_acc = 0.0
history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

print("Start training...\n")
print(f"{'Epoch':>6} {'Train Loss':>12} {'Train Acc':>10} {'Val Loss':>12} {'Val Acc':>10}")
print("-" * 54)

for epoch in range(num_epochs):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc, _, _ = validate(model, val_loader, criterion, device)

    # Let the scheduler react to the validation loss
    scheduler.step(val_loss)

    # Record the metrics for the plots
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    # Save the best model. The first epoch is always checkpointed so that
    # 'best_nestbox_model.pt' exists and belongs to this run, even when the
    # validation accuracy never rises above the initial 0.0.
    if epoch == 0 or val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_nestbox_model.pt')

    print(f"{epoch+1:>6} {train_loss:>12.4f} {train_acc:>9.1f}% {val_loss:>12.4f} {val_acc:>9.1f}%")

print(f"\nBeste validatie accuracy: {best_val_acc:.1f}%")

## 6. Resultaten Visualisatie

In [None]:
# Plot the training curves: loss on the left, accuracy on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# One entry per panel:
# (axis, train key, val key, train label, val label, y label, title)
curve_specs = (
    (ax1, 'train_loss', 'val_loss', 'Train Loss', 'Val Loss',
     'Loss', 'Training en Validatie Loss'),
    (ax2, 'train_acc', 'val_acc', 'Train Acc', 'Val Acc',
     'Accuracy (%)', 'Training en Validatie Accuracy'),
)

for axis, train_key, val_key, train_label, val_label, y_label, title in curve_specs:
    axis.plot(history[train_key], label=train_label, marker='o')
    axis.plot(history[val_key], label=val_label, marker='s')
    axis.set_xlabel('Epoch')
    axis.set_ylabel(y_label)
    axis.set_title(title)
    axis.legend()
    axis.grid(True)

plt.tight_layout()
plt.savefig('training_curves.png', dpi=150)
plt.show()

In [None]:
# Load the best checkpoint and evaluate it.
# map_location keeps the load working when the checkpoint was written on a
# different device than the current one (e.g. GPU run, CPU restart).
model.load_state_dict(torch.load('best_nestbox_model.pt', map_location=device))
val_loss, val_acc, all_preds, all_labels = validate(model, val_loader, criterion, device)

# Pin the class ids explicitly so the matrix stays 2x2 and the report keeps
# both rows even when the validation split contains only one class.
class_ids = [0, 1]
class_names = ['Leeg', 'Bezet']

# Confusion matrix
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel('Voorspeld')
plt.ylabel('Werkelijk')
plt.title(f'Confusion Matrix (Accuracy: {val_acc:.1f}%)')
plt.savefig('confusion_matrix.png', dpi=150)
plt.show()

# Classification report; zero_division=0 reports 0 for a class without
# predictions or samples instead of emitting a warning
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, labels=class_ids,
                            target_names=class_names, zero_division=0))

## 7. Model Opslaan en Downloaden

In [None]:
# Bundle the weights with the metadata describing this training run
model_info = dict(
    model_state_dict=model.state_dict(),
    classes=['leeg', 'bezet'],
    input_size=224,
    architecture='mobilenet_v2',
    best_val_acc=best_val_acc,
    num_epochs=num_epochs,
    train_samples=len(train_dataset),
    val_samples=len(val_dataset),
)

torch.save(model_info, 'nestbox_occupancy_model.pt')
print("Model opgeslagen als 'nestbox_occupancy_model.pt'")

# Report the checkpoint size in MB (bytes / 1024 / 1024)
size_mb = os.path.getsize('nestbox_occupancy_model.pt') / 1024 / 1024
print(f"Bestandsgrootte: {size_mb:.2f} MB")

In [None]:
# Download the model and the result plots
from google.colab import files

print("Bestanden om te downloaden:")
print("1. nestbox_occupancy_model.pt - Het getrainde model")
print("2. training_curves.png - Training grafieken")
print("3. confusion_matrix.png - Confusion matrix")

# Trigger one browser download per artefact, in the order listed above
for artifact in ('nestbox_occupancy_model.pt', 'training_curves.png', 'confusion_matrix.png'):
    files.download(artifact)

## 8. Test met Nieuwe Afbeelding (Optioneel)

In [None]:
def predict_image(model, image_path, device, show=True):
    """Predict whether the nest box in one image is occupied.

    Args:
        model: Classifier whose output index 0 means "Leeg" and 1 "Bezet".
        image_path: Path of the image file to classify.
        device: Device the input tensor is moved to; ``model`` is expected to
            live on the same device.
        show: When True (default), display the image with the prediction as
            its title. Pass False for headless use.

    Returns:
        ``(result, conf)``: the class name (``'Leeg'`` or ``'Bezet'``) and the
        softmax probability of that class as a percentage.
    """
    model.eval()

    # Same preprocessing as the validation pipeline (no augmentation)
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # Open the file once and release the handle right away; the converted
    # copy is reused for both inference and display.
    with Image.open(image_path) as img:
        image = img.convert('RGB')
    image_tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(image_tensor)
        probabilities = torch.softmax(outputs, dim=1)
        confidence, predicted = probabilities.max(1)

    classes = ['Leeg', 'Bezet']
    result = classes[predicted.item()]
    conf = confidence.item() * 100

    # Show the image with the result
    if show:
        plt.figure(figsize=(8, 6))
        plt.imshow(image)
        color = 'green' if result == 'Bezet' else 'red'
        plt.title(f"{result} ({conf:.1f}% confidence)", fontsize=16, color=color, fontweight='bold')
        plt.axis('off')
        plt.show()

    return result, conf

# Test with a random image from the validation set
test_idx = np.random.randint(len(val_dataset))
test_path = full_dataset.images[val_indices[test_idx]]
print(f"Test afbeelding: {test_path}")
result, conf = predict_image(model, test_path, device)

In [None]:
# Alternatively, upload your own image to test
print("Upload een nestkast screenshot om te testen:")
uploaded = files.upload()

# `uploaded` is keyed by file name; classify every uploaded file
for filename in uploaded:
    result, conf = predict_image(model, filename, device)
    print(f"\nResultaat: {result} ({conf:.1f}% confidence)")

---

## Volgende Stappen

Als het model goed werkt, kun je:

1. **Meer data verzamelen** - Meer foto's = beter model
2. **Fine-tuning** - Backbone unfreezen voor betere accuracy
3. **Multi-class** - Uitbreiden naar: leeg, bezet, nestbouw, eieren, jongen
4. **Integreren** - Model inzetten in EMSN voor automatische detectie

Model deployen op Pi (vereist `import torch` en de functie `create_model` uit sectie 4):
```python
# Laad model
checkpoint = torch.load('nestbox_occupancy_model.pt', map_location='cpu')
model = create_model(num_classes=2)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
```