# EMSN 2.0 - Nordic Species Training
## 25 Scandinavische soorten voor birdnet-vocalization

### Doelgroep:
- Zweden, Noorwegen, Finland
- Noord-Europese gebruikers

### Vereisten:
- **GPU:** A100 (40GB) aanbevolen
- **RAM:** High RAM runtime (52GB)
- **Tijd:** ~2-3 uur

### Colab Pro instellingen:
1. Runtime → Change runtime type
2. Hardware accelerator: **GPU**
3. GPU type: **A100** (als beschikbaar)
4. High-RAM: **✓ Aan**

In [None]:
# Check GPU en RAM
!nvidia-smi

import torch
import gc
import psutil

torch.cuda.empty_cache()
gc.collect()

print(f"\nPyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")

ram_gb = psutil.virtual_memory().total / 1e9
print(f"RAM: {ram_gb:.1f} GB")

if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU: {gpu_name}")
    print(f"GPU Memory: {gpu_mem:.1f} GB")
    
    torch.backends.cuda.matmul.allow_tf32 = False
    torch.backends.cudnn.allow_tf32 = False
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    
    if 'A100' in gpu_name:
        GPU_TYPE = 'A100'
        BATCH_SIZE = 64
        print(f"\nüöÄ A100 gedetecteerd")
    elif 'V100' in gpu_name:
        GPU_TYPE = 'V100'
        BATCH_SIZE = 48
    else:
        GPU_TYPE = 'T4'
        BATCH_SIZE = 32
else:
    GPU_TYPE = 'CPU'
    BATCH_SIZE = 16

In [None]:
# Install dependencies
# Quiet (-q) pip install of third-party packages; librosa, scikit-learn,
# scikit-image and requests are imported by later cells.
!pip install librosa scikit-learn scikit-image matplotlib tqdm requests -q
print("‚úÖ Dependencies ge√Ønstalleerd")

In [None]:
# Mount Google Drive voor persistente opslag
from google.colab import drive
import os

# Attach Google Drive; everything below DRIVE_BASE is stored there.
drive.mount('/content/drive')

# Storage locations on Drive for trained models and downloaded audio.
DRIVE_BASE = '/content/drive/MyDrive/EMSN-Nordic'
MODELS_DIR = f'{DRIVE_BASE}/models'
AUDIO_DIR = f'{DRIVE_BASE}/audio'

# Create both folders up front; existing folders are left untouched.
for _folder in (MODELS_DIR, AUDIO_DIR):
    os.makedirs(_folder, exist_ok=True)

print("‚úÖ Google Drive gemount")
print(f"üìÅ Modellen worden opgeslagen in: {MODELS_DIR}")

In [None]:
# === CONFIGURATION ===
import os

# Tag stored in every saved checkpoint.
VERSION = '2026_nordic'

# Training parameters
EPOCHS = 50              # maximum number of epochs per species
LEARNING_RATE = 0.001    # initial AdamW learning rate
MIN_LR = 0.00001         # lower bound for the ReduceLROnPlateau scheduler
PATIENCE = 10            # epochs without improvement before early stopping

# Data parameters
MAX_RECORDINGS_PER_TYPE = 50     # recordings fetched per vocalization type
MAX_SEGMENTS_PER_RECORDING = 5   # passed as max_segments to the spectrogram step
NUM_WORKERS = 4                  # spectrogram threads and DataLoader workers
MAX_CONCURRENT_DOWNLOADS = 10    # download threads

# Augmentation
USE_AUGMENTATION = True
AUGMENTATION_FACTOR = 2          # number of augmented variants kept per segment

# Xeno-canto API key.
# NOTE(review): a credential is hard-coded in this notebook. It can now be
# overridden with the XC_API_KEY environment variable; the literal below is
# kept only as a backward-compatible fallback and should be rotated and
# removed from the source.
XC_API_KEY = os.environ.get('XC_API_KEY') or '14258afd1c8a8e055387d012f2620e20f59ef3a2'

print(f"üìä NORDIC CONFIGURATIE:")
print(f"   GPU: {GPU_TYPE}")
print(f"   Batch size: {BATCH_SIZE}")
print(f"   Epochs: {EPOCHS}")

In [None]:
# === 25 SCANDINAVIAN / MISSING SPECIES ===
# NORDIC_SPECIES entries are (Dutch name, scientific name, filename); the
# filename is the scientific name with the space replaced by an underscore.

_NAME_PAIRS = [
    # Svardsten53's missing species
    ("Ringmus", "Passer montanus"),
    ("Witbandkruisbek", "Loxia leucoptera"),

    # Crossbills
    ("Grote Kruisbek", "Loxia pytyopsittacus"),

    # Tits
    ("Bruinkopmees", "Poecile cinctus"),

    # Owls
    ("Laplanduil", "Strix nebulosa"),
    ("Oeraluil", "Strix uralensis"),
    ("Ruigpootuil", "Aegolius funereus"),
    ("Dwerguil", "Glaucidium passerinum"),
    ("Sneeuwuil", "Bubo scandiacus"),
    ("Sperweruil", "Surnia ulula"),

    # Woodpeckers
    ("Witrugspecht", "Dendrocopos leucotos"),
    ("Drieteenspecht", "Picoides tridactylus"),

    # Corvids
    ("Taigagaai", "Perisoreus infaustus"),
    ("Notenkraker", "Nucifraga caryocatactes"),

    # Grouse
    ("Alpensneeuwhoen", "Lagopus muta"),
    ("Moerassneeuwhoen", "Lagopus lagopus"),
    ("Auerhoen", "Tetrao urogallus"),
    ("Korhoen", "Lyrurus tetrix"),
    ("Hazelhoen", "Bonasa bonasia"),

    # Buntings
    ("IJsgors", "Calcarius lapponicus"),
    ("Bosgors", "Emberiza rustica"),

    # Other
    ("Waterspreeuw", "Cinclus cinclus"),
    ("Roodmus", "Carpodacus erythrinus"),
    ("Kleine Vliegenvanger", "Ficedula parva"),
    ("Roodkeelpieper", "Anthus cervinus"),
]

NORDIC_SPECIES = [
    (dutch_name, latin_name, latin_name.replace(' ', '_'))
    for dutch_name, latin_name in _NAME_PAIRS
]

print(f"Te trainen: {len(NORDIC_SPECIES)} Scandinavische soorten")
for nl, sci, _ in NORDIC_SPECIES:
    print(f"  ‚Ä¢ {nl} ({sci})")

In [None]:
# Xeno-canto API
import requests
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed

def search_xeno_canto(scientific_name, voc_type='song', max_results=100):
    """Search xeno-canto for recordings of one species and vocalization type.

    The query is built from the genus and species (first two words of
    ``scientific_name``, lower-cased), the vocalization type and the ``q:A``
    tag (xeno-canto's quality-A filter). Only the first response page is read.

    Args:
        scientific_name: Binomial name such as ``"Passer montanus"``.
        voc_type: Vocalization type; values containing a space are quoted.
        max_results: Maximum number of recordings returned.

    Returns:
        A list of recording dicts as returned by the API (at most
        ``max_results``), or ``[]`` when the name has fewer than two words,
        the request fails, the status is not 200 or the payload is unusable.
    """
    parts = scientific_name.split()
    if len(parts) < 2:
        return []

    genus, species = parts[0].lower(), parts[1].lower()

    # Multi-word types (e.g. "alarm call") must be quoted in the query.
    type_query = f'type:"{voc_type}"' if ' ' in voc_type else f'type:{voc_type}'
    query = f'gen:{genus} sp:{species} {type_query} q:A'

    try:
        # Let requests URL-encode the query and key instead of interpolating
        # them into the URL by hand.
        response = requests.get(
            'https://xeno-canto.org/api/3/recordings',
            params={'query': query, 'key': XC_API_KEY},
            timeout=30,
        )
        if response.status_code != 200:
            return []
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or invalid JSON: the search is best-effort, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return []

    if not isinstance(payload, dict):
        return []
    return (payload.get('recordings') or [])[:max_results]

def download_single(args):
    """Download one xeno-canto recording unless it is already on disk.

    Args:
        args: ``(recording, output_dir)`` where ``recording`` is a dict with
            an ``'id'`` key and an optional ``'file'`` URL, and ``output_dir``
            is a ``pathlib.Path``.

    Returns:
        The path ``output_dir / "XC<id>.mp3"`` when the file already exists or
        was downloaded, otherwise ``None`` (no URL, non-200 status, network
        or file-system error).

    Raises:
        KeyError: If ``recording`` has no ``'id'`` key.
    """
    recording, output_dir = args
    xc_id = recording['id']
    file_url = recording.get('file', '')

    if not file_url:
        return None

    # Normalise protocol-relative and site-relative URLs.
    if file_url.startswith('//'):
        file_url = 'https:' + file_url
    elif not file_url.startswith('http'):
        file_url = 'https://xeno-canto.org' + file_url

    output_path = output_dir / f"XC{xc_id}.mp3"

    if output_path.exists():
        return output_path

    # Write to a temporary name and rename afterwards, so an interrupted
    # download never leaves a truncated file that the exists() check above
    # would later accept as a finished download.
    partial_path = output_dir / f"XC{xc_id}.mp3.part"

    try:
        response = requests.get(file_url, timeout=60)
        if response.status_code != 200:
            return None
        with open(partial_path, 'wb') as f:
            f.write(response.content)
        partial_path.replace(output_path)
        return output_path
    except (requests.RequestException, OSError):
        # Best-effort download: a failed recording is skipped by the caller.
        try:
            if partial_path.exists():
                partial_path.unlink()
        except OSError:
            pass
        return None

def download_recordings_parallel(recordings, output_dir, max_workers=10):
    """Download several recordings concurrently with a thread pool.

    Args:
        recordings: Iterable of recording dicts, each handed to
            ``download_single``.
        output_dir: Target directory; created (with parents) when missing.
        max_workers: Size of the thread pool.

    Returns:
        List of the truthy results of ``download_single`` in completion order.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        jobs = [pool.submit(download_single, (rec, target_dir)) for rec in recordings]
        outcomes = [job.result() for job in as_completed(jobs)]

    # Failed downloads come back as None and are dropped here.
    return [path for path in outcomes if path]

# Confirmation that this cell ran and the download helpers are defined.
print("‚úÖ Download functies geladen")

In [None]:
# Spectrogram generatie met augmentation
import librosa
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from functools import partial

# Audio / mel-spectrogram settings shared by the functions in this cell.
SAMPLE_RATE = 48000  # rate the audio is resampled to on load (librosa `sr`)
N_MELS = 128  # number of mel bands (librosa `n_mels`)
N_FFT = 2048  # FFT window length (librosa `n_fft`)
HOP_LENGTH = 512  # hop between successive frames (librosa `hop_length`)
FMIN = 500  # lowest mel filter frequency (librosa `fmin`)
FMAX = 8000  # highest mel filter frequency (librosa `fmax`)
SEGMENT_DURATION = 3.0  # multiplied by SAMPLE_RATE to get samples per segment

def augment_audio(audio, sr):
    """Return the original clip followed by augmented variants of it.

    When every step succeeds the result is
    ``[original, pitch +2, pitch -2, stretch 0.9, stretch 1.1, noisy]``.
    A failing pitch-shift or time-stretch step is skipped, so later entries
    move forward in the list. The noisy copy is always last.

    Args:
        audio: 1-D array of audio samples.
        sr: Sample rate, passed to ``librosa.effects.pitch_shift``.

    Returns:
        List of arrays. Time-stretched variants are trimmed or zero-padded
        to ``len(audio)`` samples.
    """
    target_len = len(audio)

    def _fit_length(clip):
        """Trim or zero-pad ``clip`` to ``target_len`` samples."""
        if len(clip) > target_len:
            return clip[:target_len]
        return np.pad(clip, (0, target_len - len(clip)))

    augmented = [audio]

    # Augmentation is best-effort: a failing transform only costs its own
    # variants. ``Exception`` (rather than a bare ``except``) keeps
    # KeyboardInterrupt and SystemExit working.
    try:
        shifted_up = librosa.effects.pitch_shift(audio, sr=sr, n_steps=2)
        shifted_down = librosa.effects.pitch_shift(audio, sr=sr, n_steps=-2)
        augmented.extend([shifted_up, shifted_down])
    except Exception:
        pass

    try:
        stretched_slow = _fit_length(librosa.effects.time_stretch(audio, rate=0.9))
        stretched_fast = _fit_length(librosa.effects.time_stretch(audio, rate=1.1))
        augmented.extend([stretched_slow, stretched_fast])
    except Exception:
        pass

    # Additive Gaussian noise with standard deviation 0.005.
    noise = np.random.normal(0, 0.005, target_len)
    augmented.append(audio + noise)

    return augmented

def audio_to_spectrogram(audio, sr=SAMPLE_RATE):
    """Turn an audio clip into a min-max normalised 128x128 mel spectrogram.

    Args:
        audio: 1-D array of audio samples.
        sr: Sample rate of ``audio``.

    Returns:
        2-D array of shape ``(128, 128)``: the dB-scaled mel spectrogram,
        min-max normalised and resized when its shape differs.
    """
    power = librosa.feature.melspectrogram(
        y=audio,
        sr=sr,
        n_mels=N_MELS,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        fmin=FMIN,
        fmax=FMAX,
    )
    decibels = librosa.power_to_db(power, ref=np.max)

    # The small epsilon avoids a division by zero for a constant spectrogram.
    scaled = (decibels - decibels.min()) / (decibels.max() - decibels.min() + 1e-8)

    if scaled.shape == (128, 128):
        return scaled

    from skimage.transform import resize
    return resize(scaled, (128, 128), anti_aliasing=True)

def process_single_audio(audio_path, max_segments=5, use_augmentation=True):
    """Cut one audio file into fixed-length segments and return spectrograms.

    The file is loaded as mono at ``SAMPLE_RATE`` and split into consecutive
    segments of ``SEGMENT_DURATION`` seconds. A trailing segment shorter than
    half a segment is skipped; a shorter-than-full segment is zero-padded.

    At most ``max_segments`` segments are used per file. Previously the limit
    was ``max_segments * 6`` spectrograms, which assumed six variants per
    segment even though only ``AUGMENTATION_FACTOR`` variants are kept, so far
    more than ``max_segments`` segments were processed with augmentation on.

    Args:
        audio_path: Path of the audio file.
        max_segments: Maximum number of segments taken from the file.
        use_augmentation: When true, each segment contributes the first
            ``AUGMENTATION_FACTOR`` entries of ``augment_audio``; otherwise
            only the segment itself.

    Returns:
        List of spectrogram arrays; ``[]`` when the file cannot be loaded.
    """
    try:
        audio, _ = librosa.load(str(audio_path), sr=SAMPLE_RATE, mono=True)
    except Exception:
        # Unreadable or missing file: skip it (best-effort), but do not
        # swallow KeyboardInterrupt/SystemExit as a bare ``except`` would.
        return []

    segment_samples = int(SEGMENT_DURATION * SAMPLE_RATE)
    spectrograms = []
    segments_used = 0

    for start in range(0, len(audio), segment_samples):
        if segments_used >= max_segments:
            break

        segment = audio[start:start + segment_samples]
        if len(segment) < segment_samples // 2:
            continue

        if len(segment) < segment_samples:
            segment = np.pad(segment, (0, segment_samples - len(segment)))

        if use_augmentation:
            variants = augment_audio(segment, SAMPLE_RATE)[:AUGMENTATION_FACTOR]
        else:
            variants = [segment]

        spectrograms.extend(audio_to_spectrogram(variant) for variant in variants)
        segments_used += 1

    return spectrograms

def process_audio_files_parallel(audio_paths, max_segments=5, max_workers=4, use_augmentation=True):
    """Run ``process_single_audio`` over many files with a thread pool.

    Args:
        audio_paths: Iterable of audio file paths.
        max_segments: Forwarded to ``process_single_audio``.
        max_workers: Size of the thread pool.
        use_augmentation: Forwarded to ``process_single_audio``.

    Returns:
        One flat list with the spectrograms of all files, in input order.
    """
    worker = partial(
        process_single_audio,
        max_segments=max_segments,
        use_augmentation=use_augmentation,
    )

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        per_file = list(pool.map(worker, audio_paths))

    # Flatten the per-file lists into a single list.
    return [spec for file_specs in per_file for spec in file_specs]

# Confirmation that this cell ran and the spectrogram helpers are defined.
print("‚úÖ Spectrogram functies geladen")

In [None]:
# Ultimate CNN Model (4 conv blocks)
import torch
import torch.nn as nn

class VocalizationCNN(nn.Module):
    """CNN classifier for single-channel spectrogram images.

    ``features`` stacks four stages of Conv2d(3x3) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2) -> Dropout2d(0.25) with 32, 64, 128 and 256 output channels.
    ``classifier`` flattens the result and applies two hidden linear layers
    (512 and 256 units, each followed by ReLU and Dropout(0.5)) and a final
    linear layer with ``num_classes`` outputs.

    Args:
        input_shape: ``(height, width)`` of the input spectrogram; used to
            size the first linear layer.
        num_classes: Number of output logits.
    """

    def __init__(self, input_shape=(128, 128), num_classes=3):
        super().__init__()

        # Layers are appended in the same order as a hand-written Sequential,
        # so the state_dict keys (features.0, features.1, ...) are unchanged.
        channels = (1, 32, 64, 128, 256)
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend([
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Dropout2d(0.25),
            ])
        self.features = nn.Sequential(*stages)

        # Four 2x2 poolings shrink each spatial dimension by a factor of 16.
        pooled_h = input_shape[0] // 16
        pooled_w = input_shape[1] // 16
        flatten_size = 256 * pooled_h * pooled_w

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flatten_size, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch ``x``."""
        return self.classifier(self.features(x))

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"‚úÖ CNN model klaar voor {device}")

In [None]:
# Training Pipeline
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
import time

def train_species(dutch_name, scientific_name, dirname):
    """Train and save a vocalization-type classifier for a single species.

    For each vocalization type ('song', 'call', 'alarm call') recordings are
    searched on xeno-canto, downloaded below ``{DRIVE_BASE}/audio/{dirname}``
    and converted to spectrograms. A ``VocalizationCNN`` is trained on an
    80/20 stratified split; the weights with the best validation accuracy are
    saved to ``{DRIVE_BASE}/models/{dirname}.pt`` together with metadata.

    Args:
        dutch_name: Dutch species name; printed and stored in the checkpoint.
        scientific_name: Scientific name used for the xeno-canto search.
        dirname: Name of the audio sub-directory and of the model file.

    Returns:
        ``(best_acc, 'success')`` when a model was saved, otherwise
        ``(None, reason)`` with ``reason`` one of ``'insufficient_data'``,
        ``'single_class'``, ``'cuda_error'`` or ``'training_failed'``.

    Raises:
        RuntimeError: Re-raised when training fails with a runtime error
            whose message does not contain ``'CUDA'``.
    """
    print(f"\n{'='*60}")
    print(f"üê¶ {dutch_name} ({scientific_name})")
    print(f"{'='*60}")

    start_time = time.time()
    audio_dir = Path(f'{DRIVE_BASE}/audio/{dirname}')

    X_all, y_all = [], []
    voc_types = [('song', 0), ('call', 1), ('alarm call', 2)]

    # --- Collect spectrograms per vocalization type ---
    for voc_type, label in voc_types:
        print(f"  üì• {voc_type}...", end=' ')
        recordings = search_xeno_canto(scientific_name, voc_type, max_results=MAX_RECORDINGS_PER_TYPE)

        if not recordings:
            print("0 gevonden")
            continue

        type_dir = audio_dir / voc_type.replace(' ', '_')
        audio_files = download_recordings_parallel(
            recordings[:MAX_RECORDINGS_PER_TYPE],
            type_dir,
            max_workers=MAX_CONCURRENT_DOWNLOADS
        )
        print(f"{len(audio_files)} files", end=' ')

        if audio_files:
            specs = process_audio_files_parallel(
                audio_files,
                max_segments=MAX_SEGMENTS_PER_RECORDING,
                max_workers=NUM_WORKERS,
                use_augmentation=USE_AUGMENTATION
            )
            X_all.extend(specs)
            y_all.extend([label] * len(specs))
            print(f"‚Üí {len(specs)} specs")
        else:
            print()

    if len(X_all) < 30:
        print(f"  ‚ö†Ô∏è Te weinig data ({len(X_all)})")
        return None, 'insufficient_data'

    X = np.array(X_all)
    y = np.array(y_all)

    unique_labels = np.unique(y)
    num_classes = len(unique_labels)

    if num_classes < 2:
        print(f"  ‚ö†Ô∏è Slechts 1 klasse")
        return None, 'single_class'

    # Map the labels that actually occur onto 0..num_classes-1. Keys are
    # converted to plain ints so the saved checkpoint holds no numpy scalars,
    # which torch.load(..., weights_only=True) refuses to unpickle.
    label_map = {int(old): new for new, old in enumerate(unique_labels)}
    y_remapped = np.array([label_map[int(l)] for l in y])

    all_class_names = ['song', 'call', 'alarm']
    class_names = [all_class_names[l] for l in unique_labels]

    unique, counts = np.unique(y_remapped, return_counts=True)
    class_dist = {class_names[i]: int(counts[i]) for i in range(len(counts))}
    print(f"  üìä {len(X)} specs: {class_dist}")

    # A stratified split needs at least two samples per class; without this
    # guard train_test_split raises ValueError for such a species.
    if counts.min() < 2:
        print(f"  ‚ö†Ô∏è Te weinig data ({len(X_all)})")
        return None, 'insufficient_data'

    # NOTE(review): the split is made per spectrogram, so segments (and
    # augmented copies) of one recording can end up in both the training and
    # the validation set; the reported accuracy is therefore likely optimistic.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y_remapped, test_size=0.2, random_state=42, stratify=y_remapped
    )

    # Pinned host memory only helps (and only avoids a warning) with CUDA.
    pin = device.type == 'cuda'
    train_loader = DataLoader(
        TensorDataset(torch.FloatTensor(X_train).unsqueeze(1), torch.LongTensor(y_train)),
        batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=pin
    )
    val_loader = DataLoader(
        TensorDataset(torch.FloatTensor(X_val).unsqueeze(1), torch.LongTensor(y_val)),
        batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=pin
    )

    model = VocalizationCNN(num_classes=num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)
    # mode='max' because the scheduler is stepped with validation accuracy.
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, min_lr=MIN_LR)

    best_acc = 0
    best_state = None
    patience_counter = 0

    try:
        for epoch in range(EPOCHS):
            model.train()
            for X_batch, y_batch in train_loader:
                X_batch = X_batch.to(device, non_blocking=True)
                y_batch = y_batch.to(device, non_blocking=True)

                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()

            model.eval()
            val_correct = 0
            with torch.no_grad():
                for X_batch, y_batch in val_loader:
                    X_batch = X_batch.to(device, non_blocking=True)
                    y_batch = y_batch.to(device, non_blocking=True)
                    outputs = model(X_batch)
                    val_correct += (outputs.argmax(1) == y_batch).sum().item()

            val_acc = val_correct / len(y_val)
            scheduler.step(val_acc)

            if val_acc > best_acc:
                best_acc = val_acc
                # Keep a CPU copy so the best weights survive later epochs.
                best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                patience_counter = 0
            else:
                patience_counter += 1

            if patience_counter >= PATIENCE:
                print(f"  ‚èπÔ∏è Early stop @ epoch {epoch+1}")
                break

    except RuntimeError as e:
        if 'CUDA' not in str(e):
            raise
        # CUDA failure: free memory and, when an earlier epoch already
        # produced weights, continue and save those.
        print(f"  ‚ö†Ô∏è CUDA error")
        torch.cuda.empty_cache()
        gc.collect()
        if best_state is None:
            return None, 'cuda_error'

    if best_state is None:
        return None, 'training_failed'

    # The model file is named after the scientific name (dirname).
    model_path = Path(f'{DRIVE_BASE}/models/{dirname}.pt')
    model_path.parent.mkdir(parents=True, exist_ok=True)
    torch.save({
        'model_state_dict': best_state,
        'num_classes': num_classes,
        'class_names': class_names,
        'label_map': label_map,
        'accuracy': best_acc,
        'species_name': dutch_name,
        'scientific_name': scientific_name,
        'version': VERSION,
        'class_distribution': class_dist
    }, model_path)

    # Release model and loaders before the next species is trained.
    del model, train_loader, val_loader
    torch.cuda.empty_cache()
    gc.collect()

    elapsed = time.time() - start_time
    print(f"  ‚úÖ {model_path.name} | Acc: {best_acc:.1%} | {elapsed:.0f}s")

    return best_acc, 'success'

# Confirmation that this cell ran and train_species is defined.
print("‚úÖ Training pipeline geladen")

In [None]:
# 🚀 START NORDIC TRAINING
from datetime import datetime
import pandas as pd

# Train every species in NORDIC_SPECIES, collecting one result row each.
results = []
start_all = time.time()

print(f"{'='*60}")
print(f"üöÄ EMSN NORDIC VOCALIZATION TRAINING")
print(f"{'='*60}")
print(f"Start: {datetime.now().strftime('%H:%M:%S')}")
print(f"Soorten: {len(NORDIC_SPECIES)}")
print(f"GPU: {GPU_TYPE}")
print(f"{'='*60}")

successful = 0
failed = 0

for i, (dutch, scientific, dirname) in enumerate(NORDIC_SPECIES):
    # Outcome that is recorded when train_species raises.
    acc, status = None, 'error'
    try:
        acc, status = train_species(dutch, scientific, dirname)
    except Exception as e:
        # One failing species must not abort the whole run.
        print(f"  ‚ùå Error: {str(e)[:50]}")

    results.append({
        'species': dutch,
        'scientific': scientific,
        'filename': dirname,
        'accuracy': acc,
        'status': status
    })

    if status == 'success':
        successful += 1
    else:
        failed += 1

    # Checkpoint and progress report after every fifth species only.
    if (i + 1) % 5 != 0:
        continue

    pd.DataFrame(results).to_csv(f'{DRIVE_BASE}/checkpoint.csv', index=False)
    elapsed = time.time() - start_all
    eta = (elapsed / (i + 1)) * (len(NORDIC_SPECIES) - i - 1)
    print(f"\n  üíæ [{i+1}/{len(NORDIC_SPECIES)}] ‚úÖ{successful} ‚ùå{failed} | ETA: {eta/60:.0f}min\n")

elapsed_all = time.time() - start_all
print(f"\n{'='*60}")
print(f"üèÅ TRAINING VOLTOOID!")
print(f"{'='*60}")
print(f"Tijd: {elapsed_all/60:.1f} minuten")
print(f"Succesvol: {successful}/{len(NORDIC_SPECIES)}")
print(f"Mislukt: {failed}/{len(NORDIC_SPECIES)}")

In [None]:
# 📊 Resultaten
import pandas as pd

# Persist all result rows, then summarise the successfully trained species.
df = pd.DataFrame(results)
df.to_csv(f'{DRIVE_BASE}/results_nordic.csv', index=False)

successful_df = df.loc[df['status'] == 'success']

print(f"\n{'='*60}")
print(f"üìä RESULTATEN")
print(f"{'='*60}")
print(f"Getraind: {len(successful_df)}/{len(df)}")

if not successful_df.empty:
    print("\nAccuracy:")
    print(f"  Gemiddeld: {successful_df['accuracy'].mean():.1%}")
    print(f"  Min: {successful_df['accuracy'].min():.1%}")
    print(f"  Max: {successful_df['accuracy'].max():.1%}")

    # One line per trained species: accuracy, Dutch name, scientific name.
    print(f"\nüèÜ Resultaten:")
    for _, row in successful_df.iterrows():
        print(f"  {row['accuracy']:.1%} - {row['species']} ({row['scientific']})")

In [None]:
# 📥 DOWNLOAD MODELLEN
from pathlib import Path
from google.colab import files
import shutil

# List the saved model files, zip the models folder and offer it for download.
models_dir = Path(f'{DRIVE_BASE}/models')
models = sorted(models_dir.glob('*.pt'))

print(f"{'='*60}")
print(f"üìÅ NORDIC MODELLEN")
print(f"{'='*60}")
print(f"Totaal: {len(models)} modellen")

if not models:
    print("‚ö†Ô∏è Geen modellen")
else:
    total_size = sum(m.stat().st_size for m in models) / 1e6
    print(f"Grootte: {total_size:.1f} MB")

    print("\nModellen (met scientific names):")
    for m in models:
        print(f"  ‚Ä¢ {m.name}")

    # make_archive appends ".zip" to the base name, giving zip_path.
    print(f"\nüì¶ ZIP maken...")
    zip_path = '/content/emsn_models_nordic.zip'
    shutil.make_archive('/content/emsn_models_nordic', 'zip', models_dir)
    zip_size = Path(zip_path).stat().st_size / 1e6
    print(f"‚úÖ {zip_path} ({zip_size:.1f} MB)")

    print(f"\nüì• Downloading...")
    files.download(zip_path)