# Flood Detection Model Training
## Using SAR Sentinel-1 Data from Google Earth Engine

This notebook trains a U-Net model for flood segmentation using the free GPU tier on Google Colab.

**Requirements:**
- Run in Google Colab (free GPU tier)
- Google Drive mounted
- Earth Engine authenticated

**Output:**
- Trained model saved to Drive
- Training metrics and visualizations
- Baseline features for drift detection

In [None]:
# Cell 1: Setup Colab Environment
# Prepares the runtime: makes the project's helper module importable, runs the
# helper setup routines, then imports every library the later cells rely on.
print("üöÄ Setting up Colab environment for Flood Model Training")
print("=" * 60)

# Add utils to path
# Must happen before the `colab_helpers` import below, which is resolved from
# this Drive folder.
import sys
sys.path.append('/content/drive/MyDrive/disaster_detection/notebooks/utils')

# Import helpers
# NOTE: `plot_training_history` is imported here but not called in the cells
# visible in this notebook.
from colab_helpers import (
    setup_colab_environment,
    get_drive_paths,
    ensure_drive_directories,
    save_model_to_drive,
    plot_training_history
)

# Run setup
# NOTE(review): presumably these mount Drive / authenticate and create the
# output folders used later — confirm against colab_helpers.
setup_colab_environment()
ensure_drive_directories()

# Import standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# TensorFlow
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# Earth Engine
import ee
import geemap

# Environment report: TensorFlow version and the GPUs TensorFlow can see.
print(f"\n‚úÖ TensorFlow version: {tf.__version__}")
print(f"‚úÖ GPU Available: {tf.config.list_physical_devices('GPU')}")
# NOTE(review): this cell never calls ee.Initialize() itself; the message below
# assumes setup_colab_environment() already did so — confirm.
print(f"‚úÖ Earth Engine initialized")
print("=" * 60)

## Step 1: Define Area of Interest and Download SAR Data

We'll use Sentinel-1 SAR data, which is excellent for flood detection because:
- Works day/night and through clouds
- VV and VH polarizations detect water surfaces
- Free access via Google Earth Engine

In [None]:
# Cell 2: Define the area of interest (AOI) and preview it on a map
print("üì• Downloading SAR training data from Google Earth Engine...")

# AOI bounding box given as four numbers: xMin, yMin, xMax, yMax
# (example: Mumbai flood-prone region)
aoi_bounds = [72.7, 18.9, 73.0, 19.3]
aoi = ee.Geometry.Rectangle(aoi_bounds)

# Interactive preview: centre the map on the AOI and draw its outline in red
aoi_style = {'color': 'red'}
Map = geemap.Map()
Map.centerObject(aoi, zoom=10)
Map.addLayer(aoi, aoi_style, 'Area of Interest')

# Leaving the map as the cell's last expression renders it in the notebook
Map

In [None]:
# Cell 3: Download Function
def download_sar_images(aoi, start_date, end_date, max_images=50, scale=30):
    """
    Download Sentinel-1 SAR imagery for flood detection.

    Queries the COPERNICUS/S1_GRD collection for IW-mode scenes over ``aoi``
    that carry both VV and VH polarisations, appends a ``flood_index`` band
    (VV divided by VH) to each scene and exports it as a GeoTIFF into
    ``<data>/sar_training`` on Drive.

    Args:
        aoi: ee.Geometry used both to filter the collection and to clip the
            exported region.
        start_date: Start of the acquisition window (e.g. '2023-01-01').
        end_date: End of the acquisition window.
        max_images: Upper bound on the number of scenes to export.
        scale: Pixel size in metres requested from the export (default 30).
            NOTE(review): large AOIs at fine scales may exceed Earth Engine's
            direct-download size limit — adjust if exports fail.

    Returns:
        None. Progress and the number of files actually written are printed.
    """
    import os

    paths = get_drive_paths()
    save_dir = f"{paths['data']}/sar_training"
    # The export writes straight into this folder, so make sure it exists.
    os.makedirs(save_dir, exist_ok=True)

    # Query Sentinel-1 collection. Both polarisations are required because
    # VH is selected and used as the divisor below.
    s1_collection = (ee.ImageCollection('COPERNICUS/S1_GRD')
        .filterBounds(aoi)
        .filterDate(start_date, end_date)
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VH'))
        .filter(ee.Filter.eq('instrumentMode', 'IW'))
        .select(['VV', 'VH']))

    count = s1_collection.size().getInfo()
    print(f"Found {count} SAR images")

    # Export at most max_images scenes
    n_images = min(count, max_images)
    image_list = s1_collection.toList(n_images)
    saved = 0

    for i in range(n_images):
        try:
            image = ee.Image(image_list.get(i))

            # Calculate flood index (VV/VH ratio)
            # NOTE(review): S1_GRD backscatter is presumably stored in dB,
            # where a ratio corresponds to a difference — confirm that plain
            # division is the intended index.
            flood_index = image.select('VV').divide(image.select('VH')).rename('flood_index')
            # Cast to float32 to keep the export size down.
            image_with_index = image.addBands(flood_index).toFloat()

            print(f"  Processing image {i+1}/{n_images}")

            filename = os.path.join(save_dir, f"sar_{i:03d}.tif")
            geemap.ee_export_image(
                image_with_index,
                filename=filename,
                scale=scale,
                region=aoi,
                file_per_band=False,
            )

            # ee_export_image may report a failed download by printing rather
            # than raising, so only count scenes whose file is on disk.
            if os.path.exists(filename):
                saved += 1
            else:
                print(f"  Export produced no file for image {i}")

        except Exception as e:
            # Best effort per scene: one failure must not abort the batch.
            print(f"  Error processing image {i}: {e}")

    print(f"‚úÖ Downloaded {saved} images to {save_dir}")

# Run download
# Covers the AOI defined earlier for the 2023-01-01 .. 2024-12-31 window,
# capped at 30 scenes.
download_sar_images(aoi, '2023-01-01', '2024-12-31', max_images=30)

## Step 2: Build U-Net Model

U-Net is ideal for image segmentation tasks like flood detection:
- Encoder-decoder architecture
- Skip connections preserve spatial information
- Works well with limited training data

In [None]:
# Cell 4: Build U-Net Model
def _double_conv(tensor, filters):
    """Apply two consecutive 3x3, same-padded ReLU convolutions with ``filters`` channels."""
    for _ in range(2):
        tensor = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
    return tensor


def build_unet_flood_model(input_shape=(256, 256, 3)):
    """
    Build and compile a U-Net for binary flood segmentation.

    The network has three encoder stages (64/128/256 filters, each followed by
    2x2 max-pooling), a 512-filter bottleneck and three decoder stages that
    upsample by 2 and concatenate the matching encoder output (skip
    connection). Because of the three poolings, height and width must be
    divisible by 8 for the skip concatenations to line up.

    Args:
        input_shape: (height, width, channels) of one input tile. The default
            expects 3 channels (VV, VH, flood_index).

    Returns:
        A compiled Keras model whose output is a single-channel sigmoid map
        (per-pixel flood probability) with the same height and width as the
        input. Compiled with Adam, binary cross-entropy, accuracy and a
        thresholded IoU metric.
    """
    inputs = layers.Input(shape=input_shape)

    # Encoder (downsampling)
    c1 = _double_conv(inputs, 64)
    p1 = layers.MaxPooling2D((2, 2))(c1)

    c2 = _double_conv(p1, 128)
    p2 = layers.MaxPooling2D((2, 2))(c2)

    c3 = _double_conv(p2, 256)
    p3 = layers.MaxPooling2D((2, 2))(c3)

    # Bottleneck
    c4 = _double_conv(p3, 512)

    # Decoder (upsampling); each stage re-uses the encoder features of the
    # same resolution via concatenation.
    u5 = layers.UpSampling2D((2, 2))(c4)
    u5 = layers.concatenate([u5, c3])
    c5 = _double_conv(u5, 256)

    u6 = layers.UpSampling2D((2, 2))(c5)
    u6 = layers.concatenate([u6, c2])
    c6 = _double_conv(u6, 128)

    u7 = layers.UpSampling2D((2, 2))(c6)
    u7 = layers.concatenate([u7, c1])
    c7 = _double_conv(u7, 64)

    # Output layer: 1x1 convolution to one sigmoid channel
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(c7)

    model = models.Model(inputs=[inputs], outputs=[outputs])

    # MeanIoU expects class indices as predictions, but this model emits
    # sigmoid probabilities. BinaryIoU thresholds them at 0.5 first and, with
    # both class ids listed, still averages the IoU of background and flood.
    # The name is pinned so the metric keeps its previous key, 'mean_io_u'.
    iou_metric = tf.keras.metrics.BinaryIoU(
        target_class_ids=[0, 1],
        threshold=0.5,
        name='mean_io_u',
    )

    # Compile
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy', iou_metric]
    )

    return model

# Build model
# Uses the default input shape; `model` is the global instance that the
# training and evaluation cells operate on.
model = build_unet_flood_model()
print("‚úÖ U-Net model created")
# Print the layer-by-layer overview of the network
model.summary()

## Step 3: Train Model

Training on Colab's free GPU (usually Tesla T4 or P100)

In [None]:
# Cell 5: Training
print("üî• Starting model training...")

# Checkpoint file names embed the epoch number and validation loss. The doubled
# braces survive the f-string so the placeholders are filled in at save time.
paths = get_drive_paths()
checkpoint_path = f"{paths['models']}/flood/checkpoints/model_{{epoch:02d}}_{{val_loss:.4f}}.h5"

# Callbacks: stop early when training stalls, keep the best checkpoint,
# and halve the learning rate on a plateau
early_stopping = EarlyStopping(patience=10, restore_best_weights=True, verbose=1)
best_checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True, monitor='val_loss', verbose=1)
lr_on_plateau = ReduceLROnPlateau(factor=0.5, patience=5, verbose=1)
callbacks = [early_stopping, best_checkpoint, lr_on_plateau]

# Scaffold only: random tiles and random binary masks stand in for real data.
# In production, load the actual SAR tiles and flood masks here instead.
print("‚ö†Ô∏è  Using dummy data for demonstration")
n_train, n_val = 20, 5
tile_hw = (256, 256)
X_train = np.random.randn(n_train, *tile_hw, 3)
y_train = np.random.randint(0, 2, (n_train, *tile_hw, 1)).astype(np.float32)
X_val = np.random.randn(n_val, *tile_hw, 3)
y_val = np.random.randint(0, 2, (n_val, *tile_hw, 1)).astype(np.float32)

# Fit the model (3 epochs keeps the demo short; use 50+ in production)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=3,
    batch_size=4,
    callbacks=callbacks,
    verbose=1,
)

print("‚úÖ Training complete!")

In [None]:
# Cell 6: Evaluate and Save
# evaluate() yields the loss followed by the two compiled metrics, in order
eval_scores = model.evaluate(X_val, y_val)
val_loss, val_acc, val_iou = eval_scores

print(f"\nüìä Validation Metrics:")
print(f"   Loss: {val_loss:.4f}")
print(f"   Accuracy: {val_acc:.4f}")
print(f"   IoU: {val_iou:.4f}")

# Store the model under a timestamped file name
save_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
model_name = f"flood_model_{save_stamp}.h5"
model_path = save_model_to_drive(model, model_name, 'flood')

# Collect the run's metrics as plain Python values
import json
epochs_trained = len(history.history['loss'])
metrics = dict(
    model_type='flood_detection',
    architecture='unet',
    training_date=datetime.now().isoformat(),
    val_loss=float(val_loss),
    val_accuracy=float(val_acc),
    val_iou=float(val_iou),
    epochs_trained=epochs_trained,
)

# Hand the metrics to the Drive helper together with the saved model's path
from colab_helpers import save_metrics_to_drive
save_metrics_to_drive(metrics, model_path)

print(f"\n‚úÖ Model and metrics saved!")
print(f"   Model: {model_path}")

In [None]:
# Cell 7: Create Baseline for Drift Detection
# Summarises each validation tile with a few scalar statistics and stores the
# resulting table as a CSV.
print("üìä Creating baseline features for drift detection...")

def extract_baseline_features(X_data):
    """
    Summarise every image in ``X_data`` with scalar statistics for drift monitoring.

    Each image is indexed as ``img[:, :, c]`` and must provide at least
    channels 0, 1 and 2 (reported as VV, VH and flood index respectively).

    Returns:
        dict mapping feature name to a list with one value per image:
        ``mean_intensity`` / ``std_intensity`` over the whole image, and
        ``vv_mean`` / ``vh_mean`` / ``flood_index_mean`` for channels 0 / 1 / 2.
    """
    # (feature name, per-image reducer) pairs, listed in output order
    reducers = (
        ('mean_intensity', np.mean),
        ('std_intensity', np.std),
        ('vv_mean', lambda tile: np.mean(tile[:, :, 0])),
        ('vh_mean', lambda tile: np.mean(tile[:, :, 1])),
        ('flood_index_mean', lambda tile: np.mean(tile[:, :, 2])),
    )
    features = {column: [] for column, _ in reducers}

    # Single pass over the images, so one-shot iterables work as well
    for tile in X_data:
        for column, reduce_tile in reducers:
            features[column].append(reduce_tile(tile))

    return features

# Build the baseline table: one row per validation tile, one column per feature
baseline_features = extract_baseline_features(X_val)
baseline_df = pd.DataFrame(baseline_features)

# Write it as CSV (without the index column) into the Drive baseline folder
paths = get_drive_paths()
baseline_dir = paths['baseline']
baseline_path = f"{baseline_dir}/flood_baseline_features.csv"
baseline_df.to_csv(baseline_path, index=False)

# Report where the file went, followed by summary statistics per feature
print(f"‚úÖ Baseline saved to: {baseline_path}")
print(f"\nüìà Baseline Statistics:")
baseline_stats = baseline_df.describe()
print(baseline_stats)

## ✅ Training Complete!

Next steps:
1. Download this model to the local server via Jenkins
2. Deploy in FastAPI inference server
3. Monitor for drift in production
4. Auto-retrain when drift detected

**Note:** Upload this notebook to your Google Drive and note its file ID for the Jenkins pipeline.