In [None]:
# Parameters
# Notebook-level inputs. A value left as None is resolved by a later cell.
region_name = "Lakshadweep"  # study region; replaced by config['region_name'] in the setup cell when None
safe_file_path = None  # SAFE product to read; when None the extraction cell searches known folders for one
output_dir = None  # folder for saved figures; when None the setup cell uses <project_dir>/outputs/<region_slug>

# Satellite-Derived Bathymetry Data Preprocessing

This notebook processes Sentinel-2 data for deriving bathymetry in the selected study area. We'll go through:

1. Loading the region configuration
2. Extracting bands from SAFE file
3. Preprocessing and atmospheric correction
4. Water index calculation
5. Feature extraction for bathymetry modeling

In [None]:
import os
import sys
import json
import numpy as np
import rasterio
import matplotlib.pyplot as plt
import logging
from pathlib import Path
import glob

# Notebook setup: locate the project, configure logging, load the region
# configuration and derive the region-specific working directories.

# Add project root to path so the `src` package imported below can be resolved
project_dir = Path().absolute().parent
if project_dir.name != 'sdb_project':
    project_dir = project_dir / 'sdb_project'
# Re-running this cell in the same kernel must not pile up duplicate entries
if str(project_dir) not in sys.path:
    sys.path.append(str(project_dir))

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)

# Load configuration
config_path = project_dir / 'config' / 'location_config.json'
# Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
# would mis-read or reject non-ASCII characters in the JSON file.
with open(config_path, encoding='utf-8') as f:
    config = json.load(f)

# Use parameters if provided, otherwise use config
if region_name is None:
    region_name = config['region_name']

# Set up region-specific paths
region_slug = region_name.lower().replace(' ', '_')
processed_data_dir = project_dir / 'data' / 'sentinel' / region_slug / 'processed'
if output_dir is None:
    output_dir = project_dir / 'outputs' / region_slug

processed_data_dir.mkdir(parents=True, exist_ok=True)
Path(output_dir).mkdir(parents=True, exist_ok=True)

# Import our modules (must come after the sys.path update above)
from src.extract_bands import extract_bands_from_safe, load_band_as_array, calculate_water_indices, create_feature_stack

print(f"‚úÖ Processing region: {region_name}")
print(f"Area of Interest:")
print(f"  Latitude:  {config['aoi']['min_lat']}¬∞ to {config['aoi']['max_lat']}¬∞")
print(f"  Longitude: {config['aoi']['min_lon']}¬∞ to {config['aoi']['max_lon']}¬∞")
print(f"\nDirectories:")
print(f"  Processed data: {processed_data_dir}")
print(f"  Output: {output_dir}")

Processing region: Lakshadweep
Area of Interest:
Latitude:  10.75° to 10.95°
Longitude: 72.35° to 72.65°


## Extract Sentinel-2 Bands from SAFE File

Extract the required bands from the SAFE file for processing: B02 (blue), B03 (green), B04 (red) and B08 (near-infrared).

In [None]:
# Locate a Sentinel-2 SAFE product (unless one was passed in as a parameter),
# extract the requested bands from it and load each band into memory.

# Find SAFE file if not provided as parameter
if safe_file_path is None:
    # Candidate locations, checked in order; the first one holding a product wins
    search_paths = [
        project_dir / 'data' / 'sentinel' / region_slug / 'raw',
        project_dir.parent / 'sentinel2_pipeline' / 'data' / 'sentinel',
        # NOTE(review): machine-specific absolute path, kept only as a last-resort
        # fallback; prefer passing safe_file_path explicitly.
        Path('D:/Project/sentinel2_pipeline/data/sentinel')
    ]
    # Pattern order encodes preference: unpacked .SAFE directories before zips
    safe_patterns = ('*.SAFE', '*.SAFE.zip', '*SAFE*.zip')

    for search_path in search_paths:
        if not search_path.exists():
            continue
        # glob() yields entries in arbitrary filesystem order; sorting each
        # pattern's matches makes the chosen product reproducible across runs.
        safe_files = [
            candidate
            for pattern in safe_patterns
            for candidate in sorted(search_path.glob(pattern))
        ]
        if safe_files:
            safe_file_path = safe_files[0]  # Use first found
            break

    if safe_file_path is None:
        raise FileNotFoundError("No SAFE file found. Please provide safe_file_path parameter.")

print(f"üîç Using SAFE file: {safe_file_path}")

# Extract bands from SAFE file
requested_bands = ['B02', 'B03', 'B04', 'B08']
try:
    extracted_bands = extract_bands_from_safe(
        str(safe_file_path),
        str(processed_data_dir),
        bands=requested_bands
    )

    print(f"‚úÖ Successfully extracted {len(extracted_bands)} bands:")
    for band, path in extracted_bands.items():
        print(f"   {band}: {Path(path).name}")

except Exception as e:
    # Log for the notebook reader, then re-raise: nothing downstream can run
    logger.error(f"Failed to extract bands: {e}")
    raise

# Surface an incomplete extraction here instead of as a KeyError in a later cell
missing_bands = [name for name in requested_bands if name not in extracted_bands]
if missing_bands:
    logger.warning(f"Bands missing from extraction result: {missing_bands}")

# Load bands as arrays
bands = {}
for band_name, band_path in extracted_bands.items():
    bands[band_name] = load_band_as_array(band_path)
    print(f"Loaded {band_name} with shape: {bands[band_name].shape}")

FileNotFoundError: Band file not found: d:\Project\sdb_project\data\processed\lakshadweep\B02.jp2

## Visualize Raw Data

Create a true color composite to visualize the area of interest.

In [None]:
def create_true_color(r, g, b, percentile=98, low_percentile=2):
    """Create a true color composite with percentile-based contrast stretching.

    Each band is stretched independently: values at or below the
    ``low_percentile`` map to 0, values at or above ``percentile`` map to 1,
    and everything in between is scaled linearly. NaN pixels are ignored when
    computing the percentiles and remain NaN in the output.

    Args:
        r: Red band array.
        g: Green band array.
        b: Blue band array. All three must have shapes that ``np.dstack``
            can stack together.
        percentile: Upper percentile used as the white point (default 98).
        low_percentile: Lower percentile used as the black point (default 2).

    Returns:
        The three stretched bands stacked along a new last axis, i.e. an
        ``(H, W, 3)`` array for 2-D inputs. A band with no valid pixels, or
        with no contrast between the two percentiles, is returned as zeros.
    """
    def normalize_band(band):
        """Stretch one band to the 0-1 range."""
        valid_mask = ~np.isnan(band)
        if not valid_mask.any():
            return np.zeros_like(band)

        p_low = np.nanpercentile(band, low_percentile)
        p_high = np.nanpercentile(band, percentile)
        span = p_high - p_low
        # A constant band (or a non-finite percentile) gives a zero or invalid
        # span; dividing by it would fill the composite with NaN/inf.
        if span == 0 or not np.isfinite(span):
            return np.zeros_like(band, dtype=float)

        return np.clip((band - p_low) / span, 0, 1)

    # Stack the stretched bands into RGB order
    return np.dstack((normalize_band(r), normalize_band(g), normalize_band(b)))

# Build the RGB composite: B04 feeds the red channel, B03 green, B02 blue
true_color = create_true_color(bands['B04'], bands['B03'], bands['B02'])

# Render the composite on an explicit figure/axes pair and write it to disk
composite_fig, composite_ax = plt.subplots(figsize=(12, 8))
composite_ax.imshow(true_color)
composite_ax.set_title(f'True Color Composite (Sentinel-2) - {region_name}')
composite_ax.axis('off')
composite_fig.tight_layout()
composite_fig.savefig(
    Path(output_dir) / 'true_color_composite.png',
    dpi=150,
    bbox_inches='tight',
)
plt.show()

print("‚úÖ True color composite created and saved")

## Calculate Water Indices

Calculate water-related spectral indices for bathymetry analysis.

In [None]:
# Calculate water indices, then preview up to three of them side by side.
# Computation and plotting are guarded separately so that a plotting or
# file-writing problem cannot discard indices that were computed successfully.
print("üîÑ Calculating water indices...")
try:
    water_indices = calculate_water_indices(bands)
    print(f"‚úÖ Calculated indices: {list(water_indices.keys())}")
except Exception as e:
    # Best effort: later cells fall back to using all pixels without indices
    logger.error(f"Failed to calculate water indices: {e}")
    water_indices = {}

# Display indices (at most the first three)
indices_to_plot = list(water_indices.items())[:3]
if indices_to_plot:
    try:
        # squeeze=False always returns a 2-D axes array, so a single index
        # needs no special-casing.
        fig, axes = plt.subplots(1, len(indices_to_plot), figsize=(15, 4), squeeze=False)

        for ax, (index_name, index_data) in zip(axes[0], indices_to_plot):
            ax.imshow(index_data, cmap='RdYlBu', vmin=-1, vmax=1)
            ax.set_title(f'{index_name}')
            ax.axis('off')

        fig.tight_layout()
        fig.savefig(Path(output_dir) / 'water_indices.png', dpi=150, bbox_inches='tight')
        plt.show()
    except Exception as e:
        # The preview is optional; keep the computed indices for later cells
        logger.error(f"Failed to plot water indices: {e}")

## Create Feature Stack

Combine bands and indices into a feature array for machine learning, and derive a simple water mask from NDWI (pixels with NDWI > 0) when that index is available.

In [None]:
# Assemble the feature array from bands and indices, then derive a water mask
print("üîÑ Creating feature stack...")
try:
    features, feature_names = create_feature_stack(bands, water_indices)
    print(f"‚úÖ Created feature stack with shape: {features.shape}")
    print(f"   Features: {feature_names}")

    if 'NDWI' not in water_indices:
        # Without NDWI there is nothing to threshold: keep every pixel
        print("   No NDWI available, using all pixels for analysis")
        water_mask = np.ones(features.shape[:2], dtype=bool)
    else:
        # Simple threshold: water typically has NDWI > 0
        ndwi = water_indices['NDWI']
        water_mask = ndwi > 0
        water_pixels = np.sum(water_mask)
        water_percentage = (water_pixels / water_mask.size) * 100
        print(f"   Water pixels: {water_pixels} ({water_percentage:.1f}%)")

        # Show the index next to the mask derived from it
        mask_fig, (ndwi_ax, mask_ax) = plt.subplots(1, 2, figsize=(10, 6))

        ndwi_image = ndwi_ax.imshow(ndwi, cmap='RdYlBu', vmin=-1, vmax=1)
        ndwi_ax.set_title('NDWI')
        mask_fig.colorbar(ndwi_image, ax=ndwi_ax)
        ndwi_ax.axis('off')

        mask_image = mask_ax.imshow(water_mask, cmap='Blues')
        mask_ax.set_title('Water Mask')
        mask_fig.colorbar(mask_image, ax=mask_ax)
        mask_ax.axis('off')

        mask_fig.tight_layout()
        mask_fig.savefig(Path(output_dir) / 'water_mask.png', dpi=150, bbox_inches='tight')
        plt.show()

except Exception as e:
    # Later cells depend on `features`, so log and propagate
    logger.error(f"Failed to create feature stack: {e}")
    raise

## Save Processed Data

Set non-water pixels to NaN, then save the masked features and the water mask for the next processing steps.

In [None]:
# Blank out non-water pixels: the mask gets a trailing axis so it broadcasts
# across every feature, and masked-out positions become NaN
masked_features = np.where(np.expand_dims(water_mask, axis=-1), features, np.nan)

# Persist the masked features and the mask itself
features_path = processed_data_dir / 'features.npy'
mask_path = processed_data_dir / 'water_mask.npy'
np.save(features_path, masked_features)
np.save(mask_path, water_mask)

print(f"‚úÖ Saved features to: {features_path}")
print(f"‚úÖ Saved water mask to: {mask_path}")

# Preview the first feature layer (typically B02 - blue band)
blue_fig, blue_ax = plt.subplots(figsize=(10, 8))
blue_image = blue_ax.imshow(masked_features[..., 0], cmap='viridis')
blue_fig.colorbar(blue_image, ax=blue_ax, label='Blue Band Reflectance (Water Only)')
blue_ax.set_title(f'Water-Masked Blue Band - {region_name}')
blue_ax.axis('off')
blue_fig.tight_layout()
blue_fig.savefig(Path(output_dir) / 'water_masked_blue.png', dpi=150, bbox_inches='tight')
plt.show()

## Save Processing Metadata

Save information about the processing steps and outputs.

In [None]:
# Where the outputs of this notebook were written
data_paths = {
    'features': str(features_path),
    'water_mask': str(mask_path),
    'processed_data_dir': str(processed_data_dir)
}

# Summary statistics of the feature stack and the water mask
processing_info = {
    'image_shape': features.shape,
    'feature_names': feature_names,
    'n_water_pixels': int(np.sum(water_mask)),
    'water_percentage': float(np.mean(water_mask) * 100),
    'total_pixels': int(water_mask.size)
}

# Full metadata record (key order is the order written to the JSON file)
processing_metadata = {
    'region_name': region_name,
    'aoi': config['aoi'],
    'safe_file_used': str(safe_file_path),
    'data_paths': data_paths,
    'processing_info': processing_info
}

# Write the record next to the processed data
metadata_path = processed_data_dir / 'processing_metadata.json'
with open(metadata_path, 'w') as metadata_file:
    json.dump(processing_metadata, metadata_file, indent=2)

# Final report for the notebook reader
print("‚úÖ Data preprocessing complete!")
print(f"üìÅ Metadata saved: {metadata_path}")
print(f"üìä Processing summary:")
print(f"   - Image shape: {features.shape}")
print(f"   - Features: {len(feature_names)} ({', '.join(feature_names)})")
print(f"   - Water pixels: {processing_info['n_water_pixels']:,}")
print(f"   - Water coverage: {processing_info['water_percentage']:.1f}%")
print(f"\nüéØ Ready for model training!")