# 🌍 Disaster COG Processing

This simplified notebook converts disaster imagery to Cloud Optimized GeoTIFFs (COGs).

---

## 📋 Step 1: Configuration

Modify the configuration below with your event details:

In [None]:
# ========================================
# CONFIGURATION - MODIFY THESE VALUES
# ========================================

import re
import os

# --- Event details ---
# Your disaster event name
EVENT_NAME = "202408_TropicalStorm_Debby"
# Product type (sentinel1, sentinel2, landsat, etc.)
PRODUCT_NAME = "landsat8"

# --- S3 paths ---
# S3 bucket
BUCKET = "nasa-disasters"
# Where your files are (built from the event and product names above)
SOURCE_PATH = f"drcs_activations/{EVENT_NAME}/{PRODUCT_NAME}"
# Where to save COGs
DESTINATION_BASE = "drcs_activations_new"

# --- Processing options ---
# Set to True to replace existing files
OVERWRITE = False
# Set to True to verify results after processing
VERIFY = True

# ========================================
# FILENAME GENERATION FUNCTIONS
# Modify these to control output filenames
# ========================================

def extract_date_from_filename(filename):
    """Extract the first valid calendar date (YYYYMMDD) from a filename.

    Every non-overlapping run of eight digits is considered in order; the
    first one that forms a real calendar date is returned.  Runs that are
    not dates (for example numeric IDs such as ``99999999``) are skipped
    instead of being reported as a date.

    Args:
        filename: File name (or stem) to scan.

    Returns:
        The date formatted as ``YYYY-MM-DD``, or ``None`` when no 8-digit
        run in ``filename`` is a valid date.
    """
    import datetime  # local import: keeps the function usable on its own

    for candidate in re.findall(r'\d{8}', filename):
        year, month, day = candidate[0:4], candidate[4:6], candidate[6:8]
        try:
            # date() raises ValueError for impossible dates (month 13, Feb 30, ...)
            datetime.date(int(year), int(month), int(day))
        except ValueError:
            continue
        return f"{year}-{month}-{day}"
    return None

def create_truecolor_filename(original_path, event_name):
    """Build the output filename for a trueColor product.

    The directory and extension of ``original_path`` are dropped and the
    remaining stem is prefixed with ``event_name``.  When
    ``extract_date_from_filename`` returns a date for the stem, every
    underscore-prefixed 8-digit run is removed from it and the returned
    date is appended instead.  The result always ends in ``_day.tif``.
    """
    base, _ext = os.path.splitext(os.path.basename(original_path))
    iso_date = extract_date_from_filename(base)

    # No date detected: keep the stem untouched
    if not iso_date:
        return f"{event_name}_{base}_day.tif"

    base_without_dates = re.sub(r'_\d{8}', '', base)
    return f"{event_name}_{base_without_dates}_{iso_date}_day.tif"

def create_colorinfrared_filename(original_path, event_name):
    """Create filename for colorInfrared products."""
    filename = os.path.basename(original_path)
    stem = os.path.splitext(filename)[0]
    date = extract_date_from_filename(stem)
    
    if date:
        stem_clean = re.sub(r'_\d{8}', '', stem)
        return f"{event_name}_{stem_clean}_{date}_day.tif"
    return f"{event_name}_{stem}_day.tif"

def create_naturalcolor_filename(original_path, event_name):
    """Build the output filename for a naturalColor product.

    Returns ``{event_name}_{stem}_day.tif`` when no date is found in the
    stem of ``original_path``.  When ``extract_date_from_filename``
    returns a date, underscore-prefixed 8-digit runs are removed from the
    stem and the date is inserted before the ``_day.tif`` suffix.
    """
    name_only = os.path.basename(original_path)
    root, _suffix = os.path.splitext(name_only)

    detected = extract_date_from_filename(root)
    if detected:
        # Drop the raw YYYYMMDD tokens; the formatted date replaces them
        cleaned_root = re.sub(r'_\d{8}', '', root)
        output_name = f"{event_name}_{cleaned_root}_{detected}_day.tif"
    else:
        output_name = f"{event_name}_{root}_day.tif"
    return output_name

# Dispatch table: product type -> function that builds its output filename
FILENAME_CREATORS = dict(
    trueColor=create_truecolor_filename,
    colorInfrared=create_colorinfrared_filename,
    naturalColor=create_naturalcolor_filename,
    # Add more as needed for your products
)

# Optional: per-category output directory overrides.
# Categories not listed here fall back to the defaults.
OUTPUT_DIRS = dict(
    trueColor='Landsat/trueColor',
    colorInfrared='Landsat/colorIR',
    naturalColor='Landsat/naturalColor',
)

# Optional: manual no-data values per product (None = auto-detect).
# Leave empty or set an entry to None for auto-detection.
NODATA_VALUES = dict(
    NDVI=-9999,
    MNDWI=-9999,
)

# Echo the active configuration, one item per line
print(
    "✅ Configuration loaded",
    f"Event: {EVENT_NAME}",
    f"Source: s3://{BUCKET}/{SOURCE_PATH}",
    f"Destination: s3://{BUCKET}/{DESTINATION_BASE}/",
    sep="\n",
)

## 🚀 Step 2: Import and Initialize

This cell imports all necessary modules and connects to S3:

In [None]:
# Import the simplified processor
import sys
from pathlib import Path

# Add parent directory to path.
# Path('..') is resolved against the current working directory, and the
# absolute result is inserted at index 0 so it is searched before every
# other sys.path entry.  This has to happen before the import below
# (presumably so the `notebooks` package is found there - verify the
# notebook is launched from inside that directory).
sys.path.insert(0, str(Path('..').resolve()))

# Import our simplified helper
from notebooks.notebook_helpers import SimpleProcessor

# Bundle the Step 1 settings into the dict passed to SimpleProcessor
config = dict(
    event_name=EVENT_NAME,
    bucket=BUCKET,
    source_path=SOURCE_PATH,
    destination_base=DESTINATION_BASE,
    overwrite=OVERWRITE,
    verify=VERIFY,
    filename_creators=FILENAME_CREATORS,
    output_dirs=OUTPUT_DIRS,
    nodata_values=NODATA_VALUES,
)

# Build the processor from the configuration
processor = SimpleProcessor(config)

# Connect to S3 and report the outcome
if not processor.connect_to_s3():
    print("❌ Could not connect to S3. Check your AWS credentials.")
else:
    print("✅ Ready to process files")

## 🔍 Step 3: Discover Files and Preview

This cell finds your files and shows what will be processed:

In [None]:
# Discover files and keep the count for the following steps
num_files = processor.discover_files()

if not num_files > 0:
    print("⚠️ No files found. Check your SOURCE_PATH setting.")
else:
    # Show preview of what will be processed
    processor.preview_processing()

    print(
        "\n📌 Review the preview above. If filenames look incorrect,",
        "   modify the filename creator functions in Step 1.",
        sep="\n",
    )

## ⚙️ Step 4: Process Files

Run this cell to start processing all files:

In [None]:
# Process all files.
# `num_files` is set by Step 3; default to 0 so that running this cell
# before Step 3 prints the hint below instead of raising NameError.
if globals().get('num_files', 0) > 0:
    print("🚀 Starting processing...")
    print("This may take several minutes depending on file sizes.\n")

    # Process everything
    results = processor.process_all()

    # Display results
    if not results.empty:
        print("\n📊 Detailed Results:")
        # IPython provides `display` as a builtin, which `dir()` does not
        # list, so resolve the name directly (globals, then builtins) and
        # fall back to print() when running outside IPython.
        try:
            _show = display  # noqa: F821
        except NameError:
            _show = print
        _show(results)
else:
    print("No files to process. Run Step 3 first.")

## 📈 Step 5: Review Results (Optional)

View detailed results and statistics:

In [None]:
# Summarize the results table produced by Step 4
if 'results' not in locals() or results.empty:
    print("No results to analyze. Run Step 4 first.")
else:
    print("📊 PROCESSING STATISTICS")
    print("="*40)

    # Count rows per processing status
    total = len(results)
    success_df = results[results['status'] == 'success']
    success = len(success_df)
    failed_df = results[results['status'] == 'failed']
    failed = len(failed_df)
    skipped = len(results[results['status'] == 'skipped'])

    print(f"Total files: {total}")
    print(f"✅ Success: {success}")
    print(f"❌ Failed: {failed}")
    print(f"⏭️ Skipped: {skipped}")
    print(f"\nSuccess rate: {(success/total*100):.1f}%")

    # List each failed file with its error message
    if failed > 0:
        print("\n❌ Failed files:")
        for _, row in failed_df.iterrows():
            print(f"  - {row['file']}: {row.get('error', 'Unknown error')}")

    # Timing statistics, computed over successful files only
    if 'time_seconds' in results.columns and not success_df.empty:
        timings = success_df['time_seconds']
        avg_time = timings.mean()
        max_time = timings.max()
        print("\n⏱️ Timing:")
        print(f"Average: {avg_time:.1f} seconds per file")
        print(f"Slowest: {max_time:.1f} seconds")

## 💡 Tips & Troubleshooting

### Common Issues:

1. **"No files found"**
   - Check that `SOURCE_PATH` is correct
   - Verify files exist in S3: `aws s3 ls s3://bucket/path/`

2. **"Could not connect to S3"**
   - Check AWS credentials: `aws configure list`
   - Ensure you have access to the bucket

3. **Files being skipped**
   - Files already exist in destination
   - Set `OVERWRITE = True` to reprocess

4. **Processing is slow**
   - Large files take time
   - The system uses optimized GDAL processing automatically

5. **Wrong filenames**
   - Modify the filename creator functions in Step 1
   - Re-run from Step 3 to see preview

### Need More Control?

Use the full template at `disaster_processing_template.ipynb` for:
- Manual chunk configuration
- Custom processing parameters
- Detailed verification options
- Advanced memory management