# 🔥 Fire Detection AI - 5M Dataset Training

This notebook implements an optimized training pipeline for the Fire Detection AI models using a 5M-sample subset of the 50M-sample dataset. The goal is to cut training time from roughly 43 hours on the full dataset to 2-4 hours while maintaining reasonable accuracy.

## Key Features

- **Optimized Data Sampling**: Stratified sampling with temporal pattern preservation
- **Optimized Model Architecture**: Reduced transformer size with efficient ML ensemble
- **Enhanced Visualizations**: Real-time training dashboard and comprehensive visualizations
- **Comprehensive Error Handling**: Custom exceptions, recovery mechanisms, and checkpointing
- **Detailed Logging**: Multi-destination logging with structured format

## Expected Outcomes

- **Training Time**: 2-4 hours (vs 43 hours for full dataset)
- **Model Accuracy**: 94-96% (vs 97-98% for full dataset)
- **Cost Savings**: ~90% reduction in training costs

## 1. Setup and Configuration

### 1.1 Import Libraries

In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import json
import os
import time
import logging
import sys
import traceback
import functools
import threading
import gc
from datetime import datetime
from IPython import display

# Machine learning libraries
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import joblib

# Visualization libraries
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap
from scipy import stats

# AWS libraries
try:
    import boto3
    import sagemaker
    from botocore.exceptions import ClientError
    AWS_AVAILABLE = True
    print("✅ AWS libraries available")
except ImportError:
    AWS_AVAILABLE = False
    print("❌ AWS libraries not available - install with: pip install boto3 sagemaker")

# Optional ML libraries
try:
    import xgboost as xgb
    XGB_AVAILABLE = True
    print("✅ XGBoost available")
except ImportError:
    XGB_AVAILABLE = False
    print("❌ XGBoost not available - install with: pip install xgboost")

try:
    import lightgbm as lgb
    LGB_AVAILABLE = True
    print("✅ LightGBM available")
except ImportError:
    LGB_AVAILABLE = False
    print("❌ LightGBM not available - install with: pip install lightgbm")

# Configure notebook settings
# IPython magic (not plain Python): draw matplotlib figures inline in the cell output
%matplotlib inline
# Whitegrid look for plain matplotlib figures.
# NOTE(review): the 'seaborn-v0_8-*' style names exist in matplotlib >= 3.6 — confirm the pinned version
plt.style.use('seaborn-v0_8-whitegrid')
# Same whitegrid theme for figures produced through seaborn
sns.set_style('whitegrid')
# Default figure size (width, height) in inches and base font size for all plots
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 12

### 1.2 Configuration Parameters

In [None]:
# ---------------------------------------------------------------------------
# Dataset: where the cleaned data lives and how much of it is sampled
# ---------------------------------------------------------------------------
DATASET_BUCKET = "processedd-synthetic-data"
DATASET_PREFIX = "cleaned-data/"
SAMPLE_SIZE_PER_AREA = 1_000_000  # 1M samples per area = 5M total
RANDOM_SEED = 42

# ---------------------------------------------------------------------------
# Sampling: flags and the target share of each class label
# ---------------------------------------------------------------------------
PRESERVE_TEMPORAL_PATTERNS = True
ENSURE_CLASS_BALANCE = True
# Label -> target fraction (0 = Normal, 1 = Warning, 2 = Fire)
CLASS_DISTRIBUTION_TARGET = {0: 0.70, 1: 0.20, 2: 0.10}

# ---------------------------------------------------------------------------
# Training hyperparameters
# ---------------------------------------------------------------------------
EPOCHS = 50  # Reduced from 100
BATCH_SIZE = 256
EARLY_STOPPING_PATIENCE = 5
LEARNING_RATE = 0.002

# ---------------------------------------------------------------------------
# Model: transformer dimensions
# ---------------------------------------------------------------------------
TRANSFORMER_CONFIG = dict(
    d_model=128,   # Reduced from 256
    num_heads=4,   # Reduced from 8
    num_layers=3,  # Reduced from 6
    dropout=0.1,
)

# ---------------------------------------------------------------------------
# Visualization behaviour
# ---------------------------------------------------------------------------
VISUALIZATION_CONFIG = dict(
    update_interval=1,  # Update visualizations every N epochs
    save_figures=True,
    figure_dir='figures',
    create_animations=True,
)

# Make sure every output directory exists before anything writes to it
for _required_dir in ('logs', 'checkpoints', 'models', VISUALIZATION_CONFIG['figure_dir']):
    os.makedirs(_required_dir, exist_ok=True)

### 1.3 Logging Configuration

In [None]:
def setup_logging():
    """Configure the root logger for the training run and return it.

    Replaces whatever handlers the root logger currently has with:

    * a file handler writing to ``logs/fire_detection_5m_training_<timestamp>.log``
      (UTF-8, handler threshold DEBUG),
    * a console handler (threshold INFO),
    * an ``S3LogHandler`` targeting ``DATASET_BUCKET`` (threshold INFO), only
      when ``AWS_AVAILABLE`` is true and the handler can be constructed.

    The root logger level is INFO, so records below INFO are filtered out
    before they reach any handler, including the DEBUG-threshold file handler.

    The function is safe to call repeatedly (e.g. when the notebook cell is
    re-run): previously installed handlers are removed *and closed* so file
    descriptors and background upload threads are not leaked.

    Returns:
        logging.Logger: the configured root logger.
    """
    # Generate timestamp for log files
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_file = f'logs/fire_detection_5m_training_{timestamp}.log'

    # FileHandler opens the file immediately and fails if the directory is missing
    os.makedirs('logs', exist_ok=True)

    logger = logging.getLogger()

    # Set the level explicitly: logging.basicConfig() silently does nothing when
    # the root logger already has handlers (common in hosted notebooks), which
    # would leave the level at WARNING and drop every INFO record.
    logger.setLevel(logging.INFO)

    # Remove AND close existing handlers; just dropping the list would leak
    # open log files and S3 uploader threads from a previous call.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    # Create file handler (explicit UTF-8: the log messages contain emoji)
    file_handler = logging.FileHandler(log_file, encoding='utf-8')
    file_handler.setLevel(logging.DEBUG)
    file_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(file_formatter)

    # Create console handler
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console_handler.setFormatter(console_formatter)

    # Add handlers to logger
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    # Create S3 handler if AWS is available; failure here is non-fatal and
    # only produces a warning on the handlers configured above.
    if AWS_AVAILABLE:
        try:
            s3_handler = S3LogHandler(
                bucket=DATASET_BUCKET,
                key_prefix=f'logs/fire_detection_5m_training_{timestamp}/'
            )
            s3_handler.setLevel(logging.INFO)
            s3_handler.setFormatter(file_formatter)
            logger.addHandler(s3_handler)
            logger.info("✅ S3 logging configured")
        except Exception as e:
            logger.warning(f"⚠️ Failed to configure S3 logging: {e}")

    logger.info("🔧 Logging configured successfully")
    logger.info(f"📝 Log file: {log_file}")

    return logger

class S3LogHandler(logging.Handler):
    """Logging handler that batches formatted records and uploads them to S3.

    Formatted records are collected in ``self.buffer``. A batch is uploaded as
    one object when the buffer reaches ``buffer_size`` entries, every
    ``upload_interval`` seconds from a daemon thread, and once more on
    ``close()``. Each batch is written to
    ``<key_prefix>log_<YYYYmmdd_HHMMSS>_<sequence>.txt``; the per-handler
    sequence number keeps keys unique when several batches are uploaded within
    the same second.

    Upload failures never propagate to the logging caller: the error is printed
    to stderr and the batch is put back at the front of the buffer so a later
    upload can retry it.

    Args:
        bucket: Name of the destination S3 bucket.
        key_prefix: Prefix prepended to every object key.
        s3_client: Optional object exposing ``put_object(Bucket=, Key=, Body=)``.
            When omitted, ``boto3.client('s3')`` is created.
        buffer_size: Number of buffered records that triggers an upload.
        upload_interval: Seconds between periodic background uploads.
    """

    def __init__(self, bucket, key_prefix, s3_client=None, buffer_size=100,
                 upload_interval=60):
        super().__init__()
        self.bucket = bucket
        self.key_prefix = key_prefix
        self.buffer = []
        self.buffer_size = buffer_size  # Number of logs to buffer before writing to S3
        self.upload_interval = upload_interval
        self.s3_client = s3_client if s3_client is not None else boto3.client('s3')

        # The buffer is touched by logging threads and by the uploader thread.
        # A dedicated lock (rather than the handler's own lock) is used so the
        # uploader never contends with logging.shutdown(), which holds the
        # handler lock while calling close().
        self._buffer_lock = threading.Lock()
        self._upload_seq = 0  # Monotonic counter that makes object keys unique

        # Start background thread for uploading logs
        self.stop_event = threading.Event()
        self.upload_thread = threading.Thread(target=self._upload_logs_periodically)
        self.upload_thread.daemon = True
        self.upload_thread.start()

    def emit(self, record):
        """Format ``record``, buffer it, and upload when the buffer is full."""
        try:
            log_entry = self.format(record)
            with self._buffer_lock:
                self.buffer.append(log_entry)
                buffer_full = len(self.buffer) >= self.buffer_size
            # Upload outside the lock so other threads can keep logging
            if buffer_full:
                self._upload_logs()
        except Exception:
            # Standard logging contract: never let a handler error reach the caller
            self.handleError(record)

    def _upload_logs(self):
        """Upload the currently buffered records to S3 as a single object."""
        # Atomically take ownership of the pending records. Swapping the list
        # before the network call means records logged during the upload land
        # in the fresh buffer instead of being wiped by a later "clear".
        with self._buffer_lock:
            if not self.buffer:
                return
            batch = self.buffer
            self.buffer = []
            self._upload_seq += 1
            sequence = self._upload_seq

        try:
            # Create log content
            log_content = '\n'.join(batch)

            # Timestamp keeps keys sortable; the sequence number prevents two
            # uploads in the same second from overwriting each other.
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            key = f"{self.key_prefix}log_{timestamp}_{sequence:06d}.txt"

            # Upload to S3
            self.s3_client.put_object(
                Bucket=self.bucket,
                Key=key,
                Body=log_content.encode('utf-8')
            )

        except Exception as e:
            # Keep the records for a later retry, ahead of anything logged since
            with self._buffer_lock:
                self.buffer[:0] = batch
            # Don't raise exception, just print to stderr
            print(f"Error uploading logs to S3: {e}", file=sys.stderr)

    def _upload_logs_periodically(self):
        """Upload logs every ``upload_interval`` seconds until the handler is closed."""
        # Event.wait() returns True as soon as close() sets the event, so the
        # thread stops promptly instead of sleeping out the full interval.
        while not self.stop_event.wait(self.upload_interval):
            self._upload_logs()

    def close(self):
        """Stop the background uploader, flush remaining records, and close."""
        self.stop_event.set()
        self._upload_logs()  # Final upload
        super().close()

# Initialize the module-level logger by running setup_logging() once when this
# cell executes. Code in this file that logs (e.g. error_handler) looks up this
# global `logger` name at call time.
logger = setup_logging()

### 1.4 Error Handling Framework

In [None]:
class TrainingError(Exception):
    """Root of the training exception hierarchy.

    Catching this type handles every training-specific error defined below.
    """


class DataLoadingError(TrainingError):
    """Signals a failure while loading data."""


class ModelInitializationError(TrainingError):
    """Signals a failure while initializing a model."""


class TrainingProcessError(TrainingError):
    """Signals a failure during the training process itself."""


class EvaluationError(TrainingError):
    """Signals a failure while evaluating a model."""


class ModelSavingError(TrainingError):
    """Signals a failure while saving a model."""

def error_handler(func):
    """Wrap ``func`` so that its failures are logged and normalized.

    * A ``TrainingError`` (or subclass) raised by ``func`` is logged via the
      module-level ``logger`` and re-raised unchanged.
    * Any other ``Exception`` is logged together with its traceback and
      re-raised as ``TrainingProcessError`` chained to the original exception.

    The wrapper keeps ``func``'s metadata (name, docstring) through
    ``functools.wraps`` and returns ``func``'s result untouched on success.
    """
    def wrapper(*args, **kwargs):
        try:
            outcome = func(*args, **kwargs)
        except TrainingError as known_error:
            # Already a domain error: record it and let the caller's handler see it as-is
            logger.error(f"❌ {type(known_error).__name__}: {known_error!s}")
            raise
        except Exception as unexpected:
            # Anything else is unexpected: log message and traceback, then
            # convert to the domain type while preserving the cause
            logger.error(f"❌ Unexpected error in {func.__name__}: {unexpected!s}")
            logger.error(traceback.format_exc())
            raise TrainingProcessError(f"Error in {func.__name__}: {unexpected!s}") from unexpected
        else:
            return outcome

    # Inner function stays named `wrapper`: frame names show up in logged tracebacks
    return functools.wraps(func)(wrapper)