# In [2]:
"""
Topic: Consistent Error Handling (using logger.error)
=====================================================
This script demonstrates how to use Python's logging module
for consistent and centralized error handling in ETL pipelines.
"""

# -------------------------------------------------------------------
# Slide 1 – Overview
# -------------------------------------------------------------------
"""
Consistent error handling ensures failures are logged, not hidden.
Use logging instead of print() to maintain visibility across jobs.

✅ Key Idea:
All exceptions should be handled with logger.error() or logger.critical().
"""

# -------------------------------------------------------------------
# Slide 2 – Imports and Logger Setup
# -------------------------------------------------------------------
import logging
import pandas as pd
import os
from datetime import datetime

# Configure logging once for the whole script.
# The file handler is built explicitly so the log file is always written as
# UTF-8: the messages logged by this script contain non-ASCII symbols, and a
# handler opened with the platform's default encoding either fails to encode
# them or writes them as backslash escapes, depending on the Python version.
# FileHandler opens in append mode by default, as basicConfig(filename=...) does.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler("etl_error_handling.log", encoding="utf-8")],
)
logger = logging.getLogger(__name__)

logger.info("üöÄ Logger initialized successfully")

# -------------------------------------------------------------------
# Slide 3 – Simple Example with try/except
# -------------------------------------------------------------------
def read_input(file_path):
    """Read a CSV file into a DataFrame, logging any failure instead of raising.

    Args:
        file_path: Location of the CSV file; passed unchanged to
            ``pd.read_csv``.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the file is missing
        or cannot be read. Callers must check for ``None`` before continuing.
    """
    logger.info(f"Reading input file: {file_path}")
    try:
        # Keep the try body to the one call that is expected to fail.
        df = pd.read_csv(file_path)
    except FileNotFoundError as e:
        # Expected failure mode: the message alone identifies the problem.
        logger.error(f"‚ùå File not found: {file_path} | Error: {e}")
        return None
    except Exception as e:
        # Unknown failure: logger.exception still logs at ERROR level but also
        # records the traceback, which a plain message would lose.
        logger.exception(f"‚ùå Unexpected error while reading file: {e}")
        return None

    logger.info(f"‚úÖ Successfully read {len(df)} records.")
    return df

# -------------------------------------------------------------------
# Slide 4 – Transformation Function
# -------------------------------------------------------------------
def transform(df, *, usd_rate=0.012):
    """Add ``price_usd`` and ``processed_ts`` columns, logging any failure.

    The columns are added to ``df`` itself (the input is modified in place)
    and that same object is returned.

    Args:
        df: DataFrame that must contain a ``price`` column.
        usd_rate: Multiplier applied to ``price`` to produce ``price_usd``.
            Keyword-only; defaults to 0.012.

    Returns:
        ``df`` with the two new columns, or ``None`` when the ``price`` column
        is missing or the transformation fails for any other reason. Callers
        must check for ``None`` before using the result.
    """
    logger.info("Starting transformation step...")
    try:
        if "price" not in df.columns:
            raise KeyError("Missing 'price' column")

        df["price_usd"] = df["price"] * usd_rate
        # Naive local timestamp, stamped identically on every row.
        df["processed_ts"] = datetime.now()
    except KeyError as e:
        # Expected failure mode (missing column): the message is sufficient.
        logger.error(f"‚ùå Transformation failed: {e}")
        return None
    except Exception as e:
        # Unknown failure: keep the traceback so the cause can be diagnosed.
        logger.exception(f"‚ùå Unknown error in transform(): {e}")
        return None

    logger.info("‚úÖ Transformation completed.")
    return df

# -------------------------------------------------------------------
# Slide 5 – Persist Function
# -------------------------------------------------------------------
def persist(df, output_path):
    """Write ``df`` to ``output_path`` as CSV without the index, logging the outcome.

    Failures are logged, never raised. For local filesystem paths the parent
    directory is created when it does not exist yet.

    Args:
        df: DataFrame to write; it must already contain a ``price_usd``
            column, otherwise nothing is written.
        output_path: Destination handed to ``DataFrame.to_csv``.

    Returns:
        None.
    """
    logger.info(f"Saving DataFrame to {output_path}")
    try:
        # Refuse to write data that never went through the transform step.
        if "price_usd" not in df.columns:
            raise ValueError("Data not transformed correctly")

        # to_csv does not create missing directories, so a target such as
        # "output/orders.csv" fails when "output" is absent. Only plain local
        # paths are prepared; file-like objects and URL-style targets
        # (anything containing "://") are passed through untouched.
        if isinstance(output_path, (str, os.PathLike)):
            target = os.fspath(output_path)
            if isinstance(target, str) and "://" not in target:
                parent_dir = os.path.dirname(target)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)

        df.to_csv(output_path, index=False)
    except Exception as e:
        # Same ERROR level as before, plus the traceback for diagnosis.
        logger.exception(f"‚ùå Failed to persist DataFrame: {e}")
        return

    logger.info(f"‚úÖ Data saved successfully to {output_path}")

# -------------------------------------------------------------------
# Slide 6 – Custom Error Example
# -------------------------------------------------------------------
def validate_records(df):
    """Check that no value in the ``price`` column is zero or negative.

    Problems are logged rather than raised. Missing prices (NaN) do not
    compare as ``<= 0`` and are therefore not flagged by this check.

    Args:
        df: DataFrame containing a ``price`` column.

    Returns:
        ``True`` when validation passed; ``False`` when invalid prices were
        found or the check itself could not be carried out.
    """
    try:
        invalid = df[df["price"] <= 0]
        if not invalid.empty:
            raise ValueError(f"Invalid prices found: {len(invalid)} rows")
    except ValueError as e:
        # Expected failure mode: report how many rows are affected.
        logger.error(f"‚ö†Ô∏è Validation Error: {e}")
        return False
    except Exception as e:
        # Unknown failure (e.g. missing column): keep the traceback.
        logger.exception(f"‚ùå Unexpected error in validation: {e}")
        return False

    logger.info("‚úÖ Record validation passed.")
    return True

# -------------------------------------------------------------------
# Slide 7 – Raising vs Logging Example
# -------------------------------------------------------------------
def safe_divide(a, b):
    """Divide ``a`` by ``b``, logging a division by zero before re-raising it.

    Returns:
        The quotient ``a / b``; the value is also logged at INFO level.

    Raises:
        ZeroDivisionError: Propagated unchanged after being logged.
    """
    try:
        quotient = a / b
    except ZeroDivisionError as exc:
        logger.error(f"‚ùå Division by zero: {exc}")
        # Bare raise keeps the original exception and traceback for the caller.
        raise

    logger.info(f"Result: {quotient}")
    return quotient

# -------------------------------------------------------------------
# Slide 8 – Global Error Handling Wrapper
# -------------------------------------------------------------------
def main_pipeline(input_path="orders.csv", output_path="output/orders.csv"):
    """Run read -> transform -> validate -> persist with consistent logging.

    Each step logs its own failures. The pipeline stops as soon as a step
    yields no data, so later steps are never called with ``None``. Anything
    that still escapes is logged at CRITICAL level together with its
    traceback; no exception leaves this function.

    Args:
        input_path: CSV file to read. The default, "orders.csv", is the demo
            input, which is expected not to exist so the error path is logged.
        output_path: Destination of the transformed CSV.

    Returns:
        None.
    """
    try:
        df = read_input(input_path)
        if df is None:
            logger.warning("‚ö†Ô∏è Skipping transform step due to missing data.")
            return

        df = transform(df)
        if df is None:
            # transform() returns None after logging its own error. Stopping
            # here avoids misleading follow-up errors from the later steps.
            logger.warning(
                "Skipping validation and persist steps because transformation failed."
            )
            return

        validate_records(df)
        persist(df, output_path)
    except Exception as e:
        logger.critical(f"üî• Pipeline crashed unexpectedly: {e}", exc_info=True)

# -------------------------------------------------------------------
# Slide 9 – Real-World Pattern
# -------------------------------------------------------------------
"""
In real ETL:
1️⃣ Extract → Log API call status (200, 404, etc.)
2️⃣ Transform → Log bad rows, missing columns
3️⃣ Load → Log success/failure for each batch
All messages go to one structured log file.
"""

# -------------------------------------------------------------------
# Slide 10 – Best Practices & Summary
# -------------------------------------------------------------------
"""
✅ Best Practices:
- Initialize logger once per script
- Use logger.error() for handled exceptions
- Use logger.critical() for full job failures
- Include IDs, filenames, or batch references
- Never use print() for error messages
"""

# Run example
if __name__ == "__main__":
    logger.info("üèÅ Starting error handling demo pipeline")
    main_pipeline()

    # safe_divide logs the failure and re-raises it. Catch only the error this
    # demo provokes so that anything unexpected still surfaces.
    try:
        safe_divide(10, 0)
    except ZeroDivisionError:
        logger.info("Handled division error safely.")

    logger.info("‚úÖ Script execution completed.")
