In [1]:
"""
Topic: Config-Driven Approach
=============================
This script demonstrates how to build flexible, environment-aware Python
pipelines using external configuration files (YAML/JSON) instead of
hardcoding paths, URLs, and credentials.
"""

# -------------------------------------------------------------------
# Slide 1 – Overview
# -------------------------------------------------------------------
"""
The config-driven approach separates code from configuration.
Instead of changing logic, you modify only configuration files (YAML/JSON).

✅ Advantages:
- One codebase for all environments
- Easier maintenance and debugging
- No redeploys when paths/URLs change
"""

# Module-level banner: runs whenever this file is executed or imported.
print("üìò Config-Driven Approach Loaded")

# -------------------------------------------------------------------
# Slide 2 – Why Configs Matter
# -------------------------------------------------------------------
"""
Without configs, every environment (Dev, Stage, Prod) requires code edits.
Configs solve this by externalizing environment differences.
"""

# -------------------------------------------------------------------
# Slide 3 – Config File Example (YAML)
# -------------------------------------------------------------------
# Inline YAML document standing in for an external config file. It is parsed
# with yaml.safe_load in the __main__ block of this file.
# NOTE(review): the sample embeds a literal token and the print below echoes
# it to stdout; acceptable for placeholder data, but real tokens should not
# be stored or printed this way.
example_yaml = """
env: dev
input_path: "s3://dev-bucket/raw/orders.csv"
output_path: "s3://dev-bucket/processed/orders.parquet"
api:
  base_url: "https://api-dev.company.com"
  token: "DEV123TOKEN"
"""
# Show the raw YAML text exactly as defined above.
print("\n--- Sample YAML Config ---")
print(example_yaml)

# -------------------------------------------------------------------
# Slide 4 – Loading Config in Python
# -------------------------------------------------------------------
import yaml

def load_config(file_path):
    """Load configuration from a YAML file.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document: a dict for a
        mapping document, ``None`` for an empty file.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # locale encoding (which is not UTF-8 on every system).
    with open(file_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Example: cfg = load_config("config_dev.yaml")

# -------------------------------------------------------------------
# Slide 5 – Using Config in Code
# -------------------------------------------------------------------
import pandas as pd

def read_input(config):
    """Return the demo orders table after announcing the configured source.

    The value stored under ``config['input_path']`` is only echoed to stdout;
    the returned frame is a fixed two-row stand-in, not the file's contents.
    """
    source = config['input_path']
    print(f"üì• Reading from: {source}")
    # A real pipeline would load the file here: pd.read_csv(source)
    columns = {"order_id": [1, 2], "price": [100, 150]}
    return pd.DataFrame(columns)

def persist_output(df, config):
    """Announce where the transformed data would be written.

    Nothing is saved: ``df`` is accepted but unused, and the function only
    prints the configured ``output_path`` followed by a success line.
    """
    destination = config['output_path']
    # A real pipeline would write here: df.to_parquet(destination)
    print(f"üíæ Writing output to: {destination}")
    print("‚úÖ Data persisted successfully!")

# -------------------------------------------------------------------
# Slide 6 – Environment Selection
# -------------------------------------------------------------------
import os

def get_env_config():
    """Select the config file name for the active environment.

    The environment name is read from the ``ENV`` environment variable.
    When ``ENV`` is unset, empty, or only whitespace, ``"dev"`` is used, so
    the result is never the malformed name ``config_.yaml``.

    Returns:
        The file name ``config_<env>.yaml``.
    """
    # An exported-but-empty ENV should behave like an unset one; surrounding
    # whitespace is dropped so it cannot leak into the file name.
    env = os.getenv("ENV", "").strip() or "dev"
    print(f"üåç Active environment: {env}")
    return f"config_{env}.yaml"

# Example:
# os.environ["ENV"] = "prod"
# file = get_env_config()

# -------------------------------------------------------------------
# Slide 7 – Secure Secrets Handling
# -------------------------------------------------------------------
from dotenv import load_dotenv

load_dotenv()  # loads .env file if present
# Falls back to the sentinel string "NO_KEY_FOUND" when API_KEY is not set.
API_KEY = os.getenv("API_KEY", "NO_KEY_FOUND")

# NOTE(review): this echoes the first four characters of the key (the whole
# value if it is shorter) and shows "NO_K******" for the sentinel; consider
# not printing any part of a real secret.
print(f"üîê Loaded API Key: {API_KEY[:4]}******")

# -------------------------------------------------------------------
# Slide 8 – Config Validation
# -------------------------------------------------------------------
def validate_config(cfg):
    """Ensure all required top-level keys exist in ``cfg``.

    Args:
        cfg: Parsed configuration mapping. ``None`` (what ``yaml.safe_load``
            returns for an empty document) is treated as an empty mapping.

    Raises:
        AssertionError: If ``input_path``, ``output_path`` or ``api`` is
            missing. The first missing key is named in the message.
    """
    required_keys = ("input_path", "output_path", "api")
    if cfg is None:
        # Report an empty config as a missing key instead of failing with a
        # TypeError on the membership test below.
        cfg = {}
    for key in required_keys:
        if key not in cfg:
            # Raised explicitly: an `assert` statement is removed when Python
            # runs with -O, which would silently disable this validation.
            # The exception type is kept so existing handlers still match.
            raise AssertionError(f"‚ùå Missing required key: {key}")
    print("‚úÖ Config validation passed!")

# -------------------------------------------------------------------
# Slide 9 – Real-World Example
# -------------------------------------------------------------------
def run_pipeline(config, *, usd_rate=0.012):
    """Run the simulated supply chain pipeline: read, convert, persist.

    Args:
        config: Configuration mapping passed through to ``read_input`` and
            ``persist_output``.
        usd_rate: Multiplier applied to the ``price`` column to produce
            ``price_usd``. Defaults to 0.012, the value previously
            hard-coded here.
            NOTE(review): the source currency of ``price`` is not stated in
            this file; confirm what the rate converts from.
    """
    print("\nüöö Starting supply chain data pipeline...")
    df = read_input(config)
    # Derived column; the frame was created by read_input for this run, so
    # adding the column in place does not touch caller-owned data.
    df["price_usd"] = df["price"] * usd_rate
    persist_output(df, config)
    print("‚úÖ Pipeline completed successfully")

# -------------------------------------------------------------------
# Slide 10 – Main + Summary
# -------------------------------------------------------------------
if __name__ == "__main__":
    print("\nüß© Running Config-Driven Pipeline Example...\n")

    # Parse the inline sample instead of reading a file, check it, then run
    # the demo pipeline with it.
    demo_config = yaml.safe_load(example_yaml)
    validate_config(demo_config)
    run_pipeline(demo_config)

    # Closing takeaways, emitted one per line.
    takeaways = (
        "\nüìò Summary:",
        "‚Ä¢ Keep configs external (YAML/JSON)",
        "‚Ä¢ Never hardcode secrets",
        "‚Ä¢ Use environment variables for security",
        "‚Ä¢ Validate configs at startup",
    )
    for line in takeaways:
        print(line)


üìò Config-Driven Approach Loaded

--- Sample YAML Config ---

env: dev
input_path: "s3://dev-bucket/raw/orders.csv"
output_path: "s3://dev-bucket/processed/orders.parquet"
api:
  base_url: "https://api-dev.company.com"
  token: "DEV123TOKEN"

üîê Loaded API Key: NO_K******

üß© Running Config-Driven Pipeline Example...

‚úÖ Config validation passed!

üöö Starting supply chain data pipeline...
üì• Reading from: s3://dev-bucket/raw/orders.csv
üíæ Writing output to: s3://dev-bucket/processed/orders.parquet
‚úÖ Data persisted successfully!
‚úÖ Pipeline completed successfully

üìò Summary:
‚Ä¢ Keep configs external (YAML/JSON)
‚Ä¢ Never hardcode secrets
‚Ä¢ Use environment variables for security
‚Ä¢ Validate configs at startup
