In [1]:
# Lifecycle of a Data Pipeline
# ----------------------------

import pandas as pd
import logging
from datetime import datetime

# Conversion factors applied to `amount` to derive the USD columns below.
# NOTE(review): the source currency is not stated anywhere in this script;
# confirm what these rates are meant to convert from.
INITIAL_USD_RATE = 0.012
OPTIMIZED_USD_RATE = 0.0125

# Setup logging: INFO-and-above records go to "pipeline_lifecycle.log"
# (a relative path, so it is created in the current working directory).
logging.basicConfig(
    filename="pipeline_lifecycle.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

print("🚀 Starting Data Pipeline Lifecycle Simulation...")
logging.info("Pipeline lifecycle started")

# Stage 1: Design - describe the source, the target and the expected fields.
print("\n🧩 Stage 1: Designing pipeline schema...")
schema = {
    "source": "orders.csv",
    "target": "processed_orders.csv",
    "fields": ["order_id", "region", "amount"],
}
# Lazy %-style arguments: the message is only formatted if the record is emitted.
logging.info("Schema defined: %s", schema)

# Stage 2: Build - load the in-memory sample rows and derive a USD column.
print("\n🔧 Stage 2: Building and testing pipeline...")
raw_data = [
    {"order_id": 1, "region": "APAC", "amount": 200},
    {"order_id": 2, "region": "EMEA", "amount": 300},
]
df = pd.DataFrame(raw_data)
df["amount_usd"] = df["amount"] * INITIAL_USD_RATE
print(df)
logging.info("Pipeline build successful")

# Stage 3: Deploy - simulated only; nothing is actually deployed here.
print("\n🚀 Stage 3: Deploying pipeline...")
logging.info("Deploying to AWS Glue or Step Functions (simulated)")
print("✅ Code deployed successfully!")

# Stage 4: Monitor - log each status message with a 1-based sub-step number
# and echo the message to stdout.
print("\n📊 Stage 4: Monitoring pipeline...")
for step, msg in enumerate(["Job started", "Processing data", "Completed successfully"], start=1):
    logging.info("Stage 4.%d: %s", step, msg)
    print(msg)
print("✅ Logs stored in CloudWatch (simulated)")

# Stage 5: Optimize - add a second USD column computed with the alternative rate.
print("\n⚙️ Stage 5: Optimizing pipeline performance...")
df["amount_usd_optimized"] = df["amount"] * OPTIMIZED_USD_RATE  # example of improved conversion logic
logging.info("Optimized data transformation applied")
print(df)
print("✅ Optimization completed successfully")

# Completion
print("\n🎉 Pipeline lifecycle simulation completed successfully!")
logging.info("Pipeline lifecycle completed successfully")

# Summary
# This is a bare string expression, not a docstring: as the final expression
# of a notebook cell its repr is echoed as the cell's output value.
"""
Lifecycle Summary:
- Design: Define schema and logic
- Build: Create and test transformations
- Deploy: Run via Glue/Step Functions
- Monitor: Log and observe performance
- Optimize: Tune and improve continuously
"""


🚀 Starting Data Pipeline Lifecycle Simulation...

🧩 Stage 1: Designing pipeline schema...

🔧 Stage 2: Building and testing pipeline...
   order_id region  amount  amount_usd
0         1   APAC     200         2.4
1         2   EMEA     300         3.6

🚀 Stage 3: Deploying pipeline...
✅ Code deployed successfully!

📊 Stage 4: Monitoring pipeline...
Job started
Processing data
Completed successfully
✅ Logs stored in CloudWatch (simulated)

⚙️ Stage 5: Optimizing pipeline performance...
   order_id region  amount  amount_usd  amount_usd_optimized
0         1   APAC     200         2.4                  2.50
1         2   EMEA     300         3.6                  3.75
✅ Optimization completed successfully

🎉 Pipeline lifecycle simulation completed successfully!


'\nLifecycle Summary:\n- Design: Define schema and logic\n- Build: Create and test transformations\n- Deploy: Run via Glue/Step Functions\n- Monitor: Log and observe performance\n- Optimize: Tune and improve continuously\n'