In [1]:
# Monitoring Simulation for Data Pipeline
# ---------------------------------------

import pandas as pd
import logging
import time
import random
from datetime import datetime

# 1Ô∏è‚É£ Setup logging
logging.basicConfig(
    filename="pipeline_monitoring.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

print("üöÄ Starting Monitoring Simulation...")
logging.info("Pipeline monitoring initiated")

# 2Ô∏è‚É£ Simulate pipeline stages
stages = ["Ingest", "Transform", "Validate", "Load"]
start_time = time.time()

for stage in stages:
    logging.info(f"Stage started: {stage}")
    print(f"‚ñ∂ Running stage: {stage}")
    time.sleep(random.uniform(0.5, 1.5))  # simulate processing
    logging.info(f"Stage completed: {stage}")

# 3Ô∏è‚É£ Simulate data
df = pd.DataFrame([
    {"order_id": 1, "region": "APAC", "price": 250},
    {"order_id": 2, "region": None, "price": 400}
])

# 4Ô∏è‚É£ Data Quality Monitoring
missing_regions = df["region"].isnull().sum()
if missing_regions > 0:
    logging.warning(f"‚ö†Ô∏è Found {missing_regions} missing region(s)")
    print(f"‚ö†Ô∏è Warning: {missing_regions} missing region(s) detected")
else:
    logging.info("‚úÖ No missing regions detected")

# 5Ô∏è‚É£ Performance Metrics
records_processed = len(df)
processing_time = time.time() - start_time
logging.info(f"Records processed: {records_processed}")
logging.info(f"Total processing time: {processing_time:.2f} seconds")

# 6Ô∏è‚É£ Simulate alerting condition
if processing_time > 3:
    logging.error("üö® Pipeline runtime exceeded 3 seconds! Alert triggered.")
    print("üö® ALERT: Pipeline took too long!")

# 7Ô∏è‚É£ Summary Output
print("\nüìä Monitoring Report:")
print(f"Records processed: {records_processed}")
print(f"Missing regions: {missing_regions}")
print(f"Processing time: {processing_time:.2f}s")

# 8Ô∏è‚É£ Completion
print("\n‚úÖ Monitoring simulation completed successfully.")
logging.info("Monitoring simulation completed successfully")

# 9Ô∏è‚É£ Summary
"""
This example simulates:
- Real-time logging per pipeline stage
- Data quality checks
- Performance metrics
- Alert simulation for threshold breaches
"""


üöÄ Starting Monitoring Simulation...
‚ñ∂ Running stage: Ingest
‚ñ∂ Running stage: Transform
‚ñ∂ Running stage: Validate
‚ñ∂ Running stage: Load
üö® ALERT: Pipeline took too long!

üìä Monitoring Report:
Records processed: 2
Missing regions: 1
Processing time: 3.94s

‚úÖ Monitoring simulation completed successfully.


'\nThis example simulates:\n- Real-time logging per pipeline stage\n- Data quality checks\n- Performance metrics\n- Alert simulation for threshold breaches\n'