In [1]:
# Data Pipeline Simulation
# -------------------------

import pandas as pd
import logging
from datetime import datetime

# 1. Set up logging
logging.basicConfig(
    filename="data_pipeline.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# NOTE(review): the leading glyphs in the print() strings below look
# mis-encoded (probably emoji). They are runtime output, so they are left
# exactly as found -- confirm against the original notebook before changing.
print("üöÄ Starting data pipeline...")
logging.info("Pipeline started")

# 2. Step 1: Data ingestion -- build the frame from in-memory mock records.
raw_data = [
    {"order_id": 101, "region": "APAC", "price": 250, "date": "2025-11-05"},
    {"order_id": 102, "region": "EMEA", "price": 300, "date": "2025-11-05"},
    {"order_id": 103, "region": None, "price": 200, "date": "2025-11-05"},
]
df = pd.DataFrame(raw_data)
print("\n‚úÖ Step 1: Data Ingested")
print(df)
logging.info("Step 1: Loaded %d records", len(df))

# 3. Step 2: Data cleaning.
# Assign the filled column back instead of calling fillna(inplace=True) on
# the df["region"] selection: the chained in-place form operates on an
# intermediate object, is deprecated in pandas 2.x, and does not update `df`
# when Copy-on-Write is enabled.
df["region"] = df["region"].fillna("UNKNOWN")
df["price"] = df["price"].astype(float)
print("\n‚úÖ Step 2: Data Cleaned")
print(df)
logging.info("Step 2: Data cleaning completed")

# 4. Step 3: Transformation.
# Multiplier applied to `price` to obtain `price_usd`.
# NOTE(review): the source currency of `price` is not stated here -- confirm.
PRICE_TO_USD_RATE = 0.012
df["price_usd"] = df["price"] * PRICE_TO_USD_RATE
# Same timestamp string for every row: when this run processed the data
# (local, naive clock as returned by datetime.now()).
df["processed_at"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("\n‚úÖ Step 3: Transformation Completed")
print(df)
logging.info("Step 3: Transformation done")

# 5. Step 4: Validation.
# Explicit checks rather than `assert` statements so they still run under
# `python -O`. AssertionError and the messages are kept so the failure mode
# is the same as before.
validation_checks = [
    ("price_usd", "Price conversion failed!"),
    ("region", "Region missing!"),
]
for column, failure_message in validation_checks:
    if not df[column].notnull().all():
        logging.error("Step 4: Validation failed: %s", failure_message)
        raise AssertionError(failure_message)
print("\n‚úÖ Step 4: Validation Passed")
logging.info("Step 4: Validation successful")

# 6. Step 5: Output / loading -- write the frame to CSV without the index.
output_file = "pipeline_output.csv"
df.to_csv(output_file, index=False)
print(f"\n‚úÖ Step 5: Output Saved to {output_file}")
logging.info("Output saved as %s", output_file)

# 7. Completion
print("\nüéâ Pipeline executed successfully!")
logging.info("Pipeline completed successfully")

# 8. Summary -- kept as a bare string expression (not a comment) so that a
# notebook cell still echoes it as the cell's result.
"""
This example covers:
- Data ingestion (mock input)
- Data cleaning and transformation
- Validation checks
- Output writing (CSV)
- Logging every step
"""


üöÄ Starting data pipeline...

‚úÖ Step 1: Data Ingested
   order_id region  price        date
0       101   APAC    250  2025-11-05
1       102   EMEA    300  2025-11-05
2       103   None    200  2025-11-05

‚úÖ Step 2: Data Cleaned
   order_id   region  price        date
0       101     APAC  250.0  2025-11-05
1       102     EMEA  300.0  2025-11-05
2       103  UNKNOWN  200.0  2025-11-05

‚úÖ Step 3: Transformation Completed
   order_id   region  price        date  price_usd         processed_at
0       101     APAC  250.0  2025-11-05        3.0  2025-11-07 15:48:26
1       102     EMEA  300.0  2025-11-05        3.6  2025-11-07 15:48:26
2       103  UNKNOWN  200.0  2025-11-05        2.4  2025-11-07 15:48:26

‚úÖ Step 4: Validation Passed

‚úÖ Step 5: Output Saved to pipeline_output.csv

üéâ Pipeline executed successfully!


'\nThis example covers:\n- Data ingestion (mock input)\n- Data cleaning and transformation\n- Validation checks\n- Output writing (CSV)\n- Logging every step\n'