In [1]:
# Data Ingestion → Transformation
# -------------------------------

import pandas as pd
import logging
from datetime import datetime

# Ingestion -> transformation demo pipeline.
#
# Builds a small in-memory orders table, fills missing values, derives
# `price_usd` / `category` / `processed_at`, validates the result and writes
# it to CSV. Progress is printed and also appended to a log file.

# --- Configuration ----------------------------------------------------------
LOG_FILE = "ingestion_transformation.log"
OUTPUT_FILE = "transformed_orders.csv"
MISSING_REGION = "UNKNOWN"   # placeholder written into empty `region` cells
USD_RATE = 0.012             # multiplier applied to `price` to get `price_usd`
HIGH_VALUE_USD = 3           # `price_usd` strictly above this is "High"


def clean_orders(orders):
    """Return a cleaned copy of *orders*; the input frame is not modified.

    Missing ``region`` values become ``"UNKNOWN"``, missing ``price`` values
    are replaced by the mean of the prices that are present, and ``price`` is
    cast to float.
    """
    cleaned = orders.copy()
    # Assign the filled column back instead of calling
    # `df[col].fillna(..., inplace=True)`: the chained in-place form operates
    # on a temporary Series and does not update the frame under pandas
    # Copy-on-Write.
    cleaned["region"] = cleaned["region"].fillna(MISSING_REGION)
    cleaned["price"] = (
        cleaned["price"].fillna(cleaned["price"].mean()).astype(float)
    )
    return cleaned


def transform_orders(orders, processed_at=None):
    """Return a copy of *orders* with the derived columns added.

    Adds ``price_usd`` (``price * USD_RATE``), ``category`` ("High" when
    ``price_usd`` is strictly greater than ``HIGH_VALUE_USD``, otherwise
    "Low") and ``processed_at`` (one timestamp string shared by every row).

    Args:
        orders: Frame with a numeric ``price`` column.
        processed_at: ``datetime`` to stamp rows with; defaults to the
            current local time.
    """
    if processed_at is None:
        processed_at = datetime.now()
    transformed = orders.copy()
    transformed["price_usd"] = transformed["price"] * USD_RATE
    is_high = transformed["price_usd"] > HIGH_VALUE_USD
    transformed["category"] = is_high.map({True: "High", False: "Low"})
    transformed["processed_at"] = processed_at.strftime("%Y-%m-%d %H:%M:%S")
    return transformed


def validate_orders(orders):
    """Check that ``price_usd`` exists and contains no nulls.

    Raises:
        ValueError: If the column is missing or has null values. Explicit
            raises are used rather than ``assert`` so the checks still run
            when Python is started with ``-O``.
    """
    if "price_usd" not in orders.columns:
        raise ValueError("Transformation failed — price_usd missing")
    if not orders["price_usd"].notnull().all():
        raise ValueError("Nulls found in price_usd")


# 1. Set up logging
logging.basicConfig(
    filename=LOG_FILE,
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

print("🚀 Starting data ingestion and transformation pipeline...")
logging.info("Pipeline started")

# 2. Step 1: Data ingestion (hard-coded rows stand in for a source CSV)
raw_data = [
    {"order_id": 101, "region": "APAC", "price": 250, "date": "2025-11-05"},
    {"order_id": 102, "region": None, "price": 400, "date": "2025-11-05"},
    {"order_id": 103, "region": "EMEA", "price": None, "date": "2025-11-05"},
]
df = pd.DataFrame(raw_data)
print("\n✅ Step 1: Raw Data Ingested")
print(df)
logging.info("Step 1: Ingested %d records", len(df))

# 3. Step 2: Data cleaning
df = clean_orders(df)
print("\n✅ Step 2: Data Cleaned")
print(df)
logging.info("Step 2: Data cleaned and formatted")

# 4. Step 3: Transformation
df = transform_orders(df)
print("\n✅ Step 3: Data Transformed")
print(df)
logging.info("Step 3: Transformation completed successfully")

# 5. Step 4: Validation
try:
    validate_orders(df)
except ValueError:
    # Record the failure in the log file before aborting the run.
    logging.exception("Step 4: Data validation failed")
    raise
print("\n✅ Step 4: Validation Passed")
logging.info("Step 4: Data validation successful")

# 6. Step 5: Output
output_file = OUTPUT_FILE
df.to_csv(output_file, index=False)
print(f"\n✅ Step 5: Transformed data exported → {output_file}")
logging.info("Output saved to %s", output_file)

# 7. Summary
print("\n🎉 Ingestion → Transformation flow completed successfully!")
logging.info("Pipeline completed successfully")

# The bare string below is the last expression of the notebook cell, so the
# notebook displays it as the cell's result; as a plain script it is a no-op.
"""
- Step 1: Read raw data
- Step 2: Clean missing / invalid values
- Step 3: Apply transformations
- Step 4: Validate output
- Step 5: Export to CSV
"""


🚀 Starting data ingestion and transformation pipeline...

✅ Step 1: Raw Data Ingested
   order_id region  price        date
0       101   APAC  250.0  2025-11-05
1       102   None  400.0  2025-11-05
2       103   EMEA    NaN  2025-11-05

✅ Step 2: Data Cleaned
   order_id   region  price        date
0       101     APAC  250.0  2025-11-05
1       102  UNKNOWN  400.0  2025-11-05
2       103     EMEA  325.0  2025-11-05

✅ Step 3: Data Transformed
   order_id   region  price        date  price_usd category  \
0       101     APAC  250.0  2025-11-05        3.0      Low   
1       102  UNKNOWN  400.0  2025-11-05        4.8     High   
2       103     EMEA  325.0  2025-11-05        3.9     High   

          processed_at  
0  2025-11-07 16:17:38  
1  2025-11-07 16:17:38  
2  2025-11-07 16:17:38  

✅ Step 4: Validation Passed

✅ Step 5: Transformed data exported → transformed_orders.csv

🎉 Ingestion → Transformation flow completed successfully!


'\n- Step 1: Read raw data\n- Step 2: Clean missing / invalid values\n- Step 3: Apply transformations\n- Step 4: Validate output\n- Step 5: Export to CSV\n'