In [1]:
# ETL Testing Concepts
# --------------------

import pandas as pd
import logging

# Multiplier applied to "price" to produce "price_usd". Named once so the
# transform step and the Test 2 expectation cannot drift apart.
PRICE_TO_USD_RATE = 0.012


def _check(condition, failure_message):
    """Fail the run with ``AssertionError(failure_message)`` if *condition* is falsy.

    A bare ``assert`` is removed when Python runs with ``-O``, which would let
    every validation below pass silently.  This helper always evaluates the
    condition, and it writes the failure to the log before raising so the log
    file records failures as well as passes.

    Args:
        condition: Result of the validation; any truthy value means "passed".
        failure_message: Text used for both the log record and the exception.

    Raises:
        AssertionError: If *condition* is falsy (same exception type a bare
            ``assert`` would raise).
    """
    if not condition:
        logging.error("ETL check failed: %s", failure_message)
        raise AssertionError(failure_message)


# 1. Set up logging: INFO and above go to etl_testing.log.
logging.basicConfig(
    filename="etl_testing.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

print("üöÄ Starting ETL Testing Simulation...")
logging.info("ETL testing process started")

# 2. Simulate source data (Extract): three in-memory order records.
source_data = [
    {"order_id": 1, "region": "APAC", "price": 250},
    {"order_id": 2, "region": "EMEA", "price": 300},
    {"order_id": 3, "region": "APAC", "price": 400},
]
source_df = pd.DataFrame(source_data)
print("\nüì• Source Data (Extract):")
print(source_df)

# 3. Simulate transformed data (Transform): add price_usd, drop price.
# assign() returns a new frame, so source_df is left unmodified, and the new
# column is appended last (order_id, region, price_usd).
target_df = source_df.assign(
    price_usd=source_df["price"] * PRICE_TO_USD_RATE
).drop(columns=["price"])
print("\n‚öôÔ∏è Transformed Data (Load):")
print(target_df)

# 4. Run the ETL tests. Each check raises AssertionError on failure, which
# stops the script before the corresponding "Passed" line is printed.
print("\nüß™ Running ETL Tests...")

# Test 1: record count (completeness).
_check(len(source_df) == len(target_df), "‚ùå Record count mismatch!")
print("‚úÖ Test 1 Passed: Record counts match.")
logging.info("Test 1 passed: Record count verified")

# Test 2: transformation accuracy, compared at 2 decimal places to absorb
# floating-point noise.
expected_price_usd = source_df["price"] * PRICE_TO_USD_RATE
_check(
    (target_df["price_usd"].round(2) == expected_price_usd.round(2)).all(),
    "‚ùå Price transformation mismatch!",
)
print("‚úÖ Test 2 Passed: Price transformation correct.")
logging.info("Test 2 passed: Transformation validated")

# Test 3: key integrity (no duplicate order_id values).
_check(target_df["order_id"].is_unique, "‚ùå Duplicate order IDs found!")
print("‚úÖ Test 3 Passed: Unique keys validated.")
logging.info("Test 3 passed: Unique order IDs confirmed")

# Test 4: data type validation.
_check(
    target_df["price_usd"].dtype == "float64",
    "‚ùå Incorrect data type for price_usd!",
)
print("‚úÖ Test 4 Passed: Data types are correct.")
logging.info("Test 4 passed: Data types validated")

# Test 5: field presence. This is a set-equality check, so an unexpected
# extra column fails it as well as a missing one.
expected_columns = {"order_id", "price_usd", "region"}
_check(set(target_df.columns) == expected_columns, "‚ùå Missing expected columns!")
print("‚úÖ Test 5 Passed: All required fields present.")
logging.info("Test 5 passed: Schema validated")

# 5. All tests passed (only reached when no check above raised).
print("\nüéâ All ETL tests passed successfully!")
logging.info("All ETL validation tests passed successfully")

"""
This simulation covers:
- Completeness testing (record count)
- Transformation validation
- Key integrity and schema testing
- Data type checks
- ETL quality assurance using assertions
"""


üöÄ Starting ETL Testing Simulation...

üì• Source Data (Extract):
   order_id region  price
0         1   APAC    250
1         2   EMEA    300
2         3   APAC    400

‚öôÔ∏è Transformed Data (Load):
   order_id region  price_usd
0         1   APAC        3.0
1         2   EMEA        3.6
2         3   APAC        4.8

üß™ Running ETL Tests...
‚úÖ Test 1 Passed: Record counts match.
‚úÖ Test 2 Passed: Price transformation correct.
‚úÖ Test 3 Passed: Unique keys validated.
‚úÖ Test 4 Passed: Data types are correct.
‚úÖ Test 5 Passed: All required fields present.

üéâ All ETL tests passed successfully!


'\nThis simulation covers:\n- Completeness testing (record count)\n- Transformation validation\n- Key integrity and schema testing\n- Data type checks\n- ETL quality assurance using assertions\n'