In [1]:
# Regression Testing Simulation
# -----------------------------

import pandas as pd
import logging

# 1. Set up logging
# NOTE: everything below runs at execution time. It configures the root
# logger to write to regression_testing.log, prints progress to stdout and
# writes baseline_output.csv to the current working directory.
logging.basicConfig(
    filename="regression_testing.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

print("🚀 Starting Regression Testing Simulation...")
logging.info("Regression testing started")

# 2. Simulate old (baseline) data output
old_data = [
    {"order_id": 1, "region": "APAC", "price": 250, "price_usd": 3.0},
    {"order_id": 2, "region": "EMEA", "price": 400, "price_usd": 4.8},
]
old_df = pd.DataFrame(old_data)

# 3. Simulate new (updated) pipeline output -- initially identical to the
#    baseline, so the first comparison is expected to pass.
new_data = [
    {"order_id": 1, "region": "APAC", "price": 250, "price_usd": 3.0},
    {"order_id": 2, "region": "EMEA", "price": 400, "price_usd": 4.8},
]
new_df = pd.DataFrame(new_data)

# 4. Step 1: Save baseline output
# The CSV is written for reference only; the comparisons below use the
# in-memory old_df, not the file.
old_df.to_csv("baseline_output.csv", index=False)
print("\n📦 Baseline (v1) Output:")
print(old_df)

# 5. Step 2: Compare with new output
print("\n⚙️ Running Regression Test (same input → same output)...")

# Only the comparison itself lives in the try body, so an AssertionError can
# only come from assert_frame_equal; the success path goes in `else`.
try:
    pd.testing.assert_frame_equal(old_df, new_df)
except AssertionError as e:
    print("❌ Regression Test Failed!")
    print(e)
    # Pass the exception as a lazy %-argument instead of pre-formatting it.
    logging.error("Regression mismatch detected: %s", e)
else:
    print("✅ Regression Test Passed: No differences detected.")
    logging.info("Regression test passed successfully")

# 6. Optional variation -- simulate a code change
# Mutates new_df in place: row label 1, column "price_usd" goes 4.8 -> 4.9.
new_df.loc[1, "price_usd"] = 4.9  # Introduce small change

print("\n🧪 Re-running after simulated code change...")
try:
    pd.testing.assert_frame_equal(old_df, new_df)
except AssertionError as e:
    # Expected path: the injected difference is reported.
    print("⚠️ Regression Mismatch Detected After Code Change:")
    print(e)
    logging.error("Regression failure after code update: %s", e)
else:
    # The frames were deliberately made different, so reaching this branch
    # means the check missed the change. Report it instead of staying silent.
    print("⚠️ Simulated change was NOT detected by the regression check.")
    logging.warning("Simulated code change was not detected by the regression check")

# 7. Summary
print("\n🎯 Regression Testing Completed.")
logging.info("Regression testing simulation completed successfully")

"""
This simulation shows:
- How to compare baseline vs new outputs
- Detecting silent data logic changes
- Ensuring consistent ETL results after code updates
"""


🚀 Starting Regression Testing Simulation...

📦 Baseline (v1) Output:
   order_id region  price  price_usd
0         1   APAC    250        3.0
1         2   EMEA    400        4.8

⚙️ Running Regression Test (same input → same output)...
✅ Regression Test Passed: No differences detected.

🧪 Re-running after simulated code change...
⚠️ Regression Mismatch Detected After Code Change:
DataFrame.iloc[:, 3] (column name="price_usd") are different

DataFrame.iloc[:, 3] (column name="price_usd") values are different (50.0 %)
[index]: [0, 1]
[left]:  [3.0, 4.8]
[right]: [3.0, 4.9]

🎯 Regression Testing Completed.


'\nThis simulation shows:\n- How to compare baseline vs new outputs\n- Detecting silent data logic changes\n- Ensuring consistent ETL results after code updates\n'