In [1]:
# Data Validation Tests (Row Count Match, Schema Validation, Null Handling)
# -------------------------------------------------------------------------

import pandas as pd
import logging

# 1. Set up logging
# Validation results are written to data_validation.log with a timestamp and
# severity on every record, so each run leaves a record alongside the console
# output.
logging.basicConfig(
    filename="data_validation.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# The banner's emoji had been stored mis-encoded (mojibake); restored here.
print("🚀 Starting Data Validation Tests...")
logging.info("Data validation tests started")

# 2. Simulate source data
# Three hard-coded order records stand in for rows read from a source system.
source_data = [
    {"order_id": 1, "region": "APAC", "price": 250},
    {"order_id": 2, "region": "EMEA", "price": 300},
    {"order_id": 3, "region": "APAC", "price": 400},
]
source_df = pd.DataFrame(source_data)
print("\n📥 Source Data:")
print(source_df)

# 3. Simulate target data (transformed)
# Same three orders as the source, with one extra column, price_usd.
target_data = [
    {"order_id": 1, "region": "APAC", "price": 250, "price_usd": 3.0},
    {"order_id": 2, "region": "EMEA", "price": 300, "price_usd": 3.6},
    {"order_id": 3, "region": "APAC", "price": 400, "price_usd": 4.8},
]
target_df = pd.DataFrame(target_data)
print("\n📦 Target Data:")
print(target_df)

# 4. Row Count Validation
# Source and target must contain the same number of rows.
# The comparison is an explicit conditional rather than an `assert`: assert
# statements are removed when Python runs with -O, which would make this
# check report success unconditionally.
print("\n🧮 Test 1: Row Count Validation")
source_rows = len(source_df)
target_rows = len(target_df)
if source_rows == target_rows:
    print("✅ Row count matches between source and target.")
    logging.info("Row count validation passed")
else:
    print("❌ Row count mismatch!")
    # Record both counts so the log alone is enough to diagnose the failure.
    logging.error(
        "Row count validation failed (source=%d, target=%d)",
        source_rows,
        target_rows,
    )

# 5. Schema Validation
# The target must expose exactly the expected column names. Columns are
# compared as sets, so column order is not part of the check.
# An explicit conditional is used instead of `assert`, because assert
# statements are removed when Python runs with -O.
print("\n📋 Test 2: Schema Validation")
expected_columns = {"order_id", "region", "price", "price_usd"}
actual_columns = set(target_df.columns)
if actual_columns == expected_columns:
    print("✅ Schema validated successfully.")
    logging.info("Schema validation passed")
else:
    print("❌ Schema mismatch!")
    # key=str keeps the sort safe even if a column label is not a string.
    logging.error(
        "Schema validation failed (missing=%s, unexpected=%s)",
        sorted(expected_columns - actual_columns, key=str),
        sorted(actual_columns - expected_columns, key=str),
    )

# 6. Null Handling Validation
# The target must not contain any null values. Per-column null counts are
# printed first so the console shows where nulls are, not just whether any
# exist.
print("\n🔍 Test 3: Null Handling Validation")
null_counts = target_df.isnull().sum()
print("Null counts:\n", null_counts)

# An explicit conditional is used instead of `assert`, because assert
# statements are removed when Python runs with -O.
total_nulls = int(null_counts.sum())
if total_nulls == 0:
    print("✅ No null values found in target data.")
    logging.info("Null handling validation passed")
else:
    print("❌ Null values found!")
    # Log only the columns that actually contain nulls.
    logging.error(
        "Null handling validation failed (nulls per column=%s)",
        null_counts[null_counts > 0].to_dict(),
    )

# 7. Summary Output
# Re-evaluate the three validated conditions so the final log record reflects
# the real outcome instead of claiming success unconditionally.
# `expected_columns` is the set defined by the schema validation step.
row_counts_match = len(source_df) == len(target_df)
schema_matches = set(target_df.columns) == expected_columns
any_nulls = bool(target_df.isnull().values.any())

print("\n🎯 Validation Summary:")
print(f"- Row Count: {len(source_df)} source vs {len(target_df)} target")
print(f"- Columns: {list(target_df.columns)}")
print(f"- Any Nulls: {any_nulls}")

if row_counts_match and schema_matches and not any_nulls:
    logging.info("Data validation tests completed successfully")
else:
    logging.error("Data validation tests completed with failures")

# The string below is a bare expression, not a module docstring (it is not
# the first statement). It has no effect when run as a script; in a notebook
# it is echoed as the cell's result.
"""
This script demonstrates:
- Row count match validation
- Schema consistency check
- Null detection and handling
- Logging test results for audit
"""


🚀 Starting Data Validation Tests...

📥 Source Data:
   order_id region  price
0         1   APAC    250
1         2   EMEA    300
2         3   APAC    400

📦 Target Data:
   order_id region  price  price_usd
0         1   APAC    250        3.0
1         2   EMEA    300        3.6
2         3   APAC    400        4.8

🧮 Test 1: Row Count Validation
✅ Row count matches between source and target.

📋 Test 2: Schema Validation
✅ Schema validated successfully.

🔍 Test 3: Null Handling Validation
Null counts:
 order_id     0
region       0
price        0
price_usd    0
dtype: int64
✅ No null values found in target data.

🎯 Validation Summary:
- Row Count: 3 source vs 3 target
- Columns: ['order_id', 'region', 'price', 'price_usd']
- Any Nulls: False


'\nThis script demonstrates:\n- Row count match validation\n- Schema consistency check\n- Null detection and handling\n- Logging test results for audit\n'