In [0]:
%run "/Workspace/Users/ruchika.b.mhetre@v4c.ai/vstone_project/vstone_databricks_pipeline/src/notebooks/00_Setup/project_config"

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.functions import col, expr, lit, current_timestamp, count, avg, round, sum, to_date, to_timestamp

In [0]:
def test_gold_geography():
    """Validate the ``gold_sales_by_geography`` table.

    Checks performed:
      * the sum of ``total_market_value`` is positive
      * the minimum ``avg_car_price`` is positive

    Both aggregates are computed in a single Spark job. A ``None`` aggregate
    (what Spark returns for an empty table or an all-NULL column) is reported
    as an assertion failure with a descriptive message instead of surfacing
    as a ``TypeError`` from comparing ``None`` with ``0``.

    Raises:
        AssertionError: if any of the checks fails.
    """
    df = spark.table(f"{catalog_name}.{schema_name}.gold_sales_by_geography")

    # A global aggregation always yields exactly one row; unpack its two values.
    total_value, min_price = df.agg(
        F.sum("total_market_value"),
        F.min("avg_car_price"),
    ).collect()[0]

    # Check for join drops: total market value should be a positive number
    assert total_value is not None, (
        "Error: Market value is NULL. gold_sales_by_geography is empty or "
        "total_market_value has no values. Geography join may have failed."
    )
    assert total_value > 0, "Error: Market value is 0. Geography join may have failed."

    # Check for logical consistency: car price shouldn't be 0
    assert min_price is not None, "Error: avg_car_price has no non-NULL values."
    assert min_price > 0, f"Error: Found city with 0 average car price: {min_price}"

    print("✅ Geography Gold Test Passed.")

In [0]:
def test_gold_content():
    """Validate the ``gold_content_impact_analysis`` table.

    Checks performed:
      * ``has_photos`` is declared as a boolean column in the table schema
      * ``has_photos`` holds at least one non-NULL value
      * among rows where ``has_photos`` is true, the minimum
        ``avg_photo_qty`` is positive (skipped when that minimum is NULL,
        e.g. when no such rows exist)

    Raises:
        AssertionError: if any of the checks fails.
    """
    df = spark.table(f"{catalog_name}.{schema_name}.gold_content_impact_analysis")

    # Ensure has_photos is correctly boolean.
    # A membership test such as `True in values` also succeeds for the
    # integers 1/0 (1 == True in Python), so read the type from the schema.
    has_photos_type = dict(df.dtypes).get("has_photos")
    assert has_photos_type == "boolean", (
        f"Error: has_photos should be a boolean column but is {has_photos_type}."
    )

    distinct_flags = [row.has_photos for row in df.select("has_photos").distinct().collect()]
    assert any(flag is not None for flag in distinct_flags), "Error: has_photos column missing values."

    # Photo count check: if has_photos is True, avg_photo_qty must be > 0
    min_photo_qty = (
        df.filter("has_photos = True")
        .select(F.min("avg_photo_qty"))
        .collect()[0][0]
    )
    if min_photo_qty is not None:
        assert min_photo_qty > 0, "Error: Logic conflict! 'has_photos' is True but avg_photo_qty is 0."

    print("✅ Content Impact Gold Test Passed.")

In [0]:
def test_gold_technical():
    """Validate the ``gold_technical_valuation`` table.

    Checks performed:
      * the table contains at least one row
      * the mean of ``avg_horsepower`` lies strictly between 10 and 2000

    Row count and mean are computed in a single Spark job. A ``None`` mean
    (what Spark returns when every ``avg_horsepower`` is NULL) is reported as
    an assertion failure instead of raising ``TypeError`` in the range
    comparison.

    Raises:
        AssertionError: if any of the checks fails.
    """
    # Exclusive bounds for the mean horsepower sanity check.
    min_realistic_hp = 10
    max_realistic_hp = 2000

    df = spark.table(f"{catalog_name}.{schema_name}.gold_technical_valuation")

    # count(lit(1)) counts every row; a global aggregation yields one row.
    row_count, avg_hp = df.agg(
        F.count(F.lit(1)),
        F.avg("avg_horsepower"),
    ).collect()[0]

    # Assert table size
    assert row_count > 0, "Error: gold_technical_valuation is empty! Check join on marka/model."

    # Power check: ensure horsepower is within a realistic range
    assert avg_hp is not None, (
        "Error: avg_horsepower has no non-NULL values; cannot validate horsepower range."
    )
    assert min_realistic_hp < avg_hp < max_realistic_hp, f"Error: Average horsepower ({avg_hp}) seems unrealistic."

    print("✅ Technical Valuation Gold Test Passed.")

In [0]:
# Run the gold-layer checks in order; the first failing assertion stops the cell.
for gold_check in (test_gold_geography, test_gold_content, test_gold_technical):
    gold_check()