In [0]:
%run "/Workspace/Users/ruchika.b.mhetre@v4c.ai/vstone_project/vstone_databricks_pipeline/src/notebooks/00_Setup/project_config"

In [0]:
from pyspark.sql import Row
import pyspark.sql.functions as F

def test_bronze_logic():
    """Check the Bronze casting and date-parsing expressions on one-row mock frames.

    Uses the notebook's ``spark`` session to build two single-row DataFrames and
    asserts that:

    * the text id string ``"47937696.0"`` becomes the long ``47937696`` after the
      double -> long cast;
    * the string year ``"2022"`` becomes the int ``2022`` via ``try_cast``;
    * the date string ``"15.01.2023"`` is parsed by the coalesced ``to_date``
      expression.

    Note: the mock date only matches the first pattern (``dd.MM.yyyy``), so the
    two fallback patterns in the coalesce are not exercised by this check.

    Raises:
        AssertionError: if any expression yields an unexpected value.
    """
    # 1. Create mock data for Text (testing double -> long cast) and Transactions (testing date)
    mock_text_data = spark.createDataFrame([Row(id="47937696.0", text="Great car")])
    mock_trans_data = spark.createDataFrame([Row(date="15.01.2023", year="2022", id="1")])

    # 2. Test Text ID Logic
    # The id arrives as a decimal-looking string, so it goes through double before long.
    res_text = mock_text_data.select(F.expr("cast(cast(id as double) as long)").alias("id"))
    assert res_text.first()["id"] == 47937696, "Text ID casting failed!"

    # 3. Test Transaction Date/Year Logic
    res_trans = mock_trans_data.select(
        F.expr("try_cast(year as int)").alias("year"),
        # coalesce keeps the first pattern that produces a non-null date.
        F.expr("""
            coalesce(
                to_date(date, 'dd.MM.yyyy'),
                to_date(date, "yyyy-MM-dd'T'HH:mm:ss'Z'"),
                to_date(date, 'yyyy-MM-dd')
            )
        """).alias("date")
    )

    row = res_trans.first()
    # Every assertion carries a message so a failure names the expression that broke.
    assert row["year"] == 2022, "Year try_cast logic failed!"
    assert row["date"].strftime("%Y-%m-%d") == "2023-01-15", "Date Coalesce logic failed!"

    print("✅ All Logic Unit Tests Passed!")

# Run the logic checks right away; a failed assertion raises from this call.
test_bronze_logic()

In [0]:
def verify_dlt_deployment(schema: str = "LIVE") -> None:
    """Run post-deployment sanity checks against the Bronze tables.

    Reads three tables through the notebook's ``spark`` session and asserts that:

    * ``bronze_transactions_unified`` contains at least one row;
    * ``bronze_geographic`` has no null ``city_prepositional`` values;
    * ``bronze_photos`` exposes a ``photo_url`` column.

    Args:
        schema: Schema prefix placed before each table name. The default,
            ``"LIVE"``, keeps the original behaviour for use inside a DLT
            pipeline; pass a ``catalog.schema`` name to run the same checks
            elsewhere.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    # 1. Verify Unified Table Count
    unified_df = spark.table(f"{schema}.bronze_transactions_unified")
    count = unified_df.count()
    assert count > 0, "Unified table is empty!"

    # 2. Verify Geo-Mapping completeness
    # Ensure 'city_prepositional' exists so it can join with 'place' later
    geo_df = spark.table(f"{schema}.bronze_geographic")
    null_geo = geo_df.filter(F.col("city_prepositional").isNull()).count()
    assert null_geo == 0, f"Found {null_geo} rows in Geographic table with missing join keys!"

    # 3. Verify Photo URLs
    photos_df = spark.table(f"{schema}.bronze_photos")
    assert "photo_url" in photos_df.columns, "Photo URL column missing!"

    print(f"✅ Deployment Verified: {count} unified transactions ready for Silver layer.")

# Note: 'LIVE' is the virtual schema that refers to tables defined in the current DLT pipeline.
# Outside a DLT pipeline, reference these tables by their catalog.schema name instead.