In [0]:
%run "/Workspace/Users/ruchika.b.mhetre@v4c.ai/vstone_project/vstone_databricks_pipeline/src/notebooks/00_Setup/project_config"

# Logic Unit Test

In [0]:
from pyspark.sql import functions as F

# 1. Setup Mock Data
# One row per input shape the transformation has to survive: each of the three
# supported date formats, plus a non-numeric year.
data = [
    ("1", "Toyota", "2022", "15.01.2023"),           # dd.MM.yyyy
    ("2", "Honda", "2021", "2023-05-20T10:00:00Z"),  # ISO Timestamp
    ("3", "Ford", "UNKNOWN", "2023-12-01")           # yyyy-MM-dd
]
columns = ["id", "marka", "year", "date"]
mock_df = spark.createDataFrame(data, columns)

# 2. Apply Transformation
test_df = mock_df.select(
    F.col("id").cast("string"),
    # try_cast (via F.expr) yields NULL for non-numeric input instead of
    # raising [CAST_INVALID_INPUT] under ANSI mode.
    F.expr("try_cast(year as int)").alias("year"),
    # Attempt each known format in turn; coalesce keeps the first non-null parse.
    # NOTE(review): the quoted 'Z' in the ISO pattern is matched as a literal
    # character, so the value is presumably read in the session time zone rather
    # than UTC -- confirm this matches the production parsing rules.
    F.coalesce(
        F.try_to_timestamp(F.col("date"), F.lit('dd.MM.yyyy')),
        F.try_to_timestamp(F.col("date"), F.lit("yyyy-MM-dd'T'HH:mm:ss'Z'")),
        F.try_to_timestamp(F.col("date"), F.lit('yyyy-MM-dd'))
    ).cast("date").alias("date_parsed")
)

# 3. Assertions
results = test_df.collect()
assert len(results) == len(data), \
    f"Failed: expected {len(data)} rows, got {len(results)}"

# Look rows up by id rather than by list position: collect() makes no ordering
# promise, so positional indexing could silently check the wrong row.
rows_by_id = {row["id"]: row for row in results}

# str() of a date is its ISO form (YYYY-MM-DD); unlike .strftime it also copes
# with None, so a failed parse surfaces as the assertion message below instead
# of an AttributeError.

# Test Row 1: dd.MM.yyyy
assert str(rows_by_id["1"]["date_parsed"]) == "2023-01-15", "Failed: dd.MM.yyyy"

# Test Row 2: ISO Timestamp
assert str(rows_by_id["2"]["date_parsed"]) == "2023-05-20", "Failed: ISO T-format"

# Test Row 3: yyyy-MM-dd (last fallback in the coalesce chain)
assert str(rows_by_id["3"]["date_parsed"]) == "2023-12-01", "Failed: yyyy-MM-dd"

# Test Rows 1-2: numeric year strings must cast to their integer value
assert rows_by_id["1"]["year"] == 2022, "Failed: valid year cast"
assert rows_by_id["2"]["year"] == 2021, "Failed: valid year cast"

# Test Row 3: Malformed Year
# try_cast will return None instead of throwing [CAST_INVALID_INPUT]
assert rows_by_id["3"]["year"] is None, "Failed: Malformed year cast"

print("Logic Test Passed")

# Integration Tests (Bronze Layer Validation)

In [0]:
from pyspark.sql.types import DateType, DoubleType

# Configuration
target_table = "vstone_project.db_project.bronze_transactions"

print(f"Starting Integration Tests for {target_table}...")

# 1. Verification: Does the table exist in Unity Catalog?
assert spark.catalog.tableExists(target_table), f"Critical Error: Table {target_table} not found!"

# Resolve the table once and reuse the handle for every remaining check.
bronze_df = spark.table(target_table)

# 2. Verification: Did data actually ingest?
row_count = bronze_df.count()
assert row_count > 0, f"Critical Error: {target_table} is empty!"
print(f"Verified: {row_count} records found in Bronze layer.")

# 3. Verification: Schema Integrity (Data Types)
# 'date' must be exactly DateType and 'cost' exactly DoubleType.
schema = bronze_df.schema
expected_types = {"date": DateType, "cost": DoubleType}

for column_name, expected_type in expected_types.items():
    # Check presence first so a missing column fails with a readable message
    # instead of a bare KeyError from the schema lookup.
    assert column_name in schema.fieldNames(), \
        f"Schema Error: column '{column_name}' is missing from {target_table}"

    actual_type = schema[column_name].dataType
    assert isinstance(actual_type, expected_type), \
        f"Type Mismatch: '{column_name}' is {actual_type}, expected {expected_type.__name__}"

print("Integration Tests Passed: Bronze table exists, contains data, and has correct data types.")