In [0]:
# schema_validator.py

def validate_schema(df, expected_schema_dict):
    """Validate a Spark DataFrame's columns against an expected schema.

    Columns are compared positionally: the i-th expected field must match
    the i-th DataFrame field in both name and type. Column-count differences
    are also reported, so missing or unexpected columns fail validation.

    Args:
        df: Object exposing ``df.schema.fields``, where each field has a
            ``name`` attribute and a ``dataType.simpleString()`` method
            (e.g. a Spark DataFrame).
        expected_schema_dict: Mapping with a ``"fields"`` list; each entry
            is a mapping with ``"name"`` and ``"type"`` keys. ``"type"`` is
            compared against ``dataType.simpleString()``, so it must use
            that same string form. A missing ``"fields"`` key is treated as
            an empty schema.

    Returns:
        The same ``df`` object, unchanged, when the schema matches.

    Raises:
        ValueError: If any position differs in name or type, or if the
            column counts differ. The message lists ``(expected, actual)``
            pairs; ``None`` stands in for the side that has no column at
            that position.
    """
    expected_fields = expected_schema_dict.get("fields", [])
    expected_columns = [(field["name"], field["type"]) for field in expected_fields]
    actual_columns = [(f.name, f.dataType.simpleString()) for f in df.schema.fields]

    # Positional comparison over the columns both sides have.
    mismatches = [
        (expected, actual)
        for expected, actual in zip(expected_columns, actual_columns)
        if expected != actual
    ]

    # zip() stops at the shorter sequence, so the leftover tail on either
    # side must be reported explicitly or it would pass silently.
    shared = min(len(expected_columns), len(actual_columns))
    mismatches.extend((expected, None) for expected in expected_columns[shared:])
    mismatches.extend((None, actual) for actual in actual_columns[shared:])

    if mismatches:
        raise ValueError(f"Schema validation failed. Mismatches: {mismatches}")

    return df
