# Data Validation - Bridge Failure Prediction

In [1]:
import pandas as pd
from src.validation.data_validation import DataValidator
from src.data.validation import check_column_ranges, check_allowed_values

## Load Data

In [2]:
# Load the processed feature table. CSV stores no dtype information, so the
# date columns the schema declares as datetime64[ns] are parsed at load time;
# without this they arrive as plain strings (object dtype).
features_path = '../data/processed/features.csv'
date_columns = ['inspection_date', 'last_maintenance_date']
# Peek at the header only, so that a date column missing from the file is left
# for the validation step to report rather than making read_csv raise here.
available_columns = pd.read_csv(features_path, nrows=0).columns
df = pd.read_csv(
    features_path,
    parse_dates=[col for col in date_columns if col in available_columns],
)

## Schema and Required Columns

In [3]:
# Expected dtype for every column of the feature table, keyed by column name.
# Insertion order is significant: `required` is derived from it below.
schema = dict(
    # identifiers and dates
    structure_id="object",
    inspection_date="datetime64[ns]",
    last_maintenance_date="datetime64[ns]",
    # usage, condition and target
    avg_daily_traffic="float64",
    bridge_condition="object",
    failure_within_1yr="int64",
    # location
    latitude="float64",
    longitude="float64",
    # environment and deterioration
    precipitation="float64",
    avg_temp="float64",
    corrosion_level="float64",
    previous_failures="int64",
    # categorical context
    soil_type="object",
    region_code="object",
)
# Every column named in the schema must be present in the data.
required = [*schema]

## Validate Columns and Types

In [4]:
# Validate `df` against the declared schema and required-column list.
validator = DataValidator(schema=schema, required=required)
# Reset the result before the attempt: if validation fails, df_valid must not
# be left unbound (fresh kernel) or still pointing at the frame from an earlier
# successful run of this cell (stale hidden state).
df_valid = None
try:
    df_valid = validator.validate(df)
except Exception as e:
    # Report the failure and keep the notebook running so later checks still
    # execute; df_valid stays None to signal that no validated frame exists.
    print('Schema and column validation: FAILED:', e)
else:
    print('Schema and column validation: PASSED')

## Range and Allowed Value Checks

In [5]:
# Each entry is (check function, column name, keyword arguments for the check).
value_checks = [
    (check_column_ranges, 'avg_daily_traffic', {'min_val': 0}),
    (check_column_ranges, 'precipitation', {'min_val': 0}),
    (check_column_ranges, 'corrosion_level', {'min_val': 0, 'max_val': 1}),
    (check_allowed_values, 'bridge_condition', {'allowed': None}),
]
# The allowed-values list is passed positionally, as in the helper's call
# convention used by this notebook, so it is kept separate from the kwargs.
allowed_conditions = ['Good', 'Fair', 'Poor']

# Run every check independently so that one failing column does not hide
# problems in the others. The helpers are assumed to signal a violation by
# raising (NOTE(review): confirm against src.data.validation).
check_failures = []
for check, column, kwargs in value_checks:
    try:
        if check is check_allowed_values:
            check(df, column, allowed_conditions)
        else:
            check(df, column, **kwargs)
    except Exception as e:
        check_failures.append(e)

if check_failures:
    # One line per failed check, each with the same prefix as before.
    for failure in check_failures:
        print('Range and allowed value checks: FAILED:', failure)
else:
    print('Range and allowed value checks: PASSED')