In [9]:
import sys
from pathlib import Path
import os
from dotenv import load_dotenv

# Make the project root importable so the `src` package resolves.
# NOTE(review): assumes the kernel's working directory is a direct child of
# the project root (e.g. a notebooks/ folder) — confirm if run from elsewhere.
project_root = Path.cwd().parent
if str(project_root) not in sys.path:
    # Guarded so re-running this cell does not stack duplicate entries.
    sys.path.insert(0, str(project_root))

# Load environment variables explicitly from config/.env
env_path = project_root / 'config' / '.env'
print(f"Loading .env from: {env_path}")
print(f".env exists: {env_path.exists()}")

load_dotenv(env_path)


def _redact_db_url(url):
    """Return `scheme://***@host[:port]` for `url`, hiding user and password.

    Falls back to a fixed placeholder when the URL cannot be parsed, so the
    raw string (which may embed credentials) is never echoed.
    """
    from urllib.parse import urlsplit  # local import: only this helper needs it

    try:
        parts = urlsplit(url)
        host = parts.hostname or ''
        # `.port` raises ValueError when the port component is not numeric,
        # e.g. if an unescaped '/', '?' or '#' in the password truncates the netloc.
        port = f":{parts.port}" if parts.port else ''
    except ValueError:
        return '<unparseable URL>'
    return f"{parts.scheme}://***@{host}{port}"


# Verify it loaded. Never print the raw value: the connection URL embeds the
# database user and password, and notebook outputs get saved and shared.
db_url = os.getenv('DB_URL')
print(f"DB_URL loaded: {db_url is not None}")
print(f"DB_URL preview: {_redact_db_url(db_url) if db_url else 'NOT FOUND'}...")


Loading .env from: /Users/srujana/Desktop/HealthCare_Datapipeline/hospital-readmission-pipeline/config/.env
.env exists: True
DB_URL loaded: True
DB_URL preview: postgresql://<credentials redacted>...


In [10]:
import pandas as pd
from src.database import DatabaseConnection
from src.validation.data_validator import DataValidator

# Read the whole hospital_readmissions table through the project's engine.
db = DatabaseConnection()
df = pd.read_sql("SELECT * FROM hospital_readmissions", db.engine)

# Validate the frame; the report is indexed below by 'valid', 'errors'
# and 'warnings'.
validator = DataValidator(df)
report = validator.validate_all()

# Header and overall verdict.
print("DATA VALIDATION REPORT")
print("=" * 80)
print(f"Valid: {report['valid']}")

# Errors first, then warnings: each section is a counted heading followed by
# one bullet line per entry.
for heading, key, bullet in (("Errors", "errors", "✗"), ("Warnings", "warnings", "⚠")):
    entries = report[key]
    print(f"\n{heading} ({len(entries)}):")
    for entry in entries:
        print(f"  {bullet} {entry}")


DATA VALIDATION REPORT
Valid: True

Errors (0):

  ⚠ glucose_test: unexpected values {'normal', 'high'}
  ⚠ A1Ctest: unexpected values {'normal', 'high'}
