## Real-World Case Studies

### Healthcare - Medical Prediction Errors:
**Description**: Implement automated data-quality validation rules for a healthcare dataset so
that invalid or implausible records are detected before they introduce errors into predictive models.

In [None]:

import pandas as pd
import numpy as np
def _count_out_of_range(series, low, high):
    """Count entries of *series* that fall outside ``[low, high]``.

    Values are coerced to numbers first so that a column polluted with
    text (e.g. ``"N/A"``) does not raise ``TypeError`` on comparison.
    Entries that are present but cannot be parsed as a number are counted
    as issues; missing entries (``None``/``NaN``) are not counted.
    """
    numeric = pd.to_numeric(series, errors="coerce")
    out_of_range = (numeric < low) | (numeric > high)
    # Non-null in the raw data but NaN after coercion => not a number at all.
    unparseable = series.notna() & numeric.isna()
    return int((out_of_range | unparseable).sum())


def validate_healthcare_data(df):
    """Run data-quality checks on a healthcare DataFrame and summarize them.

    Each check is applied only when the column(s) it needs are present:

    * ``age``: values below 0 or above 120, or non-numeric entries.
    * ``heart_rate``: values below 30 or above 200, or non-numeric entries.
    * ``gender``: values other than ``'M'``, ``'F'`` or ``'Other'``
      (missing values are counted as invalid).
    * ``diagnosis_code``: missing (null) values.
    * ``blood_pressure_systolic`` / ``blood_pressure_diastolic``: rows where
      the systolic value is lower than the diastolic value.

    Parameters
    ----------
    df : pandas.DataFrame
        Input records; any subset of the columns above may be present.

    Returns
    -------
    pandas.DataFrame
        One row per executed check with columns ``"Check"`` and
        ``"Issue Count"``. Empty (but with both columns) when none of the
        known columns exist in *df*.
    """
    report = []

    if 'age' in df.columns:
        report.append(
            ["Invalid Age Entries", _count_out_of_range(df['age'], 0, 120)]
        )

    if 'heart_rate' in df.columns:
        report.append(
            ["Abnormal Heart Rates",
             _count_out_of_range(df['heart_rate'], 30, 200)]
        )

    if 'gender' in df.columns:
        invalid_gender = ~df['gender'].isin(['M', 'F', 'Other'])
        report.append(["Invalid Gender Values", int(invalid_gender.sum())])

    if 'diagnosis_code' in df.columns:
        missing_codes = int(df['diagnosis_code'].isnull().sum())
        report.append(["Missing Diagnosis Codes", missing_codes])

    if ('blood_pressure_systolic' in df.columns
            and 'blood_pressure_diastolic' in df.columns):
        # Coerce so text-polluted columns compare safely; comparisons
        # involving NaN are False, so unreadable rows are not flagged here.
        systolic = pd.to_numeric(df['blood_pressure_systolic'], errors="coerce")
        diastolic = pd.to_numeric(df['blood_pressure_diastolic'], errors="coerce")
        report.append(["Swapped BP Values", int((systolic < diastolic).sum())])

    return pd.DataFrame(report, columns=["Check", "Issue Count"])

# Small demo dataset: one clean patient record followed by two records that
# deliberately violate the validation rules.
_sample_columns = [
    "age",
    "heart_rate",
    "gender",
    "diagnosis_code",
    "blood_pressure_systolic",
    "blood_pressure_diastolic",
]
_sample_rows = [
    (25, 72, "M", "D123", 120, 80),
    (130, 210, "X", None, 80, 90),
    (-2, 15, "F", "D456", 110, 70),
]
data = pd.DataFrame(_sample_rows, columns=_sample_columns)

# Run every applicable check and display the per-check issue counts.
validation_report = validate_healthcare_data(data)
print(validation_report)


                     Check  Issue Count
0      Invalid Age Entries            2
1     Abnormal Heart Rates            2
2    Invalid Gender Values            1
3  Missing Diagnosis Codes            1
4        Swapped BP Values            1
