In [None]:
import pandas as pd

In [None]:
# Small sample frame: both columns hold numbers as strings, plus one
# non-numeric placeholder each ("unknown", "N/A") and one missing age (None).
df = pd.DataFrame(
    dict(
        age=["25", "30", "unknown", "40", None],
        salary=["50000", "60000", "70000", "N/A", "80000"],
    )
)

### Notes: `isna()` and `notna()` in schema validation

- `isna()` flags missing values (`NaN`, `None`); here it is applied to the result of an operation (e.g., type conversion).
- `age_numeric.isna()` is `True` for values that failed conversion **or** were originally missing.
- `notna()` is the inverse of `isna()`; applied to the original column, it confirms that a value was actually present.
- `df["age"].notna()` excludes genuinely missing values (`NaN`, `None`).
- Combining `age_numeric.isna() & df["age"].notna()` isolates **invalid values** (present but not convertible), not missing data.
- Commonly used to detect **schema/type violations** in pandas.


In [None]:
# Parse ages leniently: values that cannot be parsed become NaN instead of raising.
age_numeric = pd.to_numeric(df["age"], errors="coerce")

# Keep only entries that were present in the raw column but failed to parse;
# originally-missing ages are excluded by the notna() check.
invalid_age = df["age"][df["age"].notna() & age_numeric.isna()]
invalid_age

In [None]:
# Example policy: discard every row whose age was flagged as invalid.
# drop() returns a new frame, so `df` itself is left untouched.
df_clean = df.drop(labels=invalid_age.index, axis="index")
df_clean


In [None]:
# errors="raise" turns any remaining non-numeric age into an exception;
# the nullable Int64 dtype lets missing ages stay missing in an integer column.
df_clean["age"] = (
    df_clean["age"]
    .pipe(pd.to_numeric, errors="raise")
    .astype("Int64")
)

In [None]:
# Map the "N/A" placeholder to a real missing value so the numeric
# conversion of this column does not have to deal with the string.
df_clean["salary"] = df_clean["salary"].replace(
    to_replace={"N/A": None},
)

In [None]:
# Strict parse: errors="raise" makes any non-numeric salary fail loudly;
# the nullable Int64 dtype lets missing salaries stay missing in an integer column.
df_clean["salary"] = (
    df_clean["salary"]
    .pipe(pd.to_numeric, errors="raise")
    .astype("Int64")
)

In [None]:
# Display the final frame after the age and salary conversions in the cells above.
df_clean