# ✅ Validating and Constraining Values

This notebook demonstrates how to validate and constrain values in a dataset using range checks, enum (allowed-value) lists, format validation, datetime parsing, and cross-field logical constraints.

In [1]:
import pandas as pd
import numpy as np

## 📏 Example 1: Range Check (age between 0 and 120)

In [2]:
# Sample ages, two of which (-5 and 130) fall outside the plausible human range
df_age = pd.DataFrame({'age': [25, 30, -5, 130, 45]})
# Series.between is inclusive on both ends, so ages of exactly 0 and 120 are kept
df_age_valid = df_age.loc[df_age['age'].between(0, 120)]
df_age_valid

Unnamed: 0,age
0,25
1,30
4,45


## 🧑‍⚖️ Example 2: Enum Check (gender must be 'male' or 'female')

In [3]:
# Build the sample and lower-case it in one step so that 'MALE' compares equal to 'male'
df_gender = pd.DataFrame(
    {'gender': ['male', 'female', 'F', 'unknown', 'MALE']}
).assign(gender=lambda frame: frame['gender'].str.lower())
# Only exact members of the allowed list survive; abbreviations such as 'f' are not mapped
df_gender_valid = df_gender.loc[df_gender['gender'].isin(['male', 'female'])]
df_gender_valid

Unnamed: 0,gender
0,male
1,female
4,male


## 📧 Example 3: Format Check (valid email addresses)

In [4]:
df_email = pd.DataFrame({'email': ['alice@example.com', 'bob[at]mail.com', 'carol@mail.com', None]})
# A bare '@' substring test also accepts malformed values such as '@', 'a@b@c' or 'x @y'.
# Require the shape local@domain.tld instead: exactly one '@', no whitespace, and a dot
# in the domain part. This is a pragmatic sanity check, not full RFC 5322 validation.
EMAIL_PATTERN = r'[^@\s]+@[^@\s]+\.[^@\s]+'
# fullmatch anchors the pattern at both ends; na=False treats missing emails as invalid
df_email_valid = df_email[df_email['email'].str.fullmatch(EMAIL_PATTERN, na=False)]
df_email_valid

Unnamed: 0,email
0,alice@example.com
2,carol@mail.com


## 📅 Example 4: Datetime Parsing and Validation

In [5]:
# Raw date strings; the middle entry cannot be parsed
df_date = pd.DataFrame({'date': ['2024-01-01', 'not a date', '2023-12-31']})
# errors='coerce' turns unparseable strings into NaT instead of raising
df_date = df_date.assign(date_parsed=pd.to_datetime(df_date['date'], errors='coerce'))
# Discard the rows whose date failed to parse (date_parsed is NaT)
df_date_valid = df_date.dropna(subset=['date_parsed'])
df_date_valid

Unnamed: 0,date,date_parsed
0,2024-01-01,2024-01-01
2,2023-12-31,2023-12-31


## 🔄 Example 5: Cross-Field Validation (end date ≥ start date)

In [6]:
# Three date ranges; the second one ends before it starts
df_dates = pd.DataFrame(dict(
    start_date=pd.to_datetime(['2024-01-01', '2024-01-05', '2024-01-10']),
    end_date=pd.to_datetime(['2024-01-02', '2024-01-01', '2024-01-15']),
))
# Row-wise comparison of the two columns: a range is valid when start_date <= end_date
df_dates_valid = df_dates.loc[df_dates['start_date'].le(df_dates['end_date'])]
df_dates_valid

Unnamed: 0,start_date,end_date
0,2024-01-01,2024-01-02
2,2024-01-10,2024-01-15
