# In [None]:
import pandas as pd
import logging

# Setup logging: records at INFO level and above are written to
# validation.log, one "<timestamp> - <level> - <message>" line per record.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    filename="validation.log",
)

# Sample data: deliberately contains an under-age employee, an over-age
# employee, a missing age, a negative salary and two unknown departments.
df = pd.DataFrame({
    "employee_id": [101, 102, 103, 104, 105],
    "age": [25, 17, 66, 29, None],
    "salary": [50000, 62000, -1000, 45000, 70000],
    "department": ["HR", "Engineering", "Marketing", "Sales", "Finance"]
})

# Allowed departments
allowed_departments = ["HR", "Engineering", "Sales"]

# Masks
# The department column is checked by membership rather than being overwritten
# with a Categorical: that conversion replaces every unknown department with
# NaN, which erases the offending value before describe_issues can report it
# (the message would read "Invalid department: nan").
age_mask = df["age"].between(18, 65)                     # 18-65 inclusive; a missing age is False
salary_mask = df["salary"] >= 0                          # salary non-negative
dept_mask = df["department"].isin(allowed_departments)   # known departments only; missing is False

# Add boolean columns for checks
df["age_valid"] = age_mask
df["salary_valid"] = salary_mask
df["dept_valid"] = dept_mask

# Combine issues into a human-readable column
def describe_issues(row):
    """Summarise the failed validation checks of one row.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            ``employee_id``, ``age``, ``salary``, ``department`` and the
            boolean flags ``age_valid``, ``salary_valid``, ``dept_valid``.

    Returns:
        The failed checks joined with "; ", or ``None`` when every flag is
        truthy. When something failed, the same summary prefixed with the
        employee id is also logged at INFO level.
    """
    # (flag key, value key, label) in the order the problems are reported.
    checks = (
        ("age_valid", "age", "Invalid age"),
        ("salary_valid", "salary", "Invalid salary"),
        ("dept_valid", "department", "Invalid department"),
    )
    problems = [
        f"{label}: {row[value_key]}"
        for flag_key, value_key, label in checks
        if not row[flag_key]
    ]

    # Nothing failed: no log entry and no summary.
    if not problems:
        return None

    summary = "; ".join(problems)
    # Log the issue
    logging.info(f"Employee {row['employee_id']}: " + summary)
    return summary

# Build the per-row issue summary; describe_issues returns None for rows
# that pass every check, so those rows end up with a missing value here.
issue_summaries = df.apply(describe_issues, axis=1)  # type: ignore
df["issues"] = issue_summaries

# Filter rows with any issues
has_issue = df["issues"].notna()
df_invalid = df.loc[has_issue]

# Show results
report_columns = ["employee_id", "age", "salary", "department", "issues"]
print("Full DataFrame with validation flags and issues column:\n")
print(df)
print("\nRows with issues:\n")
print(df_invalid[report_columns])
