In [365]:
import checkedframe as cf


def validate(schema, df):
    try:
        print(schema.validate(df))
    except cf.exceptions.SchemaError as e:
        print(e)

# Pandas

In [366]:
import pandas as pd

df = pd.DataFrame(
    {
        "customer_id": ["TYX89", "F38J0M"],
        "balance": [198, -56],
        "overdraft_protection": [True, False],
    }
)

In [367]:
import checkedframe as cf


class MySchema(cf.Schema):
    customer_id = cf.String()
    balance = cf.Float64()
    overdraft_protection = cf.Boolean()

In [368]:
validate(MySchema, df)

Found 1 error(s)
  balance: 1 error(s)
    - Expected Float64, got Int64


In [369]:
class MySchema(cf.Schema):
    customer_id = cf.String()
    balance = cf.Float64(cast=True)
    overdraft_protection = cf.Boolean()

In [370]:
validate(MySchema, df)

  customer_id  balance  overdraft_protection
0       TYX89    198.0                  True
1      F38J0M    -56.0                 False


In [371]:
class MySchema(cf.Schema):
    customer_id = cf.String()
    balance = cf.Float64(cast=True)
    overdraft_protection = cf.Boolean()

    @cf.Check(columns="customer_id")
    def check_id_length(s: pd.Series) -> pd.Series:
        """customer_id must be of length 6"""
        return s.str.len() == 6

In [372]:
validate(MySchema, df)

Found 1 error(s)
  customer_id: 1 error(s)
    - check_id_length failed for 1 / 2 (50.00%) rows: customer_id must be of length 6


In [373]:
class MySchema(cf.Schema):
    customer_id = cf.String()
    balance = cf.Float64(cast=True)
    overdraft_protection = cf.Boolean()

    @cf.Check(columns="customer_id")
    def check_id_length(s: pd.Series) -> pd.Series:
        """customer_id must be of length 6"""
        return s.str.len() == 6

    @cf.Check
    def check_balances_pos_if_protected(df: pd.DataFrame) -> pd.Series:
        """Balances can only be negative if there is no overdraft protection"""
        return (df["balance"] >= 0) & df["overdraft_protection"]

In [374]:
validate(MySchema, df)

Found 2 error(s)
  customer_id: 1 error(s)
    - check_id_length failed for 1 / 2 (50.00%) rows: customer_id must be of length 6
  * check_balances_pos_if_protected failed for 1 / 2 (50.00%) rows: Balances can only be negative if there is no overdraft protection


# Polars

In [375]:
import polars as pl

df = pl.DataFrame(
    {
        "customer_id": ["TYX89", "F38J0M"],
        "balance": [198, -56],
        "overdraft_protection": [True, False],
    }
)

In [376]:
import checkedframe as cf


class MySchema(cf.Schema):
    customer_id = cf.String()
    balance = cf.Float64()
    overdraft_protection = cf.Boolean()

In [377]:
validate(MySchema, df)

Found 1 error(s)
  balance: 1 error(s)
    - Expected Float64, got Int64


In [378]:
class MySchema(cf.Schema):
    customer_id = cf.String()
    balance = cf.Float64(cast=True)
    overdraft_protection = cf.Boolean()

In [379]:
validate(MySchema, df)

shape: (2, 3)
┌─────────────┬─────────┬──────────────────────┐
│ customer_id ┆ balance ┆ overdraft_protection │
│ ---         ┆ ---     ┆ ---                  │
│ str         ┆ f64     ┆ bool                 │
╞═════════════╪═════════╪══════════════════════╡
│ TYX89       ┆ 198.0   ┆ true                 │
│ F38J0M      ┆ -56.0   ┆ false                │
└─────────────┴─────────┴──────────────────────┘


In [380]:
class MySchema(cf.Schema):
    customer_id = cf.String()
    balance = cf.Float64(cast=True)
    overdraft_protection = cf.Boolean()

    @cf.Check(columns="customer_id")
    def check_id_length(name: str) -> pl.Expr:
        """customer_id must be of length 6"""
        return pl.col(name).str.len_chars() == 6

In [381]:
validate(MySchema, df)

Found 1 error(s)
  customer_id: 1 error(s)
    - check_id_length failed for 1 / 2 (50.00%) rows: customer_id must be of length 6


In [382]:
class MySchema(cf.Schema):
    customer_id = cf.String()
    balance = cf.Float64(cast=True)
    overdraft_protection = cf.Boolean()

    @cf.Check(columns="customer_id")
    def check_id_length(name: str) -> pl.Expr:
        """customer_id must be of length 6"""
        return pl.col(name).str.len_chars() == 6

    @cf.Check
    def check_balances_pos_if_protected() -> pl.Expr:
        """Balances can only be negative if there is no overdraft protection"""
        return (pl.col("balance") >= 0) & pl.col("overdraft_protection")

In [383]:
validate(MySchema, df)

Found 2 error(s)
  customer_id: 1 error(s)
    - check_id_length failed for 1 / 2 (50.00%) rows: customer_id must be of length 6
  * check_balances_pos_if_protected failed for 1 / 2 (50.00%) rows: Balances can only be negative if there is no overdraft protection


# Agnostic

In [384]:
import polars as pl

df = pl.DataFrame(
    {
        "customer_id": ["TYX89", "F38J0M"],
        "balance": [198.28, -56],
        "overdraft_protection": [True, False],
    }
)

In [385]:
import checkedframe as cf


class MySchema(cf.Schema):
    customer_id = cf.String()
    balance = cf.Float64()
    overdraft_protection = cf.Boolean()

In [386]:
validate(MySchema, df)

shape: (2, 3)
┌─────────────┬─────────┬──────────────────────┐
│ customer_id ┆ balance ┆ overdraft_protection │
│ ---         ┆ ---     ┆ ---                  │
│ str         ┆ f64     ┆ bool                 │
╞═════════════╪═════════╪══════════════════════╡
│ TYX89       ┆ 198.28  ┆ true                 │
│ F38J0M      ┆ -56.0   ┆ false                │
└─────────────┴─────────┴──────────────────────┘


In [387]:
class MySchema(cf.Schema):
    customer_id = cf.String()
    balance = cf.Float64(cast=True)
    overdraft_protection = cf.Boolean()

In [388]:
validate(MySchema, df)

shape: (2, 3)
┌─────────────┬─────────┬──────────────────────┐
│ customer_id ┆ balance ┆ overdraft_protection │
│ ---         ┆ ---     ┆ ---                  │
│ str         ┆ f64     ┆ bool                 │
╞═════════════╪═════════╪══════════════════════╡
│ TYX89       ┆ 198.28  ┆ true                 │
│ F38J0M      ┆ -56.0   ┆ false                │
└─────────────┴─────────┴──────────────────────┘


In [389]:
class MySchema(cf.Schema):
    customer_id = cf.String()
    balance = cf.Float64(cast=True)
    overdraft_protection = cf.Boolean()

    @cf.Check(columns="customer_id")
    def check_id_length(name: str) -> cf.Expr:
        """customer_id must be of length 6"""
        return cf.col(name).str.len_chars() == 6

In [390]:
validate(MySchema, df)

Found 1 error(s)
  customer_id: 1 error(s)
    - check_id_length failed for 1 / 2 (50.00%) rows: customer_id must be of length 6


In [391]:
class MySchema(cf.Schema):
    customer_id = cf.String()
    balance = cf.Float64(cast=True)
    overdraft_protection = cf.Boolean()

    @cf.Check(columns="customer_id")
    def check_id_length(name: str) -> cf.Expr:
        """customer_id must be of length 6"""
        return cf.col(name).str.len_chars() == 6

    @cf.Check
    def check_balances_pos_if_protected() -> cf.Expr:
        """Balances can only be negative if there is no overdraft protection"""
        return (cf.col("balance") >= 0) & cf.col("overdraft_protection")

In [392]:
validate(MySchema, df)

Found 2 error(s)
  customer_id: 1 error(s)
    - check_id_length failed for 1 / 2 (50.00%) rows: customer_id must be of length 6
  * check_balances_pos_if_protected failed for 1 / 2 (50.00%) rows: Balances can only be negative if there is no overdraft protection
