In [1]:
import pandas as pd
import numpy as np
from alphabase.spectral_library import validate


In [2]:
# Schema under test: four required columns, each declared as int64.
schema_1 = validate.Schema(
    'schema_1',
    [
        validate.Required('a', np.int64),
        validate.Required('b', np.int64),
        validate.Required('c', np.int64),
        validate.Required('d', np.int64),
    ]
)

# Happy path: every required column is present with an integer dtype
# (some narrower than int64, one unsigned); 'e' is not part of the schema.
df_1 = pd.DataFrame({
    'a': [1, 2, 3],
    'b': np.array([4,5,6]),
    'c': np.array([7,8,9]).astype(np.int16),
    'd': np.array([10,11,12]).astype(np.uint32),
    'e': np.array([13.,14.,15.]),
})

# The schema is applied to the frame directly; the checks below read the
# dtypes back from the same df_1 object.
schema_1(df_1)
for column in [column.name for column in schema_1.schema]:
    # Compare against the declared dtype explicitly. Passing the builtin `int`
    # to np.issubdtype resolves to the platform default integer, which is not
    # int64 everywhere (e.g. Windows with NumPy < 2).
    assert df_1[column].dtype == np.int64, (
        f"column {column!r} has dtype {df_1[column].dtype}, expected int64"
    )
# The column outside the schema must still be floating point.
assert np.issubdtype(df_1['e'].dtype, np.floating)

# raise on missing column: required column 'd' is absent from the frame
df_1 = pd.DataFrame({
    'a': [1, 2, 3],
    'b': np.array([4,5,6]),
    'c': np.array([7,8,9])
})

raised = False
try:
    schema_1(df_1)
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not mistaken for a validation failure.
    raised = True
assert raised, "schema_1 should reject a frame that lacks required column 'd'"

# raise on wrong type: 'd' holds floats while the schema declares int64
df_1 = pd.DataFrame({
    'a': [1, 2, 3],
    'b': np.array([4,5,6]),
    'c': np.array([7,8,9]),
    'd': np.array([10.,11.,12.])
})

raised = False
try:
    schema_1(df_1)
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not mistaken for a validation failure.
    raised = True
assert raised, "schema_1 should reject float column 'd' declared as int64"

In [3]:
# Conversion case: columns arrive with dtypes other than int32 (int16, uint16,
# bool) and are expected to come out of validation as int32.
schema_2 = validate.Schema(
    'schema_2',
    [
        validate.Required('a', np.int32),
        validate.Optional('b', np.int32),
        validate.Required('c', np.int32),
    ]
)

df_2 = pd.DataFrame({
    'a': np.array([1, 2, 3], dtype=np.int16),
    'b': np.array([4, 5, 6], dtype=np.uint16),
    'c': np.array([7, 8, 9], dtype=np.bool_),
})

schema_2(df_2)

# Every column named in the schema must now report the int32 dtype.
schema_2_column_names = [entry.name for entry in schema_2.schema]
for column in schema_2_column_names:
    converted_dtype = df_2[column].dtype
    assert np.issubdtype(converted_dtype, np.int32)

# raise on impossible conversion: 'a' is uint32, whose range does not fit int32
df_2 = pd.DataFrame({
    'a': np.array([1, 2, 3]).astype(np.uint32),
    'b': np.array([4,5,6]).astype(np.int32),
    'c': np.array([7,8,9]).astype(np.int32),
})

raised = False
try:
    schema_2(df_2)
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not mistaken for a validation failure.
    raised = True
assert raised, "schema_2 should reject uint32 column 'a' declared as int32"

# raise on impossible conversion: 'a' is built from a plain Python int list
# (presumably inferred by pandas as a 64-bit integer column, i.e. wider than
# the declared int32 - confirm if this test starts passing validation)
df_2 = pd.DataFrame({
    'a': [1, 2, 3],
    'b': np.array([4,5,6]).astype(np.int32),
    'c': np.array([7,8,9]).astype(np.int32),
})

raised = False
try:
    schema_2(df_2)
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not mistaken for a validation failure.
    raised = True
assert raised, "schema_2 should reject the default-integer column 'a' declared as int32"

# raise on impossible conversion: 'a' is float32 while the schema declares int32
df_2 = pd.DataFrame({
    'a': np.array([1, 2, 3]).astype(np.float32),
    'b': np.array([4,5,6]).astype(np.int32),
    'c': np.array([7,8,9]).astype(np.int32),
})

raised = False
try:
    schema_2(df_2)
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not mistaken for a validation failure.
    raised = True
assert raised, "schema_2 should reject float32 column 'a' declared as int32"