In [20]:
from pandas import DataFrame, Series
from process_framework.references import Reference, ColumnReference
from process_framework.steps import TransformingStep, ModifyingStep, Log, Append
from process_framework.steps.batch_processing_step import BatchProcessDataFrame
import logging

# Configure the root logger (getLogger() without a name) to let INFO-level
# records through; presumably this is what makes the output of the Log step
# in the batch pipeline visible — confirm against the Log implementation.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

In [2]:
# Demo input: the integers 0..199 in 'value' and their squares in 'value_pow_2'.
df = DataFrame({'value': range(200)}).assign(
    value_pow_2=lambda frame: frame['value'] ** 2
)
df.head()

Unnamed: 0,value,value_pow_2
0,0,0
1,1,1
2,2,4
3,3,9
4,4,16


In [3]:
# Wrap the demo frame in a Reference; the frame is read back later through
# `df_ref.value`.
df_ref = Reference(DataFrame, df)
# Column handles bound to `df_ref` by column name.
# NOTE(review): `_value` is not used by any cell visible in this notebook section.
_value = ColumnReference(df_ref, 'value')
_value_pow_2 = ColumnReference(df_ref, 'value_pow_2')
# 'value_pow_2_start' is not a column of `df` at this point; it only shows up
# in the frame after the TakeStart step below has run.
_value_pow_2_start = ColumnReference(df_ref, 'value_pow_2_start')


In [4]:
class TakeStart(ModifyingStep[Series]):
    """Step that reduces every element to a fixed-width, three-character prefix.

    Each element is converted with ``str``. Results shorter than three
    characters are filled on the right with ``'-'``; longer ones are cut after
    the third character (e.g. ``4 -> '4--'``, ``16 -> '16-'``).
    """

    def transform(self, subject: Series) -> Series | None:
        """Return the padded and truncated string prefix of each element of ``subject``."""
        as_text = subject.map(str)
        # Pad first so short values reach width 3, then cut so long values
        # never exceed it.
        padded = as_text.str.pad(width=3, side='right', fillchar='-')
        return padded.str.slice(stop=3)
    

# Apply TakeStart to the whole frame. Judging by the output below, the first
# column reference is the input ('value_pow_2') and the second one receives
# the result ('value_pow_2_start').
ts = TakeStart(_value_pow_2, _value_pow_2_start)
ts.do()


# Show the first rows of the referenced frame to check the new column.
df_ref.value.head()

Unnamed: 0,value,value_pow_2,value_pow_2_start
0,0,0,0--
1,1,1,1--
2,2,4,4--
3,3,9,9--
4,4,16,16-


In [18]:
class Divide(ModifyingStep[Series]):
    """Step that yields the remainder of each element after division by 2.

    NOTE(review): despite the class name, the result is the remainder
    (``value % 2``), not the quotient — confirm that this is the intended
    output before relying on it.
    """

    def transform(self, subject: Series) -> Series | None:
        """Return ``subject`` modulo 2, element-wise."""
        # Only the remainder is needed, so use ``mod`` directly rather than
        # ``divmod``, which would also compute a quotient that is thrown away.
        return subject.mod(2)

In [24]:
# Reference created without an initial frame; it is handed to
# BatchProcessDataFrame as `batch` below.
# NOTE(review): presumably the batch processor fills it with each 50-row slice
# of `df_ref` before running the steps — confirm against BatchProcessDataFrame.
batch = Reference(DataFrame)
# Column handles bound to the per-batch reference rather than to `df_ref`.
_b_value = ColumnReference(batch, 'value')
_div_2 = ColumnReference(batch, 'div_2')
_div_2_start = ColumnReference(batch, 'div_2_start')

# Reference typed as `list`, passed to the Append step as its second argument.
# NOTE(review): its contents are never inspected in the visible cells.
list_div_2 = Reference(list)

# Pipeline over `df_ref` with batch_size=50; the recorded log output reports
# batches of shape (50, 5). Steps are listed in the order they are given to
# the processor:
#   1. Divide    — 'value' -> 'div_2'
#   2. TakeStart — 'div_2' -> 'div_2_start'
#   3. Log       — logs the current batch reference
#   4. Append    — takes 'div_2' and `list_div_2` (presumably accumulating the
#                  column across batches — TODO confirm)
bdf = BatchProcessDataFrame(
    batch_size=50,
    subject=df_ref,
    batch=batch,
    steps=[
        Divide(_b_value, _div_2),
        TakeStart(_div_2, _div_2_start),
        Log(batch),
        Append(_div_2, list_div_2)
    ]
)

# Run the pipeline.
bdf.do()

INFO:root:Reference[DataFrame]((50, 5), {0: {'div_2': 0, 'div_2_start': '0--', 'value': 0, ...}, 1: {'div_2': 1, 'div_2_start': '1--', 'value': 1, ...}, 2: {'div_2': 0, 'div_2_start': '0--', 'value': 2, ...}, ...}), int64, [np.int64(0), np.int64(1), np.int64(2), ...]
INFO:root:Reference[DataFrame]((50, 5), {50: {'div_2': 0, 'div_2_start': '0--', 'value': 50, ...}, 51: {'div_2': 1, 'div_2_start': '1--', 'value': 51, ...}, 52: {'div_2': 0, 'div_2_start': '0--', 'value': 52, ...}, ...}), int64, [np.int64(50), np.int64(51), np.int64(52), ...]
INFO:root:Reference[DataFrame]((50, 5), {100: {'div_2': 0, 'div_2_start': '0--', 'value': 100, ...}, 101: {'div_2': 1, 'div_2_start': '1--', 'value': 101, ...}, 102: {'div_2': 0, 'div_2_start': '0--', 'value': 102, ...}, ...}), int64, [np.int64(100), np.int64(101), np.int64(102), ...]
INFO:root:Reference[DataFrame]((50, 5), {150: {'div_2': 0, 'div_2_start': '0--', 'value': 150, ...}, 151: {'div_2': 1, 'div_2_start': '1--', 'value': 151, ...}, 152: {'d

done!
