In [None]:
%%capture
# Install or upgrade (-U) the Rockfish SDK with its "labs" extra; -f adds the
# Rockfish package index as a place for pip to look for packages.
# The %%capture magic on the first line of this cell hides the installer output.
%pip install -U 'rockfish[labs]' -f 'https://docs142.rockfish.ai/packages/index.html'

In [None]:
import io
import rockfish as rf
import rockfish.actions as ra

In [None]:
# Small in-memory CSV: a header row (a, b, c) followed by two numeric records.
data = b"""\
a,b,c
1.0,2.0,3.0
4.0,5.0,6.0
"""

# Wrap the bytes in a file-like object and load them as the dataset named
# "sample"; every workflow in this notebook uses it as its input.
csv_buffer = io.BytesIO(data)
dataset = rf.Dataset.from_csv("sample", csv_buffer)

### 1. Add

Select one field and add the indicated value

In [None]:
# Show the input dataset (converted with to_pandas()) before the "add" transform is applied.
dataset.to_pandas()

Unnamed: 0,a,b,c
0,1.0,2.0,3.0
1,4.0,5.0,6.0


In [None]:
# Connection object that later cells pass to builder.start() to launch workflows
# and to to_local() to fetch the resulting datasets.
conn = rf.Connection.local()

In [None]:
# Field to modify and the constant to add to each of its values.
selected_col = "a"
indicated_method_value = 2

# Transform writes the result back into the selected field
# (column "a" becomes a + 2 in the output below).
add = ra.Transform(
    {
        "function": {
            "add": [selected_col, indicated_method_value],
        },
    }
)

In [None]:
save = rf.actions.DatasetSave(name="add")
builder = rf.WorkflowBuilder()
builder.add_dataset(dataset)
builder.add_action(add, parents=[dataset])
builder.add_action(save, parents=[add])
workflow = await builder.start(conn)

print(f"Workflow: {workflow.id()}")

Workflow: c0077b62-5015-4fac-a314-722d361e4746


In [None]:
new_dataset = None
async for sds in workflow.datasets():
    new_dataset = await sds.to_local(conn)
new_dataset.to_pandas()

Unnamed: 0,a,b,c
0,3.0,2.0,3.0
1,6.0,5.0,6.0


### 2. Subtract

Select one field and subtract the indicated value

In [None]:
# Show the input dataset (converted with to_pandas()) before the "subtract" transform is applied.
dataset.to_pandas()

Unnamed: 0,a,b,c
0,1.0,2.0,3.0
1,4.0,5.0,6.0


In [None]:
# Field to modify and the constant to subtract from each of its values.
selected_col = "a"
indicated_method_value = 2

# Transform writes the result back into the selected field
# (column "a" becomes a - 2 in the output below).
subtract = ra.Transform(
    {
        "function": {
            "subtract": [selected_col, indicated_method_value],
        },
    }
)

In [None]:
save = rf.actions.DatasetSave(name="subtract")
builder = rf.WorkflowBuilder()
builder.add_dataset(dataset)
builder.add_action(subtract, parents=[dataset])
builder.add_action(save, parents=[subtract])
workflow = await builder.start(conn)

print(f"Workflow: {workflow.id()}")

Workflow: 5b589bfc-0e8b-4205-975d-171072e217c3


In [None]:
new_dataset = None
async for sds in workflow.datasets():
    new_dataset = await sds.to_local(conn)
new_dataset.to_pandas()

Unnamed: 0,a,b,c
0,-1.0,2.0,3.0
1,2.0,5.0,6.0


### 3. Multiply
Select one field and multiply by the indicated value

In [None]:
# Show the input dataset (converted with to_pandas()) before the "multiply" transform is applied.
dataset.to_pandas()

Unnamed: 0,a,b,c
0,1.0,2.0,3.0
1,4.0,5.0,6.0


In [None]:
# Field to modify and the constant to multiply each of its values by.
selected_col = "a"
indicated_method_value = 2

# Transform writes the result back into the selected field
# (column "a" becomes a * 2 in the output below).
multiply = ra.Transform(
    {
        "function": {
            "multiply": [selected_col, indicated_method_value],
        },
    }
)

In [None]:
save = rf.actions.DatasetSave(name="multiply")
builder = rf.WorkflowBuilder()
builder.add_dataset(dataset)
builder.add_action(multiply, parents=[dataset])
builder.add_action(save, parents=[multiply])
workflow = await builder.start(conn)

print(f"Workflow: {workflow.id()}")

Workflow: c446db56-f89b-4a0a-b6f0-7ef989092123


In [None]:
new_dataset = None
async for sds in workflow.datasets():
    new_dataset = await sds.to_local(conn)
new_dataset.to_pandas()

Unnamed: 0,a,b,c
0,2.0,2.0,3.0
1,8.0,5.0,6.0


### 4. Divide
Select one field and divide by the indicated value

In [None]:
# Show the input dataset (converted with to_pandas()) before the "divide" transform is applied.
dataset.to_pandas()

Unnamed: 0,a,b,c
0,1.0,2.0,3.0
1,4.0,5.0,6.0


In [None]:
# Field to modify and the constant to divide each of its values by.
selected_col = "a"
indicated_method_value = 2

# Transform writes the result back into the selected field
# (column "a" becomes a / 2 in the output below).
divide = ra.Transform(
    {
        "function": {
            "divide": [selected_col, indicated_method_value],
        },
    }
)

In [None]:
save = rf.actions.DatasetSave(name="divide")
builder = rf.WorkflowBuilder()
builder.add_dataset(dataset)
builder.add_action(divide, parents=[dataset])
builder.add_action(save, parents=[divide])
workflow = await builder.start(conn)

print(f"Workflow: {workflow.id()}")

Workflow: 352355bd-bf07-4295-8e7e-e2ad771624a7


In [None]:
new_dataset = None
async for sds in workflow.datasets():
    new_dataset = await sds.to_local(conn)
new_dataset.to_pandas()

Unnamed: 0,a,b,c
0,0.5,2.0,3.0
1,2.0,5.0,6.0


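## Append new column for the transformed field

Use `Apply` with `append_field` to write the result of the function to a new field, leaving the selected field unchanged (unlike `Transform`, which replaces the selected field's values).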

In [None]:
# Show the input dataset (converted with to_pandas()) before the new column is appended.
dataset.to_pandas()

Unnamed: 0,a,b,c
0,1.0,2.0,3.0
1,4.0,5.0,6.0


In [None]:
# Source field, the constant to divide it by, and the name of the field to append.
selected_col = "a"
indicated_method_value = 2
new_col_name = "new_a"

# Apply with "append_field" stores the function's result in a new field
# (column "new_a" = a / 2 in the output below) and keeps column "a" as it was.
add_new_col = ra.Apply(
    {
        "function": {
            "divide": [selected_col, indicated_method_value],
        },
        "append_field": new_col_name,
    }
)

In [None]:
save = rf.actions.DatasetSave(name="new_col_divide")
builder = rf.WorkflowBuilder()
builder.add_dataset(dataset)
builder.add_action(add_new_col, parents=[dataset])
builder.add_action(save, parents=[add_new_col])
workflow = await builder.start(conn)

print(f"Workflow: {workflow.id()}")

Workflow: b669b5f0-92c1-481d-b8d3-631358fd709a


In [None]:
new_dataset = None
async for sds in workflow.datasets():
    new_dataset = await sds.to_local(conn)
new_dataset.to_pandas()

Unnamed: 0,a,b,c,new_a
0,1.0,2.0,3.0,0.5
1,4.0,5.0,6.0,2.0
