In [None]:
%%capture
%pip install -U 'rockfish[labs]' -f 'https://docs142.rockfish.ai/packages/index.html'

In [None]:
import rockfish as rf
import rockfish.actions as ra
import rockfish.labs as rl

Replace `YOUR_API_KEY` with your assigned API key string. Do not wrap it in quotes: `%env` would store them as part of the value.

For example, if the assigned API key is `abcd1234`, you can do the following:
```python
%env ROCKFISH_API_KEY=abcd1234
conn = rf.Connection.from_env()
```
If you do not have an API key, please reach out to support@rockfish.ai.

In [None]:
# Export the API key as an environment variable for this kernel.
# Keep comments off the `%env` line itself: everything after `=` becomes the value.
%env ROCKFISH_API_KEY=YOUR_API_KEY
# Create the connection from environment settings; presumably this reads
# ROCKFISH_API_KEY set above — confirm against the Rockfish SDK docs.
conn = rf.Connection.from_env()

In [None]:
# download our example of tabular data: finance.csv
!wget --no-clobber https://docs142.rockfish.ai/tutorials/finance.csv

--2023-12-05 20:25:37--  https://docs142.rockfish.ai/tutorials/finance.csv
Resolving docs142.rockfish.ai (docs142.rockfish.ai)... 13.226.52.67, 13.226.52.93, 13.226.52.77, ...
Connecting to docs142.rockfish.ai (docs142.rockfish.ai)|13.226.52.67|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3444556 (3.3M) [text/csv]
Saving to: ‘finance.csv’


2023-12-05 20:25:37 (15.6 MB/s) - ‘finance.csv’ saved [3444556/3444556]



In [None]:
# Load the downloaded CSV into a Rockfish dataset. The first argument is
# presumably the dataset name and the second the local file path — confirm
# against the `rf.Dataset.from_csv` documentation.
dataset = rf.Dataset.from_csv("finance", "finance.csv")
# Last expression of the cell: show the data as a pandas DataFrame.
dataset.to_pandas()

Unnamed: 0,customer,age,gender,merchant,category,amount,fraud,timestamp
0,C1093826151,4,M,M348934600,transportation,4.55,0,2023-01-01
1,C575345520,2,F,M348934600,transportation,76.67,0,2023-01-01
2,C1787537369,2,M,M1823072687,transportation,48.02,0,2023-01-01
3,C1732307957,5,F,M348934600,transportation,55.06,0,2023-01-01
4,C842799656,1,F,M348934600,transportation,25.62,0,2023-01-01
...,...,...,...,...,...,...,...,...
49995,C1971105040,3,M,M348934600,transportation,67.91,0,2023-01-20
49996,C51444479,3,M,M348934600,transportation,32.27,0,2023-01-20
49997,C1096642744,5,M,M1535107174,wellnessandbeauty,149.70,0,2023-01-20
49998,C1166683343,2,F,M1823072687,transportation,24.78,0,2023-01-20


### Redact a field
We can redact any categorical column.

Here we redact the `gender` column.

In [None]:
# Short aliases for the nested config classes keep the declarations readable.
FieldConfig = ra.TrainTimeGAN.FieldConfig

# Column that carries the event time of each row.
timestamp_field = ra.TrainTimeGAN.TimestampConfig(field="timestamp")

# Metadata fields; `customer` is declared as the session key.
metadata_fields = [
    FieldConfig(field="customer", type="session"),
    FieldConfig(field="age", type="categorical"),
    FieldConfig(field="gender", type="categorical"),
]

# Measurement fields; `amount` is the only one not declared categorical.
measurement_fields = [
    FieldConfig(field="merchant", type="categorical"),
    FieldConfig(field="category", type="categorical"),
    FieldConfig(field="amount"),
    FieldConfig(field="fraud", type="categorical"),
]

# Fields to redact: `gender` is masked in the synthetic output.
redacted_fields = ra.TrainTimeGAN.PrivacyConfig(fields=["gender"])

encoder_config = ra.TrainTimeGAN.DatasetConfig(
    timestamp=timestamp_field,
    metadata=metadata_fields,
    measurements=measurement_fields,
    privacy=redacted_fields,
)

# Training hyper-parameters for the DoppelGANger model.
dg_config = ra.TrainTimeGAN.DGConfig(
    sample_len=19,
    epoch=10,
    batch_size=64,
    sessions=3765,
)

config = ra.TrainTimeGAN.Config(encoder=encoder_config, doppelganger=dg_config)

# Create the train action from the assembled configuration.
train = ra.TrainTimeGAN(config)

In [None]:
# Assemble the training workflow: register the dataset first, then attach the
# train action with that dataset as its parent.
builder = rf.WorkflowBuilder()
builder.add_dataset(dataset)
builder.add_action(train, parents=[dataset])
# Submit the workflow over the connection (top-level `await` relies on the
# notebook's async cell support).
workflow = await builder.start(conn)

# Print the workflow id so the run can be identified later.
print(f"Workflow: {workflow.id()}")

Workflow: 94ebe6ff-3ca5-4bc3-8fc4-9c689ee46208


In [None]:
# Consume the workflow's progress stream until it is exhausted; the
# `.notebook()` variant renders a progress bar in the cell output, so the
# loop body has nothing to do.
async for progress in workflow.progress().notebook():
    pass

  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
async for model in workflow.models():
    print(model)

9aefb492-93ac-11ee-b6a3-166494505975


In [None]:
generate = recommends.generate()
generate.config().doppelganger.sessions = 4000
save = ra.DatasetSave({"name": "finance_privacy_syn"})
builder = rf.WorkflowBuilder()
builder.add_model(model)
builder.add_action(generate, parents=[model])
builder.add_action(save, parents=[generate])
workflow = await builder.start(conn)
print(f"Workflow: {workflow.id()}")

Workflow: 7deca736-6e1e-4ef3-851c-76a605f2278b


In [None]:
# Consume the generation workflow's progress stream until it is exhausted; the
# `.notebook()` variant renders a progress bar in the cell output, so the
# loop body has nothing to do.
async for progress in workflow.progress().notebook():
    pass

  0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
syn_privacy = None
async for sds in workflow.datasets():
    syn_privacy = await sds.to_local(conn)
syn_privacy.to_pandas()

Unnamed: 0,timestamp,amount,age,gender,merchant,category,fraud,session_key
0,2023-01-06 21:50:40.843,462.897137,3,**********,M348934600,fashion,0,0.0
1,2023-01-07 21:07:15.433,501.230683,3,**********,M348934600,transportation,0,0.0
2,2023-01-08 19:54:43.607,533.850421,3,**********,M348934600,transportation,0,0.0
3,2023-01-09 18:28:52.114,543.010546,3,**********,M348934600,transportation,0,0.0
4,2023-01-10 16:31:00.845,538.194137,3,**********,M348934600,transportation,0,0.0
...,...,...,...,...,...,...,...,...
55449,2023-01-02 23:31:26.000,-0.000074,3,**********,M348934600,transportation,0,3999.0
55450,2023-01-02 23:31:25.983,-0.000074,3,**********,M348934600,transportation,0,3999.0
55451,2023-01-02 23:31:25.967,-0.000074,3,**********,M348934600,transportation,0,3999.0
55452,2023-01-02 23:31:25.950,-0.000074,3,**********,M348934600,transportation,0,3999.0
