In [None]:
%%capture
# Install (or upgrade, via -U) the Rockfish SDK with its `labs` extra.
# `-f` points pip at the Rockfish package index page as an additional place to
# look for packages. `%%capture` above suppresses the installer output.
%pip install -U 'rockfish[labs]' -f 'https://docs142.rockfish.ai/packages/index.html'

In [None]:
import rockfish as rf
import rockfish.actions as ra

Replace `YOUR_API_KEY` in the cell below with your assigned API key string. Do not wrap the key in quotes.

For example, if the assigned API key is `abcd1234`, run the following:
```python
%env ROCKFISH_API_KEY=abcd1234
conn = rf.Connection.from_env()
```
If you do not have an API key, please reach out to support@rockfish.ai.

In [None]:
# Export the API key as an environment variable for this kernel
# (replace the YOUR_API_KEY placeholder first, without quotes).
%env ROCKFISH_API_KEY=YOUR_API_KEY
# Build the connection from the environment; `conn` is reused by every
# workflow started later in this notebook.
conn = rf.Connection.from_env()

In [None]:
# Download the example time-series dataset (pcap.csv) into the working directory.
# --no-clobber skips the download when the file already exists, so this cell
# can be re-run safely.
!wget --no-clobber https://docs142.rockfish.ai/tutorials/pcap.csv

In [None]:
# Load the downloaded pcap.csv as a Rockfish dataset
# ("DC pcap" is presumably the dataset's display name — confirm in the SDK docs).
dataset = rf.Dataset.from_csv("DC pcap", "pcap.csv")
# Trailing expression: render the dataset as a pandas DataFrame for inspection.
dataset.to_pandas()

In [None]:
# Metadata columns of the pcap data; every one is declared as categorical.
categorical_fields = ["srcip", "dstip", "srcport", "dstport", "proto"]

# Encoder section: which column is the timestamp, which columns are metadata,
# and which column is the (continuous) measurement.
encoder_config = {
    "timestamp": {"field": "timestamp"},
    "metadata": [
        {"field": name, "type": "categorical"} for name in categorical_fields
    ],
    "measurements": [{"field": "pkt_len", "type": "continuous"}],
}

# Model section: a single epoch and a 1-layer / 1-head GPT-2 config,
# presumably kept minimal so the tutorial trains quickly.
rtf_config = {
    "mode": "relational",
    "num_bootstrap": 2,
    "parent": {
        "epochs": 1,
        "transformer": {"gpt2_config": {"layer": 1, "head": 1, "embed": 1}},
    },
    "child": {"output_max_length": 2048, "epochs": 1},
}

# Later cells mutate config["rtf"] before generation, so keep the same nesting.
config = {"encoder": encoder_config, "rtf": rtf_config}

# Create the train action from the combined configuration.
train = ra.TrainTransformer(config)

In [None]:
# Assemble the training workflow: the dataset is the only parent of the
# train action.
builder = rf.WorkflowBuilder()
builder.add_dataset(dataset)
builder.add_action(train, parents=[dataset])
# Start the workflow over the connection and print its id for reference.
workflow = await builder.start(conn)
print(f"Workflow: {workflow.id()}")

In [None]:
# Iterate over the training workflow's log entries and print each one.
async for log in workflow.logs():
    print(log)

In [None]:
# Take the last model reported by the training workflow; the generation
# cells below use it as their starting point.
model = await workflow.models().last()
# Trailing expression: display the model object.
model

### Update the generated sessions

In [None]:
config["rtf"].update({"sessions": 200})
generate = ra.GenerateTransformer(config)
save = ra.DatasetSave({"name": "SyntheticData_large"})
builder = rf.WorkflowBuilder()
builder.add_model(model)
builder.add_action(generate, parents=[model])
builder.add_action(save, parents=[generate])
workflow = await builder.start(conn)
print(f"Workflow: {workflow.id()}")

In [None]:
# Iterate over the generation workflow's log entries and print each one.
async for log in workflow.logs():
    print(log)

In [None]:
syn = None
async for sds in workflow.datasets():
    syn = await sds.to_local(conn)
syn.to_pandas()

### Generate large dataset
We recommend using our `SessionTarget` action. Please refer to the [data generation documentation](https://docs142.rockfish.ai/data-gen.html#time-series-data) for details.

In [None]:
'''If wanting to concat by `concat_session_key`, 
update the config to include the sessions_key
'''
config["rtf"].update({"sessions_flag": True})
generate = ra.GenerateTransformer(config)

session_target = ra.SessionTarget(target=1000)  # providing the target "sessions" value
save = ra.DatasetSave(
    name="target_synthetic", concat_tables=True, concat_session_key="session_key"
)
builder = rf.WorkflowBuilder()
builder.add_model(model)
builder.add_action(generate, parents=[model, session_target])
builder.add_action(session_target, parents=[generate])
builder.add_action(save, parents=[generate])
workflow = await builder.start(conn)
print(f"Workflow: {workflow.id()}")

In [None]:
# Iterate over the SessionTarget workflow's log entries and print each one.
async for log in workflow.logs():
    print(log)

In [None]:
syn_large = None
async for sds in workflow.datasets():
    syn_large = await sds.to_local(conn)
syn_large.to_pandas()