# Due to data policies, this data can't be shared with the central location where the model is stored and refined.

In [1]:
import rockfish as rf
import rockfish.actions as ra

In [16]:
# Wrap the local location-1 CSV file in a Rockfish dataset.
DATASET_NAME = 'loc1'
CSV_PATH = './location1.csv'
data = rf.Dataset.from_csv(DATASET_NAME, CSV_PATH)

In [17]:
# Convert the dataset to a pandas DataFrame and show its first rows as a sanity check.
transactions_df = data.to_pandas()
transactions_df.head()

Unnamed: 0,timestamp,amount,age,gender,merchant,category,fraud,customer
0,2023-01-04 01:13:59.021,172.731084,3,F,M480139044,health,1,108.0
1,2023-01-09 11:44:38.836,132.292031,4,F,M1823072687,health,1,608.0
2,2023-01-10 09:37:59.665,312.221864,4,F,M480139044,health,1,608.0
3,2023-01-19 03:18:15.512,234.86752,3,M,M480139044,health,1,618.0
4,2023-01-12 06:58:34.742,218.621997,2,F,M480139044,health,1,714.0


In [18]:
# Create the Rockfish connection that is later used to start the workflow and to
# label the trained model.
# NOTE(review): presumably reads the API endpoint/credentials from environment
# variables — confirm which variables must be set before running.
conn = rf.Connection.from_env()

In [19]:
# Field join/split actions for the customer and merchant columns.
# Each JoinFields names the input `fields` and the `append_field` that receives the
# result; each SplitField mirrors it (one `field` in, several `append_fields` out).
# NOTE(review): none of these four actions is added to the workflow in this notebook
# (only `data` and `train` are passed to `builder.add_path`), and the preview of
# `data` shows no 'email' column — confirm whether they are still needed.
join_customer = ra.JoinFields(fields=['customer', 'email'], append_field='customer')
join_merchant = ra.JoinFields(fields=['merchant', 'category'], append_field='merchant')
split_customer = ra.SplitField(field='customer', append_fields=['customer', 'email'])
split_merchant = ra.SplitField(field='merchant', append_fields=['merchant', 'category'])

In [20]:
# Assemble the TrainTimeGAN action: a dataset/encoder description of the columns
# plus the DoppelGANger hyperparameters.
TimeGAN = ra.TrainTimeGAN

# (column, type) pairs for the two field groups of the encoder config.
# NOTE(review): presumably `metadata` holds per-session attributes and
# `measurements` holds per-event values — confirm against the Rockfish docs.
metadata_fields = [
    ("customer", "session"),
    ("age", "categorical"),
    ("gender", "categorical"),
]
measurement_fields = [
    ("merchant", "categorical"),
    ("category", "categorical"),
    ("amount", "continuous"),
    ("fraud", "categorical"),
]

encoder_config = TimeGAN.DatasetConfig(
    timestamp=TimeGAN.TimestampConfig(field="timestamp"),
    metadata=[
        TimeGAN.FieldConfig(field=column, type=kind)
        for column, kind in metadata_fields
    ],
    measurements=[
        TimeGAN.FieldConfig(field=column, type=kind)
        for column, kind in measurement_fields
    ],
)

# Hyperparameters passed verbatim as keyword arguments to DGConfig.
dg_hyperparameters = {
    "sample_len": 4,
    "epoch": 250,
    "epoch_checkpoint_freq": 100,
    "sessions": 4000,
    "batch_size": 512,
    "activate_normalization_per_sample": False,
    # Learning rates.
    "g_lr": 0.001,
    "d_lr": 0.001,
    "attr_d_lr": 0.001,
    # Generator sizes.
    "generator_attribute_num_units": 100,
    "generator_attribute_num_layers": 3,
    "generator_feature_num_units": 100,
    "generator_feature_num_layers": 5,
    # Discriminator sizes.
    "discriminator_num_layers": 5,
    "discriminator_num_units": 200,
    "attr_discriminator_num_layers": 5,
    "attr_discriminator_num_units": 200,
}
model_config = TimeGAN.DGConfig(**dg_hyperparameters)

config = TimeGAN.Config(
    doppelganger=model_config,
    encoder=encoder_config,
)
train = TimeGAN(config)

In [21]:
# Display the action's resolved configuration to check how the settings above
# were interpreted.
train.config()

Config(
    doppelganger=DGConfig(
        sample_len=4,
        activate_normalization_per_sample=False,
        generator_attribute_num_layers=3,
        generator_feature_num_layers=5,
        epoch=250,
        epoch_checkpoint_freq=100,
        batch_size=512,
        g_lr=0.001,
        d_lr=0.001,
        attr_d_beta1=0.5,
        sessions=4000,
        extras={
            'attr_d_lr': 0.001,
            'generator_attribute_num_units': 100,
            'generator_feature_num_units': 100,
            'discriminator_num_layers': 5,
            'discriminator_num_units': 200,
            'attr_discriminator_num_layers': 5,
            'attr_discriminator_num_units': 200
        }
    ),
    encoder=DatasetConfig(
        name='default',
        metadata=[
            FieldConfig(field='customer', type='session', semantic_type=None),
            FieldConfig(field='age', type='categorical', semantic_type=None),
            FieldConfig(
                field='gender',
              

In [22]:
# Build a workflow that feeds the dataset into the training action, start it over
# `conn`, and display the resulting workflow id.
builder = rf.WorkflowBuilder()
builder.add_path(data, train)
workflow = await builder.start(conn)  # top-level await: relies on the notebook kernel's autoawait
workflow.id()

'5CZTvI0AdaS2OsdKGvq30C'

In [23]:
# Iterate asynchronously over the workflow's logs and print each entry.
# NOTE(review): the output contains one line per training epoch — consider
# clearing or collapsing it before sharing the notebook.
async for log in workflow.logs():
    print(log)

2024-09-20T21:07:54Z dataset-load: INFO Loading dataset '4nEHxcETz97dTm43RwuNPO' with 7386 rows
2024-09-20T21:07:55Z train-time-gan: INFO Starting DG training job
2024-09-20T21:07:57Z train-time-gan: INFO Epoch 1 completed.
2024-09-20T21:07:58Z train-time-gan: INFO Epoch 2 completed.
2024-09-20T21:07:59Z train-time-gan: INFO Epoch 3 completed.
2024-09-20T21:08:00Z train-time-gan: INFO Epoch 4 completed.
2024-09-20T21:08:01Z train-time-gan: INFO Epoch 5 completed.
2024-09-20T21:08:02Z train-time-gan: INFO Epoch 6 completed.
2024-09-20T21:08:03Z train-time-gan: INFO Epoch 7 completed.
2024-09-20T21:08:04Z train-time-gan: INFO Epoch 8 completed.
2024-09-20T21:08:05Z train-time-gan: INFO Epoch 9 completed.
2024-09-20T21:08:06Z train-time-gan: INFO Epoch 10 completed.
2024-09-20T21:08:07Z train-time-gan: INFO Epoch 11 completed.
2024-09-20T21:08:09Z train-time-gan: INFO Epoch 12 completed.
2024-09-20T21:08:10Z train-time-gan: INFO Epoch 13 completed.
2024-09-20T21:08:11Z train-time-gan: INF

In [24]:
(await (await workflow.models().last()).add_labels(conn,location='location1_x')).labels

{'location': 'location1_x', 'workflow_id': '5CZTvI0AdaS2OsdKGvq30C'}