# Due to data policies, this data can't be shared with the central location where the model is stored and refined.

In [1]:
import rockfish as rf
import rockfish.actions as ra

In [2]:
# Load the local location-1 CSV into a Rockfish Dataset.
# NOTE(review): 'loc1' is presumably the dataset name — confirm against the SDK.
data = rf.Dataset.from_csv('loc1', './location1.csv')

In [5]:
# Preview the first rows as a pandas DataFrame to sanity-check the columns.
# NOTE(review): the rendered preview includes customer IDs and email addresses;
# confirm this output is acceptable to keep before sharing the notebook.
data.to_pandas().head()

Unnamed: 0,customer,email,age,gender,merchant,category,amount,fraud,timestamp
0,C100045114,nmontgomery@example.net,4,M,M348934600,transportation,35.13,0,2023-01-01 00:00:00
1,C100045114,nmontgomery@example.net,4,M,M348934600,transportation,27.63,0,2023-01-01 08:00:00
2,C100045114,nmontgomery@example.net,4,M,M348934600,transportation,13.46,0,2023-01-01 16:00:00
3,C100045114,nmontgomery@example.net,4,M,M348934600,transportation,28.86,0,2023-01-02 00:00:00
4,C100045114,nmontgomery@example.net,4,M,M151143676,barsandrestaurants,64.99,0,2023-01-02 08:00:00


In [2]:
# Create the Rockfish connection used below to start the workflow and label the model.
# NOTE(review): from_env() presumably reads its settings from environment variables — confirm.
conn = rf.Connection.from_env()

In [8]:
# Paired join/split actions, grouped per entity.
# NOTE(review): presumably the join folds the listed fields into the single
# `append_field` and the split reverses it — confirm against the SDK docs.
# None of these actions is added to the workflow path in the cells shown here.

# Customer: 'customer' + 'email' <-> 'customer'
join_customer = ra.JoinFields(
    fields=['customer', 'email'],
    append_field='customer',
)
split_customer = ra.SplitField(
    field='customer',
    append_fields=['customer', 'email'],
)

# Merchant: 'merchant' + 'category' <-> 'merchant'
join_merchant = ra.JoinFields(
    fields=['merchant', 'category'],
    append_field='merchant',
)
split_merchant = ra.SplitField(
    field='merchant',
    append_fields=['merchant', 'category'],
)

In [73]:
# --- Encoder: how each column of the dataset is presented to the model ---------
# `customer` is the session field; `age` and `gender` are listed as metadata,
# and the remaining columns are listed as measurements.
metadata_fields = [
    ra.TrainTimeGAN.FieldConfig(field=column, type=kind)
    for column, kind in (
        ("customer", "session"),
        ("age", "categorical"),
        ("gender", "categorical"),
    )
]
measurement_fields = [
    ra.TrainTimeGAN.FieldConfig(field=column, type=kind)
    for column, kind in (
        ("merchant", "categorical"),
        ("category", "categorical"),
        ("amount", "continuous"),
        ("fraud", "categorical"),
    )
]
encoder_config = ra.TrainTimeGAN.DatasetConfig(
    timestamp=ra.TrainTimeGAN.TimestampConfig(field="timestamp"),
    metadata=metadata_fields,
    measurements=measurement_fields,
)

# --- Model hyperparameters -----------------------------------------------------
# Kept in the original keyword order. The config echoed by `train.config()`
# shows `attr_d_lr`, the `*_num_units` settings and the `*discriminator_num_*`
# settings being stored under `extras` rather than as named DGConfig fields.
dg_hyperparameters = dict(
    # sequence / training schedule
    sample_len=4,
    epoch=250,
    epoch_checkpoint_freq=100,
    sessions=4000,
    batch_size=512,
    activate_normalization_per_sample=False,
    # learning rates
    g_lr=0.001,
    d_lr=0.001,
    attr_d_lr=0.001,
    # generator sizes
    generator_attribute_num_units=100,
    generator_attribute_num_layers=3,
    generator_feature_num_units=100,
    generator_feature_num_layers=5,
    # discriminator sizes
    discriminator_num_layers=5,
    discriminator_num_units=200,
    attr_discriminator_num_layers=5,
    attr_discriminator_num_units=200,
)
model_config = ra.TrainTimeGAN.DGConfig(**dg_hyperparameters)

# --- Training action -----------------------------------------------------------
config = ra.TrainTimeGAN.Config(encoder=encoder_config, doppelganger=model_config)
train = ra.TrainTimeGAN(config)

In [74]:
# Display the training action's configuration as the action reports it,
# to check which settings were accepted as named fields and which ended up in `extras`.
train.config()

Config(
    doppelganger=DGConfig(
        sample_len=4,
        activate_normalization_per_sample=False,
        generator_attribute_num_layers=3,
        generator_feature_num_layers=5,
        epoch=250,
        epoch_checkpoint_freq=100,
        batch_size=512,
        g_lr=0.001,
        d_lr=0.001,
        attr_d_beta1=0.5,
        sessions=4000,
        extras={
            'attr_d_lr': 0.001,
            'generator_attribute_num_units': 100,
            'generator_feature_num_units': 100,
            'discriminator_num_layers': 5,
            'discriminator_num_units': 200,
            'attr_discriminator_num_layers': 5,
            'attr_discriminator_num_units': 200
        }
    ),
    encoder=DatasetConfig(
        name='default',
        metadata=[
            FieldConfig(field='customer', type='session', semantic_type=None),
            FieldConfig(field='age', type='categorical', semantic_type=None),
            FieldConfig(
                field='gender',
              

In [75]:
# Build a workflow whose single path feeds the local dataset into the training action.
builder = rf.WorkflowBuilder()
builder.add_path(data, train)
# Start the workflow over the connection; top-level `await` relies on the notebook's async support.
workflow = await builder.start(conn)
# Show the workflow id so the run can be identified later.
workflow.id()

'6UfjK0pqV7S2RN3syFcVH2'

In [76]:
async for log in workflow.logs():
    print(log)

2024-09-12T23:34:49Z dataset-load: INFO Loading dataset '2mimoHkX8GWxCnL7ssF7ku' with 8779 rows
2024-09-12T23:34:49Z train-time-gan: WARN Unsafe time cast on timestamp
2024-09-12T23:34:49Z train-time-gan: INFO Starting DG training job
2024-09-12T23:34:50Z train-time-gan: INFO Epoch 1 completed.
2024-09-12T23:34:50Z train-time-gan: INFO Epoch 2 completed.
2024-09-12T23:34:50Z train-time-gan: INFO Epoch 3 completed.
2024-09-12T23:34:50Z train-time-gan: INFO Epoch 4 completed.
2024-09-12T23:34:50Z train-time-gan: INFO Epoch 5 completed.
2024-09-12T23:34:50Z train-time-gan: INFO Epoch 6 completed.
2024-09-12T23:34:51Z train-time-gan: INFO Epoch 7 completed.
2024-09-12T23:34:51Z train-time-gan: INFO Epoch 8 completed.
2024-09-12T23:34:51Z train-time-gan: INFO Epoch 9 completed.
2024-09-12T23:34:51Z train-time-gan: INFO Epoch 10 completed.
2024-09-12T23:34:51Z train-time-gan: INFO Epoch 11 completed.
2024-09-12T23:34:51Z train-time-gan: INFO Epoch 12 completed.
2024-09-12T23:34:51Z train-tim

In [81]:
(await (await workflow.models().last()).add_labels(conn,location='location1')).labels

{'location': 'location1', 'workflow_id': '6UfjK0pqV7S2RN3syFcVH2'}