In [12]:
import pandas as pd
from rockfish import LogLevel


def sessionise_data(df, start_session=1, window_size=24, step_size=1):
    """Cut a time-stamped frame into overlapping, fixed-length sessions.

    A window of ``window_size`` hours slides over the data in steps of
    ``step_size`` hours, beginning at the earliest timestamp. Every row whose
    timestamp lies in the half-open interval
    ``[window_start, window_start + window_size)`` is copied into that
    window's session, so with overlapping windows a row appears in several
    sessions. Sliding stops as soon as a full window would end after the
    latest timestamp; trailing partial windows are not emitted.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'timestamp'`` column that ``pd.to_datetime`` can
        parse. The frame passed in is not modified.
    start_session : int, default 1
        ``sessionID`` given to the first window; later windows count up
        from it.
    window_size : int or float, default 24
        Window length in hours.
    step_size : int or float, default 1
        Distance between consecutive window starts, in hours.

    Returns
    -------
    pandas.DataFrame
        All session slices concatenated with a fresh 0..n-1 index and an
        added ``'sessionID'`` column. If no full window fits into the data
        (empty input or a time span shorter than ``window_size``), an empty
        frame with the same columns plus ``'sessionID'`` is returned.

    Raises
    ------
    ValueError
        If ``window_size`` or ``step_size`` is not positive (a non-positive
        step would never terminate).
    """
    if window_size <= 0 or step_size <= 0:
        raise ValueError("window_size and step_size must be positive")

    # Build a new frame instead of assigning into `df`, so the caller's
    # DataFrame keeps its original 'timestamp' column.
    df = df.assign(timestamp=pd.to_datetime(df['timestamp']))

    # Sort the dataframe by timestamp
    df = df.sort_values(by='timestamp').reset_index(drop=True)

    # Loop-invariant values, computed once.
    window = pd.Timedelta(hours=window_size)
    step = pd.Timedelta(hours=step_size)
    timestamps = df['timestamp']
    end_time = timestamps.max()

    sessions = []
    current_time = timestamps.min()
    session_id = start_session

    # For empty input min()/max() are NaT; the comparison is then False and
    # the loop body never runs.
    while current_time + window <= end_time:
        in_window = (timestamps >= current_time) & (timestamps < current_time + window)
        # assign() returns a copy, so the sorted frame itself is never altered.
        sessions.append(df[in_window].assign(sessionID=session_id))

        current_time += step
        session_id += 1

    if not sessions:
        # pd.concat([]) would raise "No objects to concatenate"; hand back a
        # well-formed empty result instead.
        return df.iloc[0:0].assign(sessionID=[]).astype({'sessionID': 'int64'})

    return pd.concat(sessions, ignore_index=True)

In [20]:
df = pd.read_csv('location3.csv')

In [21]:
df = sessionise_data(df)

In [22]:
# NOTE(review): this writes the sessionised frame back over 'location3.csv',
# the same file an earlier cell read as the raw input. The raw data is
# replaced, and re-running the notebook would sessionise rows that are
# already sessionised. Consider writing to a separate output file.
df.to_csv('location3.csv',index=False)

In [23]:
df.columns

Index(['feature_0', 'feature_1', 'feature_2', 'feature_3', 'feature_4',
       'feature_5', 'feature_6', 'feature_7', 'feature_8', 'feature_9',
       'feature_10', 'feature_11', 'feature_12', 'feature_13', 'feature_14',
       'feature_15', 'feature_16', 'feature_17', 'feature_18', 'feature_19',
       'feature_20', 'feature_21', 'feature_22', 'feature_23', 'feature_24',
       'timestamp', 'sessionID'],
      dtype='object')

In [24]:
# Read only the first 10,000 data rows; parsing the whole file just to slice
# it afterwards wastes time and memory.
df = pd.read_csv('location1.csv', nrows=10000)

In [25]:
df.to_csv('onboarding_sample.csv', index=False)

In [26]:
import pickle

# NOTE(review): unpickling can execute arbitrary code; only load
# 'runtime_conf.pkl' when it comes from a trusted source.
# The context manager closes the file handle as soon as the object is loaded.
with open('runtime_conf.pkl', 'rb') as conf_file:
    q = pickle.load(conf_file)

In [37]:
# `rf` is used below but is not imported in any visible cell; import it here
# so the cell also works on a fresh kernel.
import rockfish as rf
from actions.dg.train import TrainTimeGAN

# Re-create the 'train-time-gan' action as a local TrainTimeGAN instance,
# built from the unstructured config of the action stored in the unpickled
# builder `q`.
r = TrainTimeGAN(rf.converter.unstructure(q.actions['train-time-gan'].config()))

In [38]:
r

<actions.dg.train.TrainTimeGAN at 0x38f1cec10>

In [39]:
q

<rockfish.workflow.WorkflowBuilder at 0x349d77f50>

In [41]:
q.actions['train-time-gan'] = r

In [43]:
q.actions

{'datastream-load': <rockfish.actions.datastream.DatastreamLoad at 0x33ee895d0>,
 'train-time-gan': <actions.dg.train.TrainTimeGAN at 0x38f1cec10>}

In [44]:
# Persist the modified builder. The context manager flushes and closes the
# file, so the pickle on disk is complete before anything reads it back.
with open('runtime_conf.pkl', 'wb') as conf_file:
    pickle.dump(q, conf_file)

In [45]:
r

<actions.dg.train.TrainTimeGAN at 0x38f1cec10>

In [53]:
# NOTE(review): LogLevel is imported here from `rockfish.events`, while the
# first cell imports it from `rockfish`; this later import is the one in
# effect for the code below.
from rockfish.events import LogLevel
# Build a new workflow with a single path: a dataset created from
# 'location1.csv' feeding the TrainTimeGAN action `r` from an earlier cell.
# ('he' is presumably the dataset name -- TODO confirm.)
builder = rf.WorkflowBuilder()
builder.add_path(rf.Dataset.from_csv('he', 'location1.csv'), r)
# Start the workflow on the local connection. Top-level `await` relies on the
# notebook kernel's async support.
workflow = await builder.start(rf.Connection.local())
# Print every log record the workflow yields at DEBUG level.
# NOTE(review): the loop appears to run until the log stream ends or the cell
# is interrupted -- confirm.
async for log in workflow.logs(level=LogLevel.DEBUG):
    print(log)

2024-09-26T20:18:55.357578Z dataset-load: INFO Loading dataset 'Y07byMzJiv0Z4VugaHeRL' with 1002240 rows
2024-09-26T20:18:55.389857Z train-time-gan: DEBUG Training on mps:1
2024-09-26T20:19:00.588911Z train-time-gan: INFO Starting DG training job
2024-09-26T20:19:13.548179Z train-time-gan: DEBUG Epoch 1, g_loss_d: -3.8544, g_loss: -2.7139, d_loss_fake:  3.9665, d_loss_real: -1292.2047, d_loss_gp:  59.7385, d_loss: -690.8535, g_loss_attr_d:  1.1405, attr_d_loss_fake: -1.0972, attr_d_loss_real:  0.3274, attr_d_loss_gp:  0.0143, attr_d_loss: -0.6272
2024-09-26T20:19:13.551315Z train-time-gan: INFO Epoch 1 completed.
2024-09-26T20:19:24.267152Z train-time-gan: DEBUG Epoch 2, g_loss_d: -3.7732, g_loss: -1.1264, d_loss_fake:  3.7545, d_loss_real: -1175.9513, d_loss_gp:  51.4496, d_loss: -657.7007, g_loss_attr_d:  2.6468, attr_d_loss_fake: -2.6076, attr_d_loss_real:  1.3635, attr_d_loss_gp:  0.0038, attr_d_loss: -1.2058
2024-09-26T20:19:24.269799Z train-time-gan: INFO Epoch 2 completed.
2024-

CancelledError: 

In [51]:
await workflow.stop()

In [56]:
df = pd.read_csv('location1.csv')

In [48]:
df.fillna(0, inplace=True)

In [49]:
df.to_csv('location1.csv', index=False)

In [54]:
conn = rf.Connection.from_config()

In [55]:
(await conn.get_workflow('5r4AlW5j3m6WgwTngF5xuZ')).stop()

<coroutine object Workflow.stop at 0x14f3e60a0>

In [57]:
df.sessionID.max()

696

In [58]:
df[df.sessionID == 100].index

Index([142560, 142561, 142562, 142563, 142564, 142565, 142566, 142567, 142568,
       142569,
       ...
       143990, 143991, 143992, 143993, 143994, 143995, 143996, 143997, 143998,
       143999],
      dtype='int64', length=1440)

In [59]:
len(df)

1002240

In [64]:
df.iloc[:1000].to_csv('locationx.csv', index=False)