#### 1. Importing libraries and initial configurations

In [1]:
import duckdb

In [2]:
# Path to the DuckDB database file; it is passed to duckdb.connect() in the
# table-building cell. The path is relative, so it is resolved against the
# directory the kernel is running in.
DB_FILE = "../../dataset/supervised_dataset.duckdb"

# Name of the table that the join step creates (or replaces) in DB_FILE.
FINAL_TRAINING_TABLE = "training_data"

#### 2. Joining `sequenced_feature_table` and `labeled_sequences` to create the final training table

In [4]:
# Build the final training table by attaching each sequence's label to its
# feature rows, then preview a single sequence as a sanity check.
#
# On success `conn` is intentionally left open: a later cell closes it.
# On failure the connection (if it was opened) is closed here and the error is
# re-raised so that "Run All" stops instead of continuing past a failed build.
conn = None
try:
    conn = duckdb.connect(database=DB_FILE, read_only=False)
    print(f"Successfully connected to {DB_FILE}")

    # INNER JOIN keeps only feature rows whose sequence_id has a matching row
    # in labeled_sequences; rows without a match are not carried over.
    query = f"""
    CREATE OR REPLACE TABLE {FINAL_TRAINING_TABLE} AS
    SELECT
        sft.*, -- Selects all columns from the sequenced feature table
        ls.label
    FROM
        sequenced_feature_table AS sft
    INNER JOIN
        labeled_sequences AS ls ON sft.sequence_id = ls.sequence_id;
    """

    print(f"Creating the final '{FINAL_TRAINING_TABLE}' table...")
    conn.execute(query)
    print("Table created successfully.")

    print("\n--- Verifying the final training data (showing one sequence) ---")
    # Pick the preview sequence with MIN(sequence_id) rather than an unordered
    # LIMIT 1: without an ORDER BY the row returned by LIMIT is not guaranteed
    # to be the same between runs. The preview rows are ordered by timestamp
    # so the sequence is displayed chronologically.
    verification_query = f"""
    SELECT user_id, timestamp, event_type, sequence_id, label
    FROM {FINAL_TRAINING_TABLE}
    WHERE sequence_id = (SELECT MIN(sequence_id) FROM {FINAL_TRAINING_TABLE})
    ORDER BY timestamp;
    """
    verification_df = conn.execute(verification_query).fetchdf()
    print(verification_df)

    print(f"\nFinal training table is now successfully constructed, The table is stored as : '{FINAL_TRAINING_TABLE}'.")

except Exception as e:
    print(f"An error occurred: {e}")
    # Release the database file before propagating the failure; `conn` is
    # still None when duckdb.connect() itself raised.
    if conn is not None:
        conn.close()
    raise

Successfully connected to ../../dataset/supervised_dataset.duckdb
Creating the final 'training_data' table...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Table created successfully.

--- Verifying the final training data (showing one sequence) ---
     user_id           timestamp event_type sequence_id  label
0    KWC0004 2010-03-18 08:12:39       HTTP  KWC0004_94      0
1    KWC0004 2010-03-18 09:02:09       HTTP  KWC0004_94      0
2    KWC0004 2010-03-18 09:19:16       HTTP  KWC0004_94      0
3    KWC0004 2010-03-18 09:45:59      Email  KWC0004_94      0
4    KWC0004 2010-03-18 10:32:08       HTTP  KWC0004_94      0
..       ...                 ...        ...         ...    ...
321  KWC0004 2010-03-18 14:12:07       HTTP  KWC0004_94      0
322  KWC0004 2010-03-18 14:17:54       HTTP  KWC0004_94      0
323  KWC0004 2010-03-18 14:35:12       HTTP  KWC0004_94      0
324  KWC0004 2010-03-18 15:39:56       HTTP  KWC0004_94      0
325  KWC0004 2010-03-18 16:19:00       HTTP  KWC0004_94      0

[326 rows x 5 columns]

Final training table is now successfully constructed, The table is stored as : 'training_data'.


In [6]:
# Close the DuckDB connection opened in the table-building cell above.
conn.close() 