#### 1. Importing libraries and initial configurations

In [1]:
import duckdb

In [2]:
DB_FILE = '../../dataset/supervised_dataset.duckdb'
FINAL_TRANSFORMED_DATA_TABLE = 'transformed_data' 

#### 2. Joining sequenced_feature_table and labeled_sequences to create the transformed data table

In [6]:
try:
    conn = duckdb.connect(database=DB_FILE, read_only=False)
    print(f"Successfully connected to {DB_FILE}")

    query = f"""
    CREATE OR REPLACE TABLE {FINAL_TRANSFORMED_DATA_TABLE} AS
    SELECT
        sft.*, -- Selects all columns from the sequenced feature table
        ls.label
    FROM
        sequenced_feature_table AS sft
    INNER JOIN
        labeled_sequences AS ls ON sft.sequence_id = ls.sequence_id;
    """

    print(f"Creating the final '{FINAL_TRANSFORMED_DATA_TABLE}' table...")
    conn.execute(query)
    print("Table created successfully.")

    print("\n--- Verifying the final transformed data table (showing one sequence) ---")
    verification_df = conn.execute(
        f"SELECT user_id, timestamp, event_type, sequence_id, label FROM {FINAL_TRANSFORMED_DATA_TABLE} WHERE sequence_id = (SELECT sequence_id FROM {FINAL_TRANSFORMED_DATA_TABLE} LIMIT 1);"
    ).fetchdf()
    print(verification_df)

    print(f"\nFinal transformed data table is now successfully constructed, The table is stored as : '{FINAL_TRANSFORMED_DATA_TABLE}'.")

except Exception as e:
    print(f"An error occurred: {e}")

Successfully connected to ../../dataset/supervised_dataset.duckdb
Creating the final 'transformed_data' table...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Table created successfully.

--- Verifying the final transformed data table (showing one sequence) ---
    user_id           timestamp event_type  sequence_id  label
0   ABN0239 2010-03-05 14:01:13       HTTP  ABN0239_131      0
1   ABN0239 2010-03-05 14:13:37      Email  ABN0239_131      0
2   ABN0239 2010-03-05 16:35:48       HTTP  ABN0239_131      0
3   ABN0239 2010-03-05 17:07:40       HTTP  ABN0239_131      0
4   ABN0239 2010-03-05 17:23:26       HTTP  ABN0239_131      0
..      ...                 ...        ...          ...    ...
66  ABN0239 2010-03-05 16:35:19       HTTP  ABN0239_131      0
67  ABN0239 2010-03-05 17:50:09       HTTP  ABN0239_131      0
68  ABN0239 2010-03-05 17:50:50       HTTP  ABN0239_131      0
69  ABN0239 2010-03-05 17:50:16       HTTP  ABN0239_131      0
70  ABN0239 2010-03-05 17:50:37       HTTP  ABN0239_131      0

[71 rows x 5 columns]

Final transformed data table is now successfully constructed, The table is stored as : 'transformed_data'.


In [None]:
conn.close()