In [16]:
from glob import glob 
import os

import pandas as pd

In [28]:
# Make sure one output directory exists per system before any CSV is written;
# exist_ok=True keeps re-running this cell harmless.
for output_dir in ('data/trainticket', 'data/eshopper'):
    os.makedirs(output_dir, exist_ok=True)

In [20]:
def transform_float_to_int(x):
    """Return *x* as an ``int`` when it holds an integer-like value.

    Args:
        x: A numeric value (e.g. ``3.0`` or ``3``).

    Returns:
        ``int(x)`` when converting loses no information.

    Raises:
        ValueError: If *x* has a fractional part; the message includes the
            offending value so the bad cell can be identified.
    """
    # Convert once and compare, instead of calling int() twice.
    as_int = int(x)
    if x != as_int:
        raise ValueError(f"expected an integer-like value, got {x!r}")
    return as_int

In [31]:
# Running index per system, used to number the exported CSV files (000, 001, ...).
dataset_counter = {'trainticket':0, 'eshopper':0}

# The mappings below never change between files, so they are built once here
# instead of on every loop iteration.

# Unify the per-system latency column under the single name "Latency".
column_mapper = {"ts-travel-service_queryInfo": "Latency",
                 "HomeControllerHome": "Latency",
                 "get": "GatewayGet"}

# Change anomaly semantic (0: no anomaly, 1: first anomaly, 2: second anomaly):
# experiment 0 -> 1, experiment 1 -> 2, experiments 2..9 -> 0.
# Experiment ids outside 0..9 are not in the mapping and come out as NaN.
anomaly_mapper = {i: 0 for i in range(2, 10)}
anomaly_mapper[0] = 1
anomaly_mapper[1] = 2

# DataFrame.applymap is deprecated in favour of DataFrame.map (pandas >= 2.1);
# use the new name when it exists and fall back on older pandas versions.
elementwise = pd.DataFrame.map if hasattr(pd.DataFrame, "map") else pd.DataFrame.applymap

for rq in ['rq1',  'rq2', 'rq3']:
    # glob() yields matches in arbitrary order; sorting makes the numbering of
    # the output files reproducible across runs and machines.
    for path in sorted(glob(f"../datasets/*/{rq}*/*.parquet")):
        # get system name: '../datasets/<system>/...' -> third path component
        system = path.split('/')[2]

        # Read dataframe
        df = pd.read_parquet(path)

        # Rename columns
        df.rename(axis='columns', mapper=column_mapper, inplace=True)

        # Drop useless column
        df.drop(columns=['traceId'], inplace=True)

        # Reorder columns: move 'Latency' to the front
        latency = df.pop('Latency')
        df.insert(0, 'Latency', latency)

        # Replace the 'experiment' column with the re-coded 'anomaly' column
        df['anomaly'] = df.pop('experiment').astype(int).map(anomaly_mapper)

        # Transform float to int (raises ValueError on non-integer values)
        df = elementwise(df, transform_float_to_int)

        # Save dataset under a zero-padded, per-system sequential name
        dataset_id = str(dataset_counter[system]).zfill(3)
        df.to_csv(f"data/{system}/{dataset_id}.csv", index=False)

        # Increment dataset counter
        dataset_counter[system] += 1

