In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

import plotly.io as pio
# Select Plotly's "browser" renderer as the default used by fig.show() in this notebook.
pio.renderers.default = "browser"
from pathlib import Path

Bit positions of the status flags stored in the `overrun` column

In [None]:
# Bit positions (not masks) of the flags tested against the `overrun` column:
# a flag is set when `value & (1 << BIT)` is non-zero.
OVERRUN_BIT, COMM_FAILURE_BIT = 0, 1

### Load acquisition data

In [None]:
# Locate the raw capture file. glob() yields matches in a filesystem-dependent
# order, so the matches are sorted to make the selected file reproducible.
matches = sorted(Path("raw_dataset").glob("benchy_27*"))
if not matches:
    # Fail with an explicit message instead of a bare IndexError on `[0]`.
    raise FileNotFoundError("No file matching 'benchy_27*' found in 'raw_dataset'")
path = matches[0]
print(path)
df = pd.read_csv(path)
# Keep one row per distinct `block` value (the first occurrence).
df_test = df.drop_duplicates(subset=["block"])

### Data Analysis

In [None]:
# Number of rows in the raw frame.
samples = df.shape[0]
# Rows -> seconds -> hours. The divisor 3200 is presumably the acquisition
# rate in samples per second -- TODO confirm against the acquisition setup.
time = samples / 3200 / 3600
print(f"Samples: {samples}", f"Estimated hours: {time}", sep="\n")

Acquisition status analysis

In [None]:
# Status word of each row in the de-duplicated (one row per block) frame.
status = df_test["overrun"].to_numpy()

# Shift the flag down to bit 0 and mask it, so every element is 0 or 1 and
# .sum() is the number of rows with the flag set. Masking in place
# (value & (1 << bit)) would yield 0 or 2 for bit 1 and double the count.
overrun = (status >> OVERRUN_BIT) & 1
comm_failure = (status >> COMM_FAILURE_BIT) & 1

max_queue_size = df_test["queue_state"].to_numpy().max()
# Rows whose `count` exceeds 32; 32 is presumably the expected maximum number
# of samples per block -- TODO confirm.
count_issues = (df_test["count"] > 32).sum()

print(f"overrun failures: {overrun.sum()}")
print(f"comm failures: {comm_failure.sum()}")
print(f"max queue: {max_queue_size}")
print(f"count issues: {count_issues}")

In [None]:
# Rows where `block` is lower than in the previous row.
df.loc[df["block"].diff().lt(0)]

In [None]:
# Rows where `block` jumps by more than one relative to the previous row.
df.loc[df["block"].diff().gt(1)]

Check for NaN values

In [None]:
# Report, per acceleration axis, whether the raw frame contains any NaN.
for label, column in (
    ("Accel_x", "accel_x"),
    ("Accel_y", "accel_y"),
    ("Accel_z", "accel_z"),
):
    print(f"{label} NaNs: {df[column].isna().any()}")

In [None]:
# Rows of the raw frame whose accel_x value is missing.
df.loc[df["accel_x"].isna()]

### Data view

In [None]:
# Positional row window of the raw frame to inspect (end bound exclusive).
view_start, view_stop = 16_600_000, 17_000_000
df_view = df.iloc[view_start:view_stop]

In [None]:
# Expose the row index as a regular column so it can serve as the x axis.
# .assign() returns a new frame instead of writing into `df_view`, which is a
# slice of `df`; writing into the slice triggers SettingWithCopyWarning.
df_view = df_view.assign(index=df_view.index)
fig = px.line(df_view, x='index', y='accel_x', title='Sensor Data Over Time')
fig.update_layout(xaxis_title='Time', yaxis_title='Sensor Value')
fig.show()
print(path)

### Data Labeling

In [None]:
# Build the labeled frame in one chain so every step returns a new object;
# assigning a column directly on `df.iloc[...]` writes into a slice of `df`
# and triggers SettingWithCopyWarning.
df_printing = (
    df.iloc[70_500:16_754_000]  # positional row range that receives the label (end exclusive)
    .drop(columns=["block", "count", "overrun", "queue_state"])
    .assign(**{"class": "nozzle_02"})  # "class" is a Python keyword, hence the dict form
    .astype({"class": "category"})
)

In [None]:
# Report, per acceleration axis, whether the labeled frame contains any NaN.
for label, column in (
    ("Accel_x", "accel_x"),
    ("Accel_y", "accel_y"),
    ("Accel_z", "accel_z"),
):
    print(f"{label} NaNs: {df_printing[column].isna().any()}")

In [None]:
# Cast the three acceleration columns to int16.
# astype() does not range-check this cast, so a reading outside the int16
# range would wrap around silently and corrupt the saved dataset; check the
# value range first and fail loudly instead.
accel_columns = ["accel_x", "accel_y", "accel_z"]
int16_limits = np.iinfo(np.int16)
for column in accel_columns:
    lowest, highest = df_printing[column].min(), df_printing[column].max()
    if lowest < int16_limits.min or highest > int16_limits.max:
        raise ValueError(
            f"{column} spans [{lowest}, {highest}], which does not fit in int16"
        )
df_printing = df_printing.astype({column: "int16" for column in accel_columns})

In [None]:
# Display the column dtypes of the labeled frame after the casts.
df_printing.dtypes

In [None]:
# Show which raw file was loaded, for comparison with the output filename used when saving.
print(path)

In [None]:
# Write the labeled frame as gzip-compressed Parquet.
# The output directory is created first: to_parquet() does not create missing
# parent directories and would fail on a fresh checkout.
output_path = Path('labeled_dataset_typed/benchy_27_nozzle_02.parquet.gzip')
output_path.parent.mkdir(parents=True, exist_ok=True)
df_printing.to_parquet(output_path, compression='gzip')

Verify saved data

In [None]:
# Read the saved file back from disk and display it as the cell output.
pd.read_parquet('labeled_dataset_typed/benchy_27_nozzle_02.parquet.gzip')