In [9]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import joblib


In [10]:
# Load the normalized chest recordings and their per-recording labels.
# The rest of the notebook treats X as (num_samples, num_timesteps, num_features)
# and y as one label per recording, i.e. (num_samples,).
X = np.load("../data/processed/chest_X.npy")
y = np.load("../data/processed/chest_y.npy")

print(f"✅ Loaded X shape: {X.shape}")
print(f"✅ Loaded y shape: {y.shape}")

# Fail fast on inconsistent inputs. The windowing step below pairs X[i] with
# y[i], so a label count that differs from the recording count would either
# crash mid-loop or silently ignore trailing labels; the conditioning step
# concatenates along the last axis of a 3-D array.
if X.ndim != 3:
    raise ValueError(
        f"Expected X to be 3-D (samples, timesteps, features), got shape {X.shape}"
    )
if y.shape[:1] != X.shape[:1]:
    raise ValueError(
        f"X and y disagree on the number of samples: X has shape {X.shape}, "
        f"y has shape {y.shape}"
    )

✅ Loaded X shape: (171, 42000, 8)
✅ Loaded y shape: (171,)


In [14]:
def segment_windows(recordings, labels, window_size, stride):
    """Cut every recording into fixed-length, possibly overlapping windows.

    Windows start at 0, stride, 2*stride, ... and only full windows are kept,
    so up to ``window_size - 1`` trailing timesteps of each recording are dropped.

    Parameters
    ----------
    recordings : np.ndarray
        Array whose first two axes are (num_recordings, num_timesteps); any
        remaining axes (e.g. features) are carried through unchanged.
    labels : array-like
        One label per recording, aligned with the first axis of ``recordings``.
    window_size : int
        Number of timesteps in each window.
    stride : int
        Offset between the starts of consecutive windows.

    Returns
    -------
    windows : np.ndarray
        Shape (num_recordings * num_windows, window_size, ...), ordered by
        recording first and window start second.
    window_labels : np.ndarray
        The label of the source recording for every row of ``windows``.

    Raises
    ------
    ValueError
        If ``window_size`` or ``stride`` is not positive, if the recordings are
        shorter than one window, or if the label count does not match the
        number of recordings.
    """
    if window_size < 1 or stride < 1:
        raise ValueError(
            f"window_size and stride must be positive, got {window_size} and {stride}"
        )

    num_recordings, num_timesteps = recordings.shape[:2]
    trailing_shape = recordings.shape[2:]

    if num_timesteps < window_size:
        raise ValueError(
            f"Recordings have {num_timesteps} timesteps, fewer than window_size={window_size}"
        )
    if len(labels) != num_recordings:
        raise ValueError(
            f"Got {len(labels)} labels for {num_recordings} recordings"
        )

    starts = range(0, num_timesteps - window_size + 1, stride)

    # Allocate the result once and fill it one window position at a time: each
    # assignment copies that window for *all* recordings, so the Python-level
    # loop runs num_windows times instead of num_recordings * num_windows.
    windows = np.empty(
        (num_recordings, len(starts), window_size) + trailing_shape,
        dtype=recordings.dtype,
    )
    for window_idx, start in enumerate(starts):
        windows[:, window_idx] = recordings[:, start:start + window_size]

    # Merge the (recording, window) axes; the buffer is C-contiguous so this
    # is a view, and rows stay ordered recording-major.
    windows = windows.reshape((-1, window_size) + trailing_shape)

    # Each recording contributes len(starts) consecutive rows with its label.
    window_labels = np.repeat(labels, len(starts), axis=0)
    return windows, window_labels


window_size = 256  # Timesteps per window
stride = 128       # Offset between window starts (50% overlap at window_size = 256)

X_segmented, y_segmented = segment_windows(X, y, window_size, stride)

print(f"✅ Resequenced shape: {X_segmented.shape}")
print(f"✅ Labels shape: {y_segmented.shape}")

✅ Resequenced shape: (55917, 256, 8)
✅ Labels shape: (55917,)


In [22]:

# 1. Reshape the labels into a single column, the 2-D layout the encoder is given
y_reshaped = y_segmented.reshape(-1, 1)  # shape: (num_windows, 1)

# 2. One-hot encode labels
encoder = OneHotEncoder(sparse_output=False)  # `sparse=False` if using older scikit-learn
y_onehot = encoder.fit_transform(y_reshaped)  # shape: (num_windows, num_classes)

print(f"✅ One-hot label shape: {y_onehot.shape}")

# 3. Expand the labels across the sequence length so every timestep carries its
#    window's label. A broadcast view (read-only, no extra memory) is enough
#    here because the array is only used as an input to the concatenation
#    below, which copies the values into the result anyway.
y_onehot_repeated = np.broadcast_to(
    y_onehot[:, np.newaxis, :],
    (y_onehot.shape[0], X_segmented.shape[1], y_onehot.shape[1]),
)  # shape: (num_windows, window_size, num_classes)

# 4. Concatenate to X along the feature dimension
# NOTE(review): the encoder is created with its default output dtype; if
# X_segmented is float32 the concatenated result may be upcast — confirm this
# is intended before relying on the saved file's dtype.
X_conditional = np.concatenate(
    [X_segmented, y_onehot_repeated], axis=-1
)  # shape: (num_windows, window_size, num_features + num_classes)

print(f"✅ Conditional input shape: {X_conditional.shape}")

✅ One-hot label shape: (55917, 4)
✅ Conditional input shape: (55917, 256, 12)


In [23]:
# Write the label-conditioned windows and the per-window one-hot labels to
# disk as .npy files alongside the other processed data.
np.save(file="../data/processed/chest_X_conditional.npy", arr=X_conditional)
np.save(file="../data/processed/chest_y_onehot.npy", arr=y_onehot)

In [24]:
# Persist the fitted encoder so the same label <-> one-hot mapping can be
# reloaded later. Left as a bare expression so the cell displays the list of
# written file paths returned by joblib.dump.
joblib.dump(value=encoder, filename="../data/processed/chest_label_encoder.pkl")

['../data/processed/chest_label_encoder.pkl']