In [1]:
import os
import numpy as np
import pandas as pd
from scipy.stats import kurtosis, skew
from pathlib import Path

# === USER SETTINGS ===
# Folder that is searched (recursively) for raw input files.
data_dir = "C:/Users/ammar/SHAP_ML/datasets/1st_test"
# Folder that receives the generated CSV files.
save_dir = "C:/Users/ammar/SHAP_ML/outputs"
# Upper bound on the number of files to process; None processes all of them.
max_files = None

# === FEATURE EXTRACTION ===
def extract_features(signal):
    """Compute time-domain summary statistics of a signal.

    Parameters
    ----------
    signal : array_like
        Numeric samples. The input is converted to a float NumPy array, so
        plain Python sequences are accepted and integer inputs cannot
        overflow when squared. The statistics are computed with the library
        defaults, so the input is expected to be one-dimensional.

    Returns
    -------
    dict
        Keys ``"mean"``, ``"std"``, ``"rms"``, ``"kurtosis"``, ``"skewness"``,
        ``"peak_to_peak"`` and ``"crest_factor"``. ``std`` uses NumPy's default
        (``ddof=0``); ``kurtosis`` and ``skewness`` use the SciPy defaults.
        ``crest_factor`` is NaN when the RMS is zero.

    Raises
    ------
    ValueError
        If ``signal`` contains no samples.
    """
    samples = np.asarray(signal, dtype=float)
    if samples.size == 0:
        # np.ptp / np.max would fail on an empty array anyway; fail early
        # with a message that names the actual problem.
        raise ValueError("signal must contain at least one sample")

    # RMS feeds both the "rms" feature and the crest factor: compute it once.
    rms = np.sqrt(np.mean(samples**2))
    peak = np.max(np.abs(samples))
    # Crest factor is peak / RMS, which is undefined for an all-zero signal.
    crest_factor = peak / rms if rms > 0 else float("nan")

    return {
        "mean": np.mean(samples),
        "std": np.std(samples),
        "rms": rms,
        "kurtosis": kurtosis(samples),
        "skewness": skew(samples),
        "peak_to_peak": np.ptp(samples),
        "crest_factor": crest_factor,
    }

# === LOAD AND PROCESS FILES ===
# Every regular file below data_dir (searched recursively) is treated as one
# recording; sorting the paths fixes the processing order, which is later
# used as the time index.
all_files = sorted(f for f in Path(data_dir).rglob("*") if f.is_file())
if max_files:
    all_files = all_files[:max_files]
if not all_files:
    # Without this guard the script would fail further down with an opaque
    # KeyError on the missing "timestamp_index" column.
    raise FileNotFoundError(f"No input files found under {data_dir!r}")

data = []
for i, file_path in enumerate(all_files):
    # ndmin=1 keeps a single-value file from loading as a 0-d array, which
    # would break len(signal) below.
    signal = np.loadtxt(file_path, ndmin=1)
    if signal.ndim > 1:
        # NOTE(review): flattening a multi-column file interleaves its columns
        # into one signal (the logged length 163840 suggests several columns
        # per file) -- confirm that mixing columns is intended.
        signal = signal.flatten()  # Ensure it's 1D
    print(f"Processing file {i}: {file_path.name}, signal length = {len(signal)}")
    feats = extract_features(signal)
    feats["filename"] = file_path.name
    feats["timestamp_index"] = i  # position in the sorted file list
    data.append(feats)

df = pd.DataFrame(data)
print(df.head())
print(df.columns)
print(df.shape)

# === LABELS ===
# Binary label: rows from index `cutoff` onward (the last ~10%) are marked 1.
cutoff = int(len(df) * 0.9)
df["label"] = 0
df.loc[cutoff:, "label"] = 1  # Last 10% = faulty

# RUL is expressed in number of files remaining until the last processed file.
df["RUL"] = df["timestamp_index"].max() - df["timestamp_index"]

# === SAVE CSVs ===
# One CSV per task: classification keeps "label", regression keeps "RUL".
df_class = df.drop(columns=["RUL"])
df_rul = df.drop(columns=["label"])

# to_csv does not create missing directories, so make sure save_dir exists.
os.makedirs(save_dir, exist_ok=True)
class_csv_path = os.path.join(save_dir, "bearing_classification.csv")
rul_csv_path = os.path.join(save_dir, "bearing_rul.csv")

df_class.to_csv(class_csv_path, index=False)
df_rul.to_csv(rul_csv_path, index=False)

print("Done! Files saved to:")
print(f"- {class_csv_path}")
print(f"- {rul_csv_path}")

Processing file 0: 2003.10.22.12.06.24, signal length = 163840
Processing file 1: 2003.10.22.12.09.13, signal length = 163840
Processing file 2: 2003.10.22.12.14.13, signal length = 163840
Processing file 3: 2003.10.22.12.19.13, signal length = 163840
Processing file 4: 2003.10.22.12.24.13, signal length = 163840
Processing file 5: 2003.10.22.12.29.13, signal length = 163840
Processing file 6: 2003.10.22.12.34.13, signal length = 163840
Processing file 7: 2003.10.22.12.39.13, signal length = 163840
Processing file 8: 2003.10.22.12.44.13, signal length = 163840
Processing file 9: 2003.10.22.12.49.13, signal length = 163840
Processing file 10: 2003.10.22.12.54.13, signal length = 163840
Processing file 11: 2003.10.22.12.59.13, signal length = 163840
Processing file 12: 2003.10.22.13.04.13, signal length = 163840
Processing file 13: 2003.10.22.13.09.13, signal length = 163840
Processing file 14: 2003.10.22.13.14.13, signal length = 163840
Processing file 15: 2003.10.22.13.19.13, signal le