In [1]:
import os
import pandas as pd

# --- Input / output locations -------------------------------------------
SRC_DIR = "/home/charles/HZU/Data_raw/MCC5/MCC5_speed"
OUT_DIR = "/home/charles/HZU/Data_processed/HSML/MCC5"

# Files produced by this cell: the merged dataset and the id -> name table.
OUT_CSV = os.path.join(OUT_DIR, "mcc5_speed_10Nm_1000rpm_raw.csv")
OUT_MAP = os.path.join(OUT_DIR, "mcc5_class_mapping.csv")

# Upper bound on the number of rows kept from each source file.
K = 50000

# Make sure the output directory exists; exist_ok keeps re-runs from failing.
os.makedirs(OUT_DIR, exist_ok=True)

# ======================================================
# Fixed class mapping (taken from the provided table)
# ======================================================

# Class names listed in class-id order: the position in this tuple IS the id,
# so new entries must only ever be appended at the end.
_CLASS_NAMES_BY_ID = (
    "_health",                          # 0
    "gear_pitting_L",                   # 1
    "gear_pitting_M",                   # 2
    "gear_pitting_H",                   # 3
    "gear_wear_L",                      # 4
    "gear_wear_M",                      # 5
    "gear_wear_H",                      # 6
    "miss_teeth_health",                # 7
    "teeth_break_L",                    # 8
    "teeth_break_M",                    # 9
    "teeth_break_H",                    # 10
    "teeth_break_and_bearing_inner_L",  # 11
    "teeth_break_and_bearing_inner_M",  # 12
    "teeth_break_and_bearing_inner_H",  # 13
    "teeth_break_and_bearing_outer_L",  # 14
    "teeth_break_and_bearing_outer_M",  # 15
    "teeth_break_and_bearing_outer_H",  # 16
    "teeth_crack_L",                    # 17
    "teeth_crack_M",                    # 18
    "teeth_crack_H",                    # 19
)

# Maps "<fault>_<severity>" class names to their integer class ids.
CLASS_MAP = {name: class_id for class_id, name in enumerate(_CLASS_NAMES_BY_ID)}

def _parse_fault_and_severity(
    fname, ignored_tokens=("speed", "circulation", "10Nm-1000rpm")
):
    """Split a source file name into its ``(fault, severity)`` parts.

    The name (without its extension) is split on ``_``. A token equal to
    ``H``, ``M``, ``L`` or ``health`` is taken as the severity (the last such
    token wins), tokens listed in ``ignored_tokens`` are discarded, and all
    remaining tokens are re-joined with ``_`` to form the fault name.

    Examples:
        ``gear_pitting_H_speed_circulation_10Nm-1000rpm.csv``
            -> ``("gear_pitting", "H")``
        ``health_speed_circulation_10Nm-1000rpm.csv``
            -> ``("", "health")``

    Args:
        fname: File name (not a full path), e.g. as returned by os.listdir.
        ignored_tokens: Tokens that belong to neither fault nor severity.

    Returns:
        Tuple ``(fault, severity)``. ``severity`` falls back to ``"health"``
        when the name contains no severity token.
    """
    # splitext removes only the trailing extension, whereas
    # str.replace(".csv", "") would also strip ".csv" found mid-name.
    stem, _ext = os.path.splitext(fname)

    severity = None
    fault_parts = []
    for token in stem.split("_"):
        if token in ("H", "M", "L", "health"):
            severity = token
        elif token not in ignored_tokens:
            fault_parts.append(token)

    # Names without any severity token are labelled "health"; presumably this
    # is what lets such a file resolve to a "<fault>_health" key in CLASS_MAP
    # (e.g. "miss_teeth_health") -- verify against the actual file names.
    return "_".join(fault_parts), (severity if severity is not None else "health")


all_frames = []

print("Scanning files...")

# os.listdir returns entries in arbitrary order; sorting makes the processing
# order -- and therefore the row order of the merged CSV -- reproducible.
for fname in sorted(os.listdir(SRC_DIR)):

    # Only CSV files recorded under the 10Nm-1000rpm condition are used.
    if not fname.endswith(".csv") or "10Nm-1000rpm" not in fname:
        continue

    # ===============================
    # Parse fault / severity
    # ===============================

    fault, severity = _parse_fault_and_severity(fname)
    class_name = f"{fault}_{severity}"

    if class_name not in CLASS_MAP:
        print("⚠️ Skip unknown class:", class_name)
        continue

    class_id = CLASS_MAP[class_name]

    # ===============================
    # Read CSV
    # ===============================

    fpath = os.path.join(SRC_DIR, fname)
    df = pd.read_csv(fpath)

    # Cap every file at K rows; the fixed seed keeps the selection repeatable.
    # NOTE(review): sample() draws rows at random, so the kept rows are no
    # longer in their original file order -- confirm downstream code does not
    # rely on row order.
    if len(df) > K:
        df = df.sample(K, random_state=42)

    # Attach the label columns; assign() returns a new frame with these
    # columns appended in the order given.
    df = df.assign(
        fault=fault,
        severity=severity,
        class_name=class_name,
        class_id=class_id,
        source_file=fname,
    )

    all_frames.append(df)

    print(f"Loaded {fname} -> {len(df)} rows | {class_name} -> {class_id}")

# ===============================
# Merge + save
# ===============================

# pd.concat raises a bare "No objects to concatenate" ValueError on an empty
# list; fail with a message that points at the likely cause instead.
if not all_frames:
    raise ValueError(
        f"No usable CSV files were loaded from {SRC_DIR!r}; "
        "check the directory, the '10Nm-1000rpm' file-name filter and CLASS_MAP."
    )

full = pd.concat(all_frames, ignore_index=True)
full.to_csv(OUT_CSV, index=False)

# Save the mapping table (class_id -> class_name), ordered by id.
map_df = pd.DataFrame({
    "class_id": list(CLASS_MAP.values()),
    "class_name": list(CLASS_MAP.keys())
}).sort_values("class_id")

map_df.to_csv(OUT_MAP, index=False)

# Report classes that are defined in CLASS_MAP but received no rows, so a
# missing or misnamed source file does not go unnoticed.
loaded_ids = set(full["class_id"].unique().tolist())
missing_classes = [name for name, cid in CLASS_MAP.items() if cid not in loaded_ids]
if missing_classes:
    print("⚠️ Classes with no data:", missing_classes)

print("\n================ RESULT ================")
print("Total samples:", len(full))
print("Total classes:", full["class_id"].nunique())
print("Saved data to:", OUT_CSV)
print("Saved mapping to:", OUT_MAP)


Scanning files...
Loaded teeth_break_and_bearing_inner_M_speed_circulation_10Nm-1000rpm.csv -> 50000 rows | teeth_break_and_bearing_inner_M -> 12
Loaded teeth_break_and_bearing_outer_M_speed_circulation_10Nm-1000rpm.csv -> 50000 rows | teeth_break_and_bearing_outer_M -> 15
Loaded teeth_break_and_bearing_outer_L_speed_circulation_10Nm-1000rpm.csv -> 50000 rows | teeth_break_and_bearing_outer_L -> 14
Loaded health_speed_circulation_10Nm-1000rpm.csv -> 50000 rows | _health -> 0
Loaded gear_pitting_H_speed_circulation_10Nm-1000rpm.csv -> 50000 rows | gear_pitting_H -> 3
Loaded teeth_break_M_speed_circulation_10Nm-1000rpm.csv -> 50000 rows | teeth_break_M -> 9
Loaded teeth_break_and_bearing_outer_H_speed_circulation_10Nm-1000rpm.csv -> 50000 rows | teeth_break_and_bearing_outer_H -> 16
Loaded teeth_break_H_speed_circulation_10Nm-1000rpm.csv -> 50000 rows | teeth_break_H -> 10
Loaded gear_wear_M_speed_circulation_10Nm-1000rpm.csv -> 50000 rows | gear_wear_M -> 5
Loaded gear_pitting_M_speed_c

In [2]:
import pandas as pd

# Merged dataset to be stripped down; same path the first cell writes OUT_CSV to.
CSV_PATH = "/home/charles/HZU/Data_processed/HSML/MCC5/mcc5_speed_10Nm_1000rpm_raw.csv"

# Descriptive label columns to remove.
cols_to_drop = ["fault", "severity", "class_name", "source_file"]

# Load and drop in one step; errors="ignore" skips columns that are already
# absent, so re-running this cell on the stripped file is harmless.
df = pd.read_csv(CSV_PATH).drop(columns=cols_to_drop, errors="ignore")

# Overwrite the file in place (write to a different path to keep the original).
df.to_csv(CSV_PATH, index=False)

print("✅ Done. Remaining columns:")
print(list(df.columns))


✅ Done. Remaining columns:
['speed', 'torque', 'motor_vibration_x', 'motor_vibration_y', 'motor_vibration_z', 'gearbox_vibration_x', 'gearbox_vibration_y', 'gearbox_vibration_z', 'class_id']
