In [None]:
import os
import glob
import numpy as np
import pandas as pd


from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [25]:
# ============================================
# 1. 滑动窗口
# ============================================
def sliding_window(data, label, window_size=20, step_size=10):

    X, y = [], []
    T = data.shape[0]

    for start in range(0, T - window_size + 1, step_size):
        end = start + window_size
        X.append(data[start:end])
        y.append(label)

    return np.array(X), np.array(y)


# ============================================
# 2. 数据加载（每个 CSV 内时间切分）
# ============================================
def load_dataset(root_dir,
                 window_size=20,
                 step_size=10,
                 train_ratio=0.6,
                 val_ratio=0.2):

    label_map = {
        'Lhemiplegia': 0,
        'Rhemiplegia': 1,
        'normal': 2,
        'Parkinson': 3
    }

    X_train_all, y_train_all = [], []
    X_val_all,   y_val_all   = [], []
    X_test_all,  y_test_all  = [], []

    csv_files = sorted(glob.glob(os.path.join(root_dir, '*.csv')))
    if len(csv_files) == 0:
        raise RuntimeError('未找到 CSV 文件')

    # ---------- 归一化 ----------
    train_sensor_pool = []

    for csv_file in csv_files:
        df = pd.read_csv(csv_file, skiprows=1)
        sensor_data = df.iloc[:, 0:4].values.astype(np.float32)

        T = len(sensor_data)
        t_train = int(T * train_ratio)

        train_sensor_pool.append(sensor_data[:t_train])

    scaler = MinMaxScaler()
    scaler.fit(np.vstack(train_sensor_pool))

    # ---------- 构建数据 ----------
    for csv_file in csv_files:
        df = pd.read_csv(csv_file, skiprows=1)

        sensor_data = df.iloc[:, 0:4].values.astype(np.float32)
        sensor_data = scaler.transform(sensor_data)

        raw_label = df.iloc[0, -1]
        label = label_map[raw_label]

        T = len(sensor_data)
        t_train = int(T * train_ratio)
        t_val   = int(T * (train_ratio + val_ratio))

        data_train = sensor_data[:t_train]
        data_val   = sensor_data[t_train:t_val]
        data_test  = sensor_data[t_val:]

        X_tr, y_tr = sliding_window(data_train, label, window_size, step_size)
        X_va, y_va = sliding_window(data_val,   label, window_size, step_size)
        X_te, y_te = sliding_window(data_test,  label, window_size, step_size)

        X_train_all.append(X_tr)
        y_train_all.append(y_tr)
        X_val_all.append(X_va)
        y_val_all.append(y_va)
        X_test_all.append(X_te)
        y_test_all.append(y_te)

    return (
        np.concatenate(X_train_all),
        np.concatenate(X_val_all),
        np.concatenate(X_test_all),
        np.concatenate(y_train_all),
        np.concatenate(y_val_all),
        np.concatenate(y_test_all)
    )


# ============================================
# 3. 展平窗口
# ============================================
def flatten_windows(X):
    N, T, C = X.shape
    return X.reshape(N, T * C)

data_dir = './Datasets/abnormal'

X_train, X_val, X_test, y_train, y_val, y_test = load_dataset(
        root_dir=data_dir,
        window_size=20,
        step_size=10
    )

X_train = flatten_windows(X_train)
X_val   = flatten_windows(X_val)
X_test  = flatten_windows(X_test)


Train: (291, 80) [0 1 2 3]
Val  : (93, 80) [0 1 2 3]
Test : (93, 80) [0 1 2 3]


In [26]:
# ========================================
# 5. Decision Tree + GridSearch
# ========================================
clf = DecisionTreeClassifier(random_state=42)

param_grid = {
        'criterion': ['gini'],
        'class_weight': ['balanced'],
        'max_depth': [5, 10, 20],
        'min_samples_leaf': [1, 2, 3, 5, 10]
    }

grid = GridSearchCV(
        clf,
        param_grid,
        cv=10,
        scoring='accuracy',
        n_jobs=-1
    )

grid.fit(X_train, y_train)

print("Best params:", grid.best_params_)
print("Best CV acc:", grid.best_score_)

best_model = grid.best_estimator_

# ========================================
# 6. 测试集评估
# ========================================
y_pred = best_model.predict(X_test)

print("\nTest accuracy:",accuracy_score(y_test, y_pred))

print(classification_report(y_test, y_pred,target_names=["Lhemiplegia", "Rhemiplegia", "Normal", "Parkinson"]))


Best params: {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1}
Best CV acc: 0.9180459770114942

Test accuracy: 0.8387096774193549
              precision    recall  f1-score   support

 Lhemiplegia       0.64      0.73      0.68        22
 Rhemiplegia       1.00      1.00      1.00        25
      Normal       1.00      0.89      0.94        28
   Parkinson       0.67      0.67      0.67        18

    accuracy                           0.84        93
   macro avg       0.83      0.82      0.82        93
weighted avg       0.85      0.84      0.84        93



In [30]:
# -------------------------------
# Random Forest + GridSearch
# -------------------------------

rf = RandomForestClassifier(random_state=42)

param_grid = {
        'n_estimators': [100, 200,300],
        'max_depth': [5, 10, 20, 30],
        'min_samples_leaf': [1, 3, 5],
        'class_weight': ['balanced']
    }

grid = GridSearchCV(
        rf,
        param_grid,
        cv=10,
        scoring='accuracy',
        n_jobs=-1
)

grid.fit(X_train, y_train)

print("Best parameters:", grid.best_params_)
print("Best CV accuracy:", grid.best_score_)

best_model = grid.best_estimator_

# -------------------------------
# 测试集评估
# -------------------------------
y_pred = best_model.predict(X_test)

print("\nTest accuracy:",accuracy_score(y_test, y_pred))

print(classification_report(y_test, y_pred,
        target_names=["Lhemiplegia", "Rhemiplegia", "Normal", "Parkinson"]))


Best parameters: {'class_weight': 'balanced', 'max_depth': 10, 'min_samples_leaf': 1, 'n_estimators': 200}
Best CV accuracy: 0.9796551724137931

Test accuracy: 0.8279569892473119
              precision    recall  f1-score   support

 Lhemiplegia       0.59      1.00      0.75        22
 Rhemiplegia       1.00      0.96      0.98        25
      Normal       0.97      1.00      0.98        28
   Parkinson       1.00      0.17      0.29        18

    accuracy                           0.83        93
   macro avg       0.89      0.78      0.75        93
weighted avg       0.89      0.83      0.79        93

