In [27]:
import os
# CUDA_VISIBLE_DEVICES has to be set before TensorFlow is imported to take effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # Disable GPU usage
import glob

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed
from tensorflow.keras.utils import to_categorical



In [51]:
# CONFIG
# -- data --
data_path = 'data'                     # directory searched for the CSV recordings
labels = ['correct', 'err1', 'err2']   # filename substrings; list position is the class index

# -- windowing --
window_size = 65                       # rows per sliding window
stride = 5                             # rows between consecutive window starts

# -- training --
batch_size = 32
learning_rate = 1e-3                   # handed to the Adam optimizer
epochs = 20
model_save_path = './model.h5'         # destination of the trained model file


In [52]:
# 1. Load labeled files
def load_data(label_name):
    """Read every CSV under ``data_path`` whose filename contains ``label_name``.

    Files are visited in sorted path order. Each returned DataFrame carries two
    extra columns: ``label`` (the given ``label_name``) and ``file`` (the CSV's
    base filename, so a row can be traced back to its source file).

    Returns:
        list of DataFrames, one per matching file; empty when nothing matches.
    """
    matches = glob.glob(os.path.join(data_path, f'*{label_name}*.csv'))
    matches.sort()
    return [
        pd.read_csv(csv_path).assign(label=label_name, file=os.path.basename(csv_path))
        for csv_path in matches
    ]

# Load every label's files once, keyed by label name
data_dict = dict((name, load_data(name)) for name in labels)

# 2. Per label: up to the first four files (sorted order) train, the fifth tests
train_raw = []
test_raw = []
for label in labels:
    label_files = data_dict[label]
    train_raw += label_files[:4]
    test_raw += label_files[4:5]  # a one-element slice: a single test file per label

# 3. Fit scaler only on training data, so test statistics never leak into it
train_concat = pd.concat(train_raw)
scaler = StandardScaler().fit(train_concat[['ax', 'ay', 'az']])

# 4. Apply normalization to each file independently
def normalize_files(file_list, fitted_scaler=None):
    """Return scaled copies of the given DataFrames.

    The ``ax``, ``ay`` and ``az`` columns of each frame are replaced by the
    scaler's ``transform`` output; all other columns are carried over as-is.
    The input frames are NOT modified: each one is copied first, so the raw
    data stays available and calling this twice never double-scales.

    Args:
        file_list: iterable of DataFrames containing ``ax``, ``ay``, ``az``.
        fitted_scaler: optional object with a ``transform`` method. Defaults
            to the notebook-level ``scaler`` fitted on the training files.

    Returns:
        list of new DataFrames, in the same order as ``file_list``.
    """
    active_scaler = scaler if fitted_scaler is None else fitted_scaler
    feature_cols = ['ax', 'ay', 'az']

    normalized = []
    for df in file_list:
        scaled = df.copy()  # keep the caller's frame untouched
        scaled[feature_cols] = active_scaler.transform(df[feature_cols])
        normalized.append(scaled)
    return normalized

# Scale both splits with the scaler that was fitted on the training files
train_norm, test_norm = normalize_files(train_raw), normalize_files(test_raw)


In [53]:

# Debug dump of every normalized training DataFrame (pandas elides the middle
# rows of long frames, but each file still prints its head, tail and shape).
# NOTE(review): printing a single frame's .head() would be a lighter check.
print(train_norm)


[      timestamp        ax        ay        az    label            file
0         11671  0.672683 -0.126569  1.398738  correct  correct数据1.csv
1         11694  0.636174 -0.138346  1.357783  correct  correct数据1.csv
2         11717  0.530702 -0.083877  1.175760  correct  correct数据1.csv
3         11740  0.579381 -0.066212  1.346407  correct  correct数据1.csv
4         11763  0.550985 -0.008799  1.323654  correct  correct数据1.csv
...         ...       ...       ...       ...      ...             ...
1026      35269  0.561126 -0.175149  1.205339  correct  correct数据1.csv
1027      35292  0.463768 -0.176621  1.173485  correct  correct数据1.csv
1028      35315  0.569240 -0.154539  1.298626  correct  correct数据1.csv
1029      35338  0.484051 -0.257587  1.223541  correct  correct数据1.csv
1030      35361  0.402919 -0.303223  1.175760  correct  correct数据1.csv

[1031 rows x 6 columns],       timestamp        ax        ay        az    label            file
0         56221  4.826649 -1.586912  3.535238  cor

In [54]:
# 5. Sliding window (within file only)
# Map each label name to its position in `labels` (used as the integer class id)
label_map = dict(zip(labels, range(len(labels))))

def create_windows_from_files(file_list, win=None, step=None, mapping=None):
    """Cut each file into fixed-length sliding windows of (ax, ay, az) rows.

    Windows never span two files: every DataFrame is windowed on its own.
    The class id of a window is looked up from the first row's ``label``
    value of its file (the label is constant within a file).

    Args:
        file_list: iterable of DataFrames with ``ax``, ``ay``, ``az`` and
            ``label`` columns.
        win: rows per window. Defaults to the notebook-level ``window_size``.
        step: rows between consecutive window starts. Defaults to ``stride``.
        mapping: dict from label name to integer class id. Defaults to
            ``label_map``.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_windows, win, 3)`` and ``y`` has
        shape ``(n_windows,)``. When no window can be built (no files, or all
        files shorter than ``win``) the arrays are empty but keep those shapes.
    """
    win = window_size if win is None else win
    step = stride if step is None else step
    mapping = label_map if mapping is None else mapping
    feature_cols = ['ax', 'ay', 'az']

    X, y = [], []
    for df in file_list:
        if len(df) < win:
            # Too short to yield a window; this also guards the iloc[0] label
            # lookup below, which would raise on an empty frame.
            continue
        data = df[feature_cols].values
        label = mapping[df['label'].iloc[0]]  # label is constant in file
        for start in range(0, len(data) - win + 1, step):
            X.append(data[start:start + win])
            y.append(label)

    if not X:
        # Keep the rank downstream code expects even when nothing was produced
        return np.empty((0, win, len(feature_cols))), np.empty((0,), dtype=int)
    return np.array(X), np.array(y)

# Window the train and test files separately
(X_train, y_train), (X_test, y_test) = map(
    create_windows_from_files, (train_norm, test_norm)
)

# 6. Shuffle train data (fixed seed keeps the order repeatable)
X_train, y_train = shuffle(X_train, y_train, random_state=42)

# 7. One-hot encode the integer class ids for categorical cross-entropy
y_train_cat, y_test_cat = (
    to_categorical(targets, num_classes=len(labels))
    for targets in (y_train, y_test)
)


In [55]:
# Sanity check: feature tensor shape next to its one-hot target shape, train then test
for features, targets in ((X_train, y_train_cat), (X_test, y_test_cat)):
    print(features.shape, targets.shape)

(2328, 65, 3) (2328, 3)
(582, 65, 3) (582, 3)


In [56]:
# 8. Build LSTM model
# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling are repeatable across runs (same seed as the data shuffle).
set_random_seed(42)

model = Sequential([
    # Explicit Input layer: passing input_shape= to the first layer is
    # deprecated in Keras 3 and emits a UserWarning.
    Input(shape=(window_size, 3)),        # (time steps, ax/ay/az channels)
    LSTM(64),
    Dense(64, activation='relu'),
    Dense(len(labels), activation='softmax')   # one probability per class
])

optimizer = Adam(learning_rate=learning_rate)
# categorical_crossentropy expects the one-hot targets (y_train_cat)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# 9. Train
model.fit(X_train, y_train_cat, epochs=epochs, batch_size=batch_size)

model.save(model_save_path)


Epoch 1/20


  super().__init__(**kwargs)


[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.7812 - loss: 0.6437
Epoch 2/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.9923 - loss: 0.0513
Epoch 3/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.9925 - loss: 0.0428
Epoch 4/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.9893 - loss: 0.0550
Epoch 5/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.9926 - loss: 0.0373
Epoch 6/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.9934 - loss: 0.0375
Epoch 7/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.9952 - loss: 0.0258
Epoch 8/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.9921 - loss: 0.0473
Epoch 9/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m



In [57]:
# 10. Evaluate
# Per-window class probabilities -> index of the most likely class
y_pred = model.predict(X_test)
y_pred_labels = y_pred.argmax(axis=1)

# Score against the integer labels (y_test), not the one-hot y_test_cat
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_labels)

print(f"✅ Test Accuracy: {accuracy:.2%}")

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
✅ Test Accuracy: 100.00%
