In [None]:
import pandas as pd
import numpy as np
import joblib
import os
import gc

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Flatten, Dense, Reshape
from tensorflow.keras.callbacks import EarlyStopping

In [49]:
# # Load
# input_file = "combined_data.csv"
# output_file = "train_data_200k.csv"
# df = pd.read_csv(input_file, nrows=200000)
# df.to_csv(output_file, index=False)

In [None]:
TIME_STEPS = 10
BATCH = 16
EPOCHS = 20
MAX_SEQ = 20000   # optional cap để tiết kiệm RAM
FEATURE_COLS = ['tempC','accX','accY','accZ','gyroX','gyroY','gyroZ','angleX','angleY','angleZ','hr']

In [50]:
df = pd.read_csv("train_data_200k.csv")
df.head()

Unnamed: 0,time,tempC,accX,accY,accZ,gyroX,gyroY,gyroZ,angleX,angleY,angleZ,hr
0,2021-01-01 11:22:48.305804,33.56,1.004385,-0.6937,-0.366474,1.15478,0.162031,-1.012268,0.0,0.0,0.0,106.382999
1,2021-01-01 11:22:48.307804,33.56,1.004934,-0.696874,-0.370197,1.149587,0.175879,-1.005211,-0.637444,-0.066491,0.576309,106.382999
2,2021-01-01 11:22:48.309804,33.56,1.005605,-0.698339,-0.38155,1.13707,0.198783,-0.98537,-1.268609,-0.139815,1.141699,106.382999
3,2021-01-01 11:22:48.311804,33.56,1.005605,-0.698339,-0.38155,1.13707,0.198783,-0.98537,-1.900552,-0.206855,1.707557,106.382999
4,2021-01-01 11:22:48.313804,33.56,1.007192,-0.700902,-0.383748,1.129213,0.206506,-0.972453,-2.528838,-0.272241,2.266426,106.382999


In [75]:
# Chuẩn hoá dữ liệu
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(df[FEATURE_COLS].values)
joblib.dump(scaler, "scaler.pkl")

mins = scaler.data_min_
maxs = scaler.data_max_
np.save("feature_mins.npy", mins)
np.save("feature_maxs.npy", maxs)

In [76]:
def create_sequences(arr, time_steps=TIME_STEPS):
    X = []
    for i in range(len(arr) - time_steps + 1):
        X.append(arr[i:i+time_steps])
    return np.array(X)

X = create_sequences(data_scaled, TIME_STEPS)
if len(X) > MAX_SEQ:
    X = X[:MAX_SEQ]
print("Sequences shape:", X.shape)  # (N_seq, 10, 11)

Sequences shape: (20000, 10, 11)


In [None]:
# Tạo model
n_features = X.shape[2]
model = Sequential([
    Input(shape=(TIME_STEPS, n_features)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(64, activation='relu'),
    Dense(TIME_STEPS * n_features, activation='sigmoid'),  # vì dùng MinMaxScaler -> in [0,1]
    Reshape((TIME_STEPS, n_features))
])
model.compile(optimizer='adam', loss='mse')
model.summary()

In [78]:
# Huấn luyện
early_stop = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)
history = model.fit(X, 
                    X, 
                    batch_size=BATCH, 
                    epochs=EPOCHS, 
                    shuffle=True,
                    callbacks=[early_stop], 
                    verbose=1)



Epoch 1/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - loss: 0.0051
Epoch 2/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.0013
Epoch 3/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 9.7896e-04
Epoch 4/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 7.2699e-04
Epoch 5/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - loss: 5.3918e-04
Epoch 6/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 4.7134e-04
Epoch 7/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 4.4188e-04
Epoch 8/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 4.2761e-04
Epoch 9/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 4.1754e-04
Epoch 10/20
[1m1250/1250[0m [32m━━━━━━━━━

In [79]:
X_pred = model.predict(X, verbose=0)
mse = np.mean(np.power(X - X_pred, 2), axis=(1,2))  # MSE per sequence
thr95 = np.percentile(mse, 95)
thr99 = np.percentile(mse, 99)
print("Threshold 95pct:", thr95, "  99pct:", thr99)


Threshold 95pct: 0.0007462859943076039   99pct: 0.0032732214752728738


In [80]:
np.save("mse_threshold_95.npy", np.array([thr95]))
np.save("mse_threshold_99.npy", np.array([thr99]))
model.save("dense_autoencoder_model.h5")




In [83]:
# Convert sang TFLite int8

import tensorflow as tf

# Representative dataset
def representative_data_gen():
    for i in range(0, len(X), BATCH):
        batch = X[i:i+BATCH].astype(np.float32)
        if batch.shape[0] == 0:
            continue
        yield [batch]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

tflite_quant_model = converter.convert()
with open("dense_autoencoder_esp32_int8.tflite","wb") as f:
    f.write(tflite_quant_model)

print("Saved int8 quantized model (KB):", os.path.getsize("dense_autoencoder_esp32_int8.tflite")/1024)
gc.collect()


INFO:tensorflow:Assets written to: C:\Users\ASUS\AppData\Local\Temp\tmpzdv2rjho\assets


INFO:tensorflow:Assets written to: C:\Users\ASUS\AppData\Local\Temp\tmpzdv2rjho\assets


Saved artifact at 'C:\Users\ASUS\AppData\Local\Temp\tmpzdv2rjho'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 10, 11), dtype=tf.float32, name='keras_tensor_46')
Output Type:
  TensorSpec(shape=(None, 10, 11), dtype=tf.float32, name=None)
Captures:
  2413277990544: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2413277990928: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2413277990736: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2413277987664: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2413277991504: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2413277991120: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2413277991888: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2413277991696: TensorSpec(shape=(), dtype=tf.resource, name=None)




Saved int8 quantized model (KB): 29.671875


3331

In [84]:
gc.collect()

0