In [40]:
import json, numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from pathlib import Path
from tensorflow import keras
from tensorflow.keras import layers
from collections import Counter

np.random.seed(2025)
tf.random.set_seed(2025)

TRAIN_CSV = "data/training.csv"
ART_DIR   = Path("artifacts"); ART_DIR.mkdir(exist_ok=True)

MODEL_NAME = "esp32s3_ripple_classifier"

FEATURES = ['micRMS', 'lux', 'dT_tb', 'dTdt_mid', 'DOproxy', 'tds_delta']

CLASS_NAMES = [
  "calm","cold-shock","cooling-hot","disturbance","flashlight-night",
  "glare","human-tap","manual-override","other","pump-self",
  "tds-spike","uniform-overheat"
]

In [41]:
usecols = [
    "micRMS","lux","tMid","dT_tb","DOproxy","tds_mV","irObj","irAmb",
    "airT","airRH","pressure_hPa","pump","manual_override","label"
]
df = pd.read_csv(TRAIN_CSV, usecols=usecols)

print("Rows:", len(df))
df.head()

Rows: 23592


Unnamed: 0,micRMS,lux,tMid,dT_tb,DOproxy,tds_mV,irObj,irAmb,airT,airRH,pressure_hPa,pump,manual_override,label
0,0.0,190.9,24.94,0.06,8.19,446.0,27.03,24.65,25.9,58.0,1004.7,0.0,0.0,calm
1,0.0,190.9,24.94,0.06,8.19,433.0,27.11,24.65,25.9,58.0,1004.6,0.0,0.0,calm
2,0.0,191.0,24.94,0.0,8.19,430.0,27.05,24.63,25.9,58.0,1004.7,0.0,0.0,calm
3,0.0,191.0,25.0,0.0,8.18,446.0,27.17,24.65,25.9,58.0,1004.6,0.0,0.0,calm
4,0.0,191.1,24.94,0.0,8.19,432.0,27.03,24.63,25.9,58.0,1004.7,0.0,0.0,calm


In [42]:
df = df.dropna(subset=["label"]).copy()
core = ["micRMS","lux","tMid","dT_tb","DOproxy","tds_mV"]
df = df.dropna(subset=core).reset_index(drop=True)

for k in core + ["airT","airRH","pressure_hPa","pump","manual_override","irObj","irAmb"]:
    if k in df.columns:
        df[k] = pd.to_numeric(df[k], errors="coerce")

df = df.dropna(subset=core).reset_index(drop=True)
print("Rows after cleanup:", len(df))
df.head()

Rows after cleanup: 23591


Unnamed: 0,micRMS,lux,tMid,dT_tb,DOproxy,tds_mV,irObj,irAmb,airT,airRH,pressure_hPa,pump,manual_override,label
0,0.0,190.9,24.94,0.06,8.19,446.0,27.03,24.65,25.9,58.0,1004.7,0.0,0.0,calm
1,0.0,190.9,24.94,0.06,8.19,433.0,27.11,24.65,25.9,58.0,1004.6,0.0,0.0,calm
2,0.0,191.0,24.94,0.0,8.19,430.0,27.05,24.63,25.9,58.0,1004.7,0.0,0.0,calm
3,0.0,191.0,25.0,0.0,8.18,446.0,27.17,24.65,25.9,58.0,1004.6,0.0,0.0,calm
4,0.0,191.1,24.94,0.0,8.19,432.0,27.03,24.63,25.9,58.0,1004.7,0.0,0.0,calm


In [43]:
HZ = 1
N = 60 * HZ

df = df.reset_index(drop=True)

df['dTdt_mid'] = df['tMid'].diff(N)
df['dTdt_mid'] = df['dTdt_mid'].fillna(0.0)

alpha = 0.01
tds_baseline = df['tds_mV'].ewm(alpha=alpha, adjust=False).mean()
df['tds_delta'] = (df['tds_mV'] - tds_baseline).fillna(0.0)

kept = FEATURES + ["label"]
df = df[kept].copy()

print("Final training columns:", list(df.columns))
df.head()

Final training columns: ['micRMS', 'lux', 'dT_tb', 'dTdt_mid', 'DOproxy', 'tds_delta', 'label']


Unnamed: 0,micRMS,lux,dT_tb,dTdt_mid,DOproxy,tds_delta,label
0,0.0,190.9,0.06,0.0,8.19,0.0,calm
1,0.0,190.9,0.06,0.0,8.19,-12.87,calm
2,0.0,191.0,0.0,0.0,8.19,-15.7113,calm
3,0.0,191.0,0.0,0.0,8.18,0.285813,calm
4,0.0,191.1,0.0,0.0,8.19,-13.577045,calm


In [44]:
label_map = {name:i for i,name in enumerate(CLASS_NAMES)}

def encode_label(s):
    s = str(s).strip()
    return label_map.get(s, label_map["other"])

y_idx = df["label"].map(encode_label).astype(np.int32).to_numpy()
X = df[FEATURES].astype(np.float32).to_numpy()

print("Classes present:", np.unique(y_idx))
print("X shape:", X.shape, "y shape:", y_idx.shape)

Classes present: [ 0  1  2  3  4  5  6  7  8 10 11]
X shape: (23591, 6) y shape: (23591,)


In [45]:
X_train, X_val, y_train, y_val = train_test_split(
    X, y_idx, test_size=0.2, random_state=42, stratify=y_idx if len(np.unique(y_idx))>1 else None
)

scale = np.ones(X.shape[1], dtype=np.float32)
shift = np.zeros(X.shape[1], dtype=np.float32)

X_train_s = X_train
X_val_s   = X_val

N_CLASSES = len(CLASS_NAMES)
print("Train/Val:", X_train_s.shape, X_val_s.shape, "N_CLASSES:", N_CLASSES)

Train/Val: (18872, 6) (4719, 6) N_CLASSES: 12


In [46]:
inputs = keras.Input(shape=(len(FEATURES),), dtype=tf.float32, name="x")
x = layers.Dense(24, activation="relu")(inputs)
x = layers.Dense(12, activation="relu")(x)
outputs = layers.Dense(N_CLASSES, activation="softmax")(x)

model = keras.Model(inputs, outputs, name="ripple_mcu_tiny")
model.compile(optimizer=keras.optimizers.Adam(1e-3),
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

model.summary()

In [47]:
callbacks = [
    keras.callbacks.ReduceLROnPlateau(patience=5, factor=0.5, verbose=1),
    keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True, verbose=1)
]

hist = model.fit(
    X_train_s, y_train,
    validation_data=(X_val_s, y_val),
    epochs=100,
    batch_size=64,
    verbose=1,
    callbacks=callbacks
)

Epoch 1/100
[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 562us/step - accuracy: 0.4803 - loss: 9.0732 - val_accuracy: 0.7713 - val_loss: 1.0231 - learning_rate: 0.0010
Epoch 2/100
[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 369us/step - accuracy: 0.8072 - loss: 0.8183 - val_accuracy: 0.8260 - val_loss: 0.7425 - learning_rate: 0.0010
Epoch 3/100
[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 366us/step - accuracy: 0.8291 - loss: 0.6860 - val_accuracy: 0.8362 - val_loss: 0.6863 - learning_rate: 0.0010
Epoch 4/100
[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 365us/step - accuracy: 0.8334 - loss: 0.6506 - val_accuracy: 0.8326 - val_loss: 0.7107 - learning_rate: 0.0010
Epoch 5/100
[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 366us/step - accuracy: 0.8328 - loss: 0.6253 - val_accuracy: 0.8339 - val_loss: 0.6359 - learning_rate: 0.0010
Epoch 6/100
[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [48]:
val_loss, val_acc = model.evaluate(X_val_s, y_val, verbose=0)
print(f"Val acc: {val_acc:.3f}, loss: {val_loss:.3f}")

KERAS_PATH = ART_DIR / f"{MODEL_NAME}.keras"
model.save(KERAS_PATH)
print("Saved:", KERAS_PATH)

Val acc: 0.883, loss: 0.377
Saved: artifacts/esp32s3_ripple_classifier.keras


In [49]:
def representative_dataset():
    n = min(500, len(X_train_s))
    for i in range(n):
        yield [X_train_s[i:i+1].astype(np.float32)]

conv = tf.lite.TFLiteConverter.from_keras_model(model)
conv.optimizations = [tf.lite.Optimize.DEFAULT]
conv.representative_dataset = representative_dataset
conv.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
conv.inference_input_type  = tf.int8
conv.inference_output_type = tf.int8

tflite_model = conv.convert()
TFLITE_PATH = ART_DIR / f"{MODEL_NAME}.tflite"
TFLITE_PATH.write_bytes(tflite_model)

meta = {
    "model_name": MODEL_NAME,
    "input_features": FEATURES,
    "classes": CLASS_NAMES,
    "scale_shift": {"scale": scale.tolist(), "shift": shift.tolist()},
    "input_dtype": "int8",
    "quantized": True
}
P = ART_DIR / "preprocess.json"
P.write_text(json.dumps(meta, indent=2))
print("Wrote:", TFLITE_PATH, "and", P)

INFO:tensorflow:Assets written to: /var/folders/z2/gvdzcbjs5k9fzscmw5rqxx6h0000gn/T/tmpj0k1sxvv/assets


INFO:tensorflow:Assets written to: /var/folders/z2/gvdzcbjs5k9fzscmw5rqxx6h0000gn/T/tmpj0k1sxvv/assets


Saved artifact at '/var/folders/z2/gvdzcbjs5k9fzscmw5rqxx6h0000gn/T/tmpj0k1sxvv'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 6), dtype=tf.float32, name='x')
Output Type:
  TensorSpec(shape=(None, 12), dtype=tf.float32, name=None)
Captures:
  4914826512: TensorSpec(shape=(), dtype=tf.resource, name=None)
  4914826320: TensorSpec(shape=(), dtype=tf.resource, name=None)
  4914824208: TensorSpec(shape=(), dtype=tf.resource, name=None)
  4914826128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  4914824784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  4914833808: TensorSpec(shape=(), dtype=tf.resource, name=None)
Wrote: artifacts/esp32s3_ripple_classifier.tflite and artifacts/preprocess.json


W0000 00:00:1758479057.111353  210205 tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
W0000 00:00:1758479057.111361  210205 tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2025-09-21 20:24:17.111435: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/z2/gvdzcbjs5k9fzscmw5rqxx6h0000gn/T/tmpj0k1sxvv
2025-09-21 20:24:17.111633: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-09-21 20:24:17.111637: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /var/folders/z2/gvdzcbjs5k9fzscmw5rqxx6h0000gn/T/tmpj0k1sxvv
2025-09-21 20:24:17.113164: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-09-21 20:24:17.121282: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /var/folders/z2/gvdzcbjs5k9fzscmw5rqxx6h0000gn/T/tmpj0k1sxvv
2025-09-21 20:24:17.123966: I tensorflow/cc/saved_model/loader.cc:

In [50]:
interpreter = tf.lite.Interpreter(model_path=str(TFLITE_PATH))
interpreter.allocate_tensors()
inp = interpreter.get_input_details()[0]
out = interpreter.get_output_details()[0]
print("Input:", inp["shape"], inp["dtype"], "Output:", out["shape"], out["dtype"])

x = X_val_s[:1].astype(np.float32)
sc = inp['quantization_parameters']['scales'][0]
zp = inp['quantization_parameters']['zero_points'][0]
x_q = np.clip(np.rint(x / sc + zp), -128, 127).astype(np.int8)
interpreter.set_tensor(inp['index'], x_q)
interpreter.invoke()
y_q = interpreter.get_tensor(out['index'])

sc_o = out['quantization_parameters']['scales'][0]
zp_o = out['quantization_parameters']['zero_points'][0]
y = (y_q.astype(np.float32) - zp_o) * sc_o if out['dtype'] == np.int8 else y_q
print("Probs:", y[0], ": pred:", CLASS_NAMES[int(np.argmax(y[0]))])

Input: [1 6] <class 'numpy.int8'> Output: [ 1 12] <class 'numpy.int8'>
Probs: [0.765625   0.         0.         0.         0.00390625 0.
 0.04296875 0.0078125  0.         0.         0.00390625 0.1796875 ] : pred: calm


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


In [51]:
BIN = TFLITE_PATH.read_bytes()
ARR_NAME = f"{MODEL_NAME}_tflite".replace("-", "_")
HDR = ART_DIR / "model_data.h"
SRC = ART_DIR / "model_data.cpp"

HDR.write_text(
f"""#pragma once
#include <cstdint>

extern const unsigned char {ARR_NAME}[];
extern const unsigned int  {ARR_NAME}_len;
""")

def chunk(bs, n=12):
    for i in range(0, len(bs), n):
        yield bs[i:i+n]

with SRC.open("w") as f:
    f.write('#include "model_data.h"\n\n')
    f.write(f"const unsigned char {ARR_NAME}[] = {{\n")
    for row in chunk(BIN, 12):
        f.write("  " + ", ".join(f"0x{b:02X}" for b in row) + ",\n")
    f.write("};\n")
    f.write(f"const unsigned int {ARR_NAME}_len = {len(BIN)};\n")

print("Wrote:", HDR, "and", SRC, "(", len(BIN), "bytes )")

Wrote: artifacts/model_data.h and artifacts/model_data.cpp ( 4328 bytes )


In [52]:
print("Train counts:", Counter(y_train))
print("Val counts  :", Counter(y_val))

preds = model.predict(X_val_s, verbose=0)
y_hat = np.argmax(preds, axis=1)
acc = (y_hat == y_val).mean()
print("Val acc (Keras):", round(acc,3))

Train counts: Counter({np.int32(0): 13919, np.int32(8): 1793, np.int32(6): 1411, np.int32(11): 745, np.int32(10): 269, np.int32(2): 208, np.int32(1): 185, np.int32(3): 170, np.int32(7): 121, np.int32(4): 46, np.int32(5): 5})
Val counts  : Counter({np.int32(0): 3480, np.int32(8): 448, np.int32(6): 353, np.int32(11): 187, np.int32(10): 67, np.int32(2): 52, np.int32(1): 47, np.int32(3): 43, np.int32(7): 30, np.int32(4): 11, np.int32(5): 1})
Val acc (Keras): 0.883
