In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Read the training CSV; the path is relative to the notebook's working directory
file_name = "geothermal_training_balanced_eruption.csv"
df = pd.read_csv(file_name)

# Confirm the load and preview the first rows in a single print call
print("Data loaded successfully.", df.head(), sep="\n")

Data loaded successfully.
          Timestamp  Water_Temp_C  Flow_Rate_Lmin   SO2_ppm    H2S_ppm  \
0  01-01-2025 00:00     43.771285        4.339357  0.535230   1.108189   
1  01-01-2025 00:01     39.966053        4.020475  3.454176  20.828754   
2  01-01-2025 00:02     42.129587        5.284998  0.417567   2.806561   
3  01-01-2025 00:03     41.901139        5.494694  0.405894   3.590781   
4  01-01-2025 00:04     44.867394        3.286773  0.519083   0.087840   

   Anomaly_Label  Anomaly_Type_ID  Volcano_Eruption  
0              0                0                 0  
1              1                3                 1  
2              0                0                 0  
3              0                0                 0  
4              0                0                 0  


In [None]:
# Predictor columns and the label column the classifier is trained on
features = ['Water_Temp_C', 'Flow_Rate_Lmin', 'SO2_ppm', 'H2S_ppm']
target = 'Volcano_Eruption'

# Pull the selected columns out of the frame as NumPy arrays
X = df[features].to_numpy()
y = df[target].to_numpy()

# Hold out 20% of the rows for testing. The fixed random_state makes the split
# repeatable, and stratifying on y keeps the label ratio equal in both splits.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of y_train: {y_train.shape}")

Shape of X_train: (8000, 4)
Shape of y_train: (8000,)


In [None]:
# Learn the scaling statistics from the training split only, so no information
# from the test split leaks into preprocessing.
scaler = StandardScaler().fit(X_train)

# Standardise both splits with those same training statistics
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# NOTE(review): the fitted scaler statistics are not exported anywhere in this
# notebook; inputs on the target device presumably need the same scaling -- confirm.

print("Data scaled successfully.")
print(f"Example of scaled data (first row): {X_train_scaled[0]}")

Data scaled successfully.
Example of scaled data (first row): [ 0.27670802 -1.91114589  0.05658262 -0.34959946]


In [None]:
# Seed the Python, NumPy and TensorFlow RNGs before any weights are created so
# that weight initialisation (and the shuffling done during training) gives the
# same result on every Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Define the Keras model: a small fully connected binary classifier.
model = Sequential([
    # Explicit input layer, one value per feature column. Declaring the shape
    # here replaces the deprecated `input_shape=` argument on the first Dense
    # layer, which made Keras emit a UserWarning.
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    # First hidden layer
    Dense(16, activation='relu'),
    # Second hidden layer (optional, but helps with non-linearity)
    Dense(8, activation='relu'),
    # Output layer: 1 unit with sigmoid for binary classification (0 or 1)
    Dense(1, activation='sigmoid')
])

# Compile the model: binary cross-entropy matches the single sigmoid output.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Training hyper-parameters, gathered in one place
fit_options = dict(
    epochs=50,             # relatively small number of passes over the data
    batch_size=32,
    validation_split=0.1,  # hold back a small part of the training data for validation
    verbose=0,             # no per-epoch log output
)

# Fit on the scaled training split; `history` keeps the per-epoch metrics
history = model.fit(X_train_scaled, y_train, **fit_options)

# Score the trained model on the held-out test split
loss, accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Test Accuracy: {accuracy*100:.2f}%")

Test Accuracy: 94.95%


In [None]:
# Build a TFLite converter directly from the trained Keras model
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Ask for TFLite's default post-training optimisations (size and latency),
# which matters on constrained TinyML targets such as the ESP32.
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Run the conversion; the result is the serialized model as a bytes object
tflite_model = converter.convert()

# Persist the serialized model next to the notebook
tflite_model_path = 'geothermal_eruption_model.tflite'
with open(tflite_model_path, 'wb') as model_file:
    model_file.write(tflite_model)

# Report where the file went and how large it is
model_size_kb = len(tflite_model) / 1024
print(f"TensorFlow Lite model saved to: {tflite_model_path}")
print(f"Model size: {model_size_kb:.2f} KB")

Saved artifact at '/tmp/tmp962vm9e3'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 4), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  133362887270736: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133362887271888: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133362887270544: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133362887269584: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133362887272464: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133362887268816: TensorSpec(shape=(), dtype=tf.resource, name=None)
TensorFlow Lite model saved to: geothermal_eruption_model.tflite
Model size: 2.90 KB


In [None]:
# Convert the TFLite model file to a C byte-array header (model.h).
#
# The header is generated in pure Python rather than with shell tools:
# `xxd -i <file>` already prints a complete `unsigned char <name>[] = {...};`
# definition plus a `<name>_len` variable, so wrapping its output inside a
# second `const unsigned char model_tflite[] = { ... };` produced a header that
# does not compile. Writing the bytes directly also removes the dependency on
# xxd/sed/echo being available in the notebook's shell.
header_path = 'model.h'
bytes_per_line = 12  # same row width the original xxd call asked for

# Read the serialized model back from the file written by the conversion step
with open(tflite_model_path, 'rb') as model_file:
    model_bytes = model_file.read()

# An empty initializer list would not be a usable C array, so fail loudly
if not model_bytes:
    raise ValueError(f"TFLite model file '{tflite_model_path}' is empty")

# One text row per `bytes_per_line` bytes, each byte rendered as 0xNN
hex_rows = [
    "  " + ", ".join(f"0x{byte:02x}" for byte in model_bytes[start:start + bytes_per_line])
    for start in range(0, len(model_bytes), bytes_per_line)
]

with open(header_path, 'w') as header_file:
    header_file.write("const unsigned char model_tflite[] = {\n")
    # Joining rows with ",\n" leaves no trailing comma after the last byte
    header_file.write(",\n".join(hex_rows))
    header_file.write("\n};\n")
    header_file.write("const int model_tflite_len = sizeof(model_tflite);\n")

print("\nC Header file 'model.h' created successfully.")
print("This file contains the model as a C byte array for the ESP32.")


C Header file 'model.h' created successfully.
This file contains the model as a C byte array for the ESP32.
