# LSTM Model Training, Saving, and Evaluation

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
import joblib
import os

In [None]:
# Load the raw readings, parse the timestamp column, and index rows by it.
df = (
    pd.read_csv("realistic_threshold_noise_flow_data.csv")
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
    .set_index('timestamp')
)

# Model input columns. Order matters: column 0 (flow_rate) is the value the
# sequence builder later picks as the prediction target.
features = ['flow_rate', 'pressure', 'temperature_C']
data = df.loc[:, features]

In [None]:
# Fit the scaler on the leading (training) portion of the series only, so the
# min/max of the held-out rows do not leak into preprocessing. The 0.8 mirrors
# the train/test fraction applied to the windowed data later in this file.
# Held-out rows are still transformed with the same scaler, so their scaled
# values may fall slightly outside [0, 1].
train_rows = max(1, int(0.8 * len(data)))
scaler = MinMaxScaler()
scaler.fit(data.iloc[:train_rows])
scaled_data = scaler.transform(data)

# Persist the fitted scaler next to the model so inference can reuse it.
os.makedirs("models", exist_ok=True)
joblib.dump(scaler, "models/lstm_scaler.gz")

In [None]:
def create_sequences(data, seq_length=24, target_col=0):
    """Build sliding-window samples for one-step-ahead prediction.

    Each sample ``X[i]`` holds rows ``i .. i + seq_length - 1`` of ``data``
    and its target ``y[i]`` is column ``target_col`` of row
    ``i + seq_length``.

    Args:
        data: 2-D array-like of shape (rows, features).
        seq_length: Number of consecutive rows in each input window.
        target_col: Index of the column to predict (default 0, which is
            flow_rate for the feature order used in this file).

    Returns:
        Tuple ``(X, y)`` with ``X`` of shape
        (rows - seq_length, seq_length, features) and ``y`` of shape
        (rows - seq_length,). When ``data`` has no more rows than
        ``seq_length`` both arrays are empty but keep those trailing
        dimensions.
    """
    data = np.asarray(data)
    n_windows = max(len(data) - seq_length, 0)

    # Row indices for every window, built by broadcasting:
    # window_idx[i, j] == i + j.
    window_idx = np.arange(n_windows)[:, None] + np.arange(seq_length)
    X = data[window_idx]

    # The target is the row immediately after each window. Copy so the result
    # does not alias the caller's array.
    y = data[seq_length:, target_col].copy()
    return X, y

# Window length (in rows) of each input sequence; the model's input shape
# is declared with the same value.
seq_length = 24
X, y = create_sequences(data=scaled_data, seq_length=seq_length)

In [None]:
# Chronological hold-out: the first 80% of the windows train the model and
# the remaining 20% are kept for evaluation (no shuffling).
split = int(0.8 * len(X))
train_part = slice(None, split)
test_part = slice(split, None)

X_train, y_train = X[train_part], y[train_part]
X_test, y_test = X[test_part], y[test_part]

In [None]:
# Two stacked LSTM layers followed by a single-unit regression head that
# predicts the next (scaled) flow_rate value.
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(seq_length, len(features))),
    LSTM(50),
    Dense(1)
])
model.compile(optimizer='adam', loss='mse')

# Early stopping selects the best weights by validation loss. The validation
# data is carved out of the training set rather than reusing X_test/y_test:
# choosing weights on the test set would make the test metrics reported
# afterwards optimistically biased. Keras takes the validation slice from the
# last samples of the training arrays before shuffling, which preserves the
# chronological order of the series.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=50,
    callbacks=[early_stopping],
)

In [None]:
# Write the trained network next to the scaler saved earlier in this file.
model_path = "models/lstm_model.h5"
model.save(model_path)
print("✅ Model and scaler saved.")

In [None]:
# Predict (outputs are in the scaler's normalised units)
y_pred = model.predict(X_test).flatten()

# Inverse scale predictions and actuals back to original flow_rate units.
# MinMaxScaler's forward transform is  X_scaled = X * scale_ + min_,
# so the inverse is  X = (X_scaled - min_) / scale_.
# Index 0 selects the flow_rate column, which is the prediction target.
flow_scaler = scaler.scale_[0]
flow_min = scaler.min_[0]
y_test_actual = (y_test - flow_min) / flow_scaler
y_pred_actual = (y_pred - flow_min) / flow_scaler

# Metrics (computed in original flow_rate units)
mse = mean_squared_error(y_test_actual, y_pred_actual)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_actual, y_pred_actual)
r2 = r2_score(y_test_actual, y_pred_actual)

# Custom accuracy
def regression_accuracy(y_true, y_pred, tolerance=0.1):
    """Return the percentage of predictions within a relative tolerance.

    A prediction counts as accurate when its absolute error is strictly
    smaller than ``tolerance * |y_true|``, or when it matches the true value
    exactly (so an exact hit on a true value of zero is still accurate).

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values, same shape as ``y_true``.
        tolerance: Maximum allowed relative error (0.1 means within 10%).

    Returns:
        Percentage (0-100) of accurate predictions, or NaN for empty input.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size == 0:
        return float("nan")

    abs_error = np.abs(y_pred - y_true)
    # Compare against tolerance * |y_true| instead of dividing by y_true, so
    # zero-valued targets do not produce inf/NaN or divide-by-zero warnings.
    accurate = (abs_error < tolerance * np.abs(y_true)) | (abs_error == 0)
    return np.mean(accurate) * 100

# Share of test predictions that land within the default relative tolerance.
accuracy = regression_accuracy(y_test_actual, y_pred_actual)

# Display: assemble the report first, then emit it in a single write.
report_lines = [
    "📊 Performance Metrics:",
    f"✅ MSE: {mse:.4f}",
    f"✅ RMSE: {rmse:.4f}",
    f"✅ MAE: {mae:.4f}",
    f"✅ R² Score: {r2:.4f}",
    f"✅ Accuracy (within ±10%): {accuracy:.2f}%",
]
print("\n".join(report_lines))

In [None]:
# Overlay the first 200 test targets and their predictions on one axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test_actual[:200], label='Actual Flow Rate')
ax.plot(y_pred_actual[:200], label='Predicted Flow Rate', linestyle='dashed')
ax.set_title("Flow Rate: Actual vs Predicted")
ax.set_xlabel("Time Steps")
ax.set_ylabel("Flow Rate")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()