In [11]:
# ===============================
# LSTM Time-Series Forecasting
# ===============================

import pandas as pd
import numpy as np
import os
import joblib

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

# ------------------------------------------------
# Load preprocessed data
# ------------------------------------------------
%run DataPreprocessing.ipynb

# ------------------------------------------------
# Select features for LSTM
# ------------------------------------------------
features = [
    "methane_pct",
    "temperature_c",
    "humidity_pct",
    "airflow_mps",
    "vibration_mm_s",
    "oxygen_pct"
]

final_df = final_df.sort_values(["tunnel_id", "timestamp"])

# ------------------------------------------------
# Prepare LSTM outputs per tunnel
# ------------------------------------------------
lstm_outputs = []

WINDOW_SIZE = 5

for tunnel in final_df["tunnel_id"].unique():

    df_tunnel = final_df[final_df["tunnel_id"] == tunnel][features]

    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(df_tunnel)

    X, y = [], []
    for i in range(len(scaled) - WINDOW_SIZE):
        X.append(scaled[i:i+WINDOW_SIZE])
        y.append(scaled[i+WINDOW_SIZE][0])  # methane prediction

    X, y = np.array(X), np.array(y)

    if len(X) < 5:
        continue

    model = Sequential([
        LSTM(32, input_shape=(WINDOW_SIZE, X.shape[2])),
        Dense(1)
    ])

    model.compile(optimizer="adam", loss="mse")

    model.fit(
        X, y,
        epochs=20,
        batch_size=8,
        verbose=0,
        callbacks=[EarlyStopping(patience=3)]
    )

    last_sequence = scaled[-WINDOW_SIZE:]
    predicted_scaled = model.predict(
        last_sequence.reshape(1, WINDOW_SIZE, X.shape[2]),
        verbose=0
    )

    predicted_methane = scaler.inverse_transform(
        np.concatenate([predicted_scaled, np.zeros((1, X.shape[2]-1))], axis=1)
    )[0][0]

    actual_methane = df_tunnel["methane_pct"].iloc[-1]

    trend = (
        "Increasing Risk"
        if predicted_methane > actual_methane
        else "Stable Risk"
    )

    lstm_outputs.append({
        "tunnel_id": tunnel,
        "actual_methane": round(actual_methane, 2),
        "predicted_methane": round(predicted_methane, 2),
        "trend": trend
    })

# ------------------------------------------------
# Save output for Streamlit
# ------------------------------------------------
os.makedirs("models", exist_ok=True)

lstm_output_df = pd.DataFrame(lstm_outputs)
joblib.dump(lstm_output_df, "models/lstm_output.pkl")

lstm_output_df


  df.fillna(method="ffill", inplace=True)
  df.fillna(method="ffill", inplace=True)
  df.fillna(method="ffill", inplace=True)
  super().__init__(**kwargs)
  current = self.get_monitor_value(logs)
  super().__init__(**kwargs)
  current = self.get_monitor_value(logs)
  super().__init__(**kwargs)
  current = self.get_monitor_value(logs)


Unnamed: 0,tunnel_id,actual_methane,predicted_methane,trend
0,A,2.5,2.03,Stable Risk
1,B,2.5,1.76,Stable Risk
2,C,2.5,2.05,Stable Risk
