In [4]:
# Training code has been moved to a separate script for clarity and modularity.
# To train the LSTM model, please run the 'train_lstm_model.py' script in your project folder.
# This notebook now focuses on inference, simulation, and anomaly detection only.
# Example: !python train_lstm_model.py

In [5]:
import joblib

SCALER_PATH = "scaler.save"

# Make sure 'scaler' is defined before dumping.
# Example: from sklearn.preprocessing import MinMaxScaler; scaler = MinMaxScaler().fit(X)
# joblib.dump(scaler, SCALER_PATH)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam

# Build the LSTM model
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    LSTM(32),
    Dropout(0.2),
    Dense(y.shape[1])  # Predict all 24 features
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

# Train the model
history = model.fit(X, y, epochs=15, batch_size=32, validation_split=0.1, verbose=1)

# Save model for later real-time inference
model.save("lstm_forecast_model.h5")


In [None]:
from tensorflow.keras.models import load_model
import joblib

# Load model without compiling
model = load_model("lstm_forecast_model.h5", compile=False)

# Load the scaler
scaler = joblib.load("scaler.save")


In [None]:
import os
os.listdir()


['%userprofile%',
 '.ipynb_checkpoints',
 '0409',
 '0ae3b998-9a38-4b72-a4c4-06849441518d_Servicing-Stack.dll',
 '1028',
 '1029',
 '1031',
 '1033',
 '1036',
 '1040',
 '1041',
 '1042',
 '1045',
 '1046',
 '1049',
 '1055',
 '2052',
 '3082',
 '4545ffe2-0dc4-4df4-9d02-299ef204635e_hvsocket.dll',
 '69fe178f-26e7-43a9-aa7d-2b616b672dde_eventlogservice.dll',
 '6bea57fb-8dfb-4177-9ae8-42e8b3529933_RuntimeDeviceInstall.dll',
 '@AdvancedKeySettingsNotification.png',
 '@AppHelpToast.png',
 '@AudioToastIcon.png',
 '@BackgroundAccessToastIcon.png',
 '@bitlockertoastimage.png',
 '@edptoastimage.png',
 '@EnrollmentToastIcon.png',
 '@facial-recognition-windows-hello-rejuv.gif',
 '@facial-recognition-windows-hello.gif',
 '@language_notification_icon.png',
 '@optionalfeatures.png',
 '@StorageSenseToastIcon.png',
 '@VpnToastIcon.png',
 '@WindowsHelloFaceToastIcon.png',
 '@WindowsHelloFaceToastIconRejuv.png',
 '@WindowsUpdateToastIcon.contrast-black.png',
 '@WindowsUpdateToastIcon.contrast-white.png',
 '@Wi

In [None]:
def classify_anomaly(actual_values):
    """
    Takes actual (denormalized) sensor values and returns anomaly type if any.
    """
    Engine_Temperature, Fuel_Level, Engine_RPM, Brake_Pressure, Hydraulic_Pressure, Oil_Temperature, Vibration, Throttle_Position, Speed = actual_values

    if Engine_Temperature > 95:
        return "Engine Overheating"
    elif Fuel_Level < 10:
        return "Low Fuel"
    elif Engine_RPM < 400 and Throttle_Position > 40:
        return "Excessive Idling"
    elif Brake_Pressure > 90:
        return "Brake System Stress"
    elif Hydraulic_Pressure > 85:
        return "Hydraulic Overload"
    elif Oil_Temperature > 110:
        return "Lubrication Overheating"
    elif Vibration > 1.5:
        return "High Vibration"
    elif Throttle_Position > 90 and Speed < 5:
        return "Power Loss"
    elif Speed > 60 and Brake_Pressure < 10:
        return "Brake Failure Risk"
    elif Engine_RPM > 3200:
        return "RPM Surge"
    else:
        return "Normal"


In [None]:
import pandas as pd
import random
import time
from datetime import datetime
import csv
import os

# Generate sensor row
def generate_new_sensor_row():
    return {
        "Engine_Temperature": random.uniform(70, 110),
        "Fuel_Level": random.uniform(0, 100),
        "Engine_RPM": random.uniform(300, 3500),
        "Brake_Pressure": random.uniform(0, 100),
        "Hydraulic_Pressure": random.uniform(0, 100),
        "Oil_Temperature": random.uniform(60, 120),
        "Vibration": random.uniform(0.1, 2.0),
        "Throttle_Position": random.uniform(0, 100),
        "Speed": random.uniform(0, 70)
    }

# 📍 Desktop path (CHANGE IF USERNAME IS DIFFERENT)
desktop_path = "C:/Users/kcsat/Desktop"
sim_input_file = os.path.join(desktop_path, "simulated_input_stream.csv")

# Write headers if file doesn't exist
if not os.path.exists(sim_input_file):
    with open(sim_input_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Timestamp", "Engine_Temperature", "Fuel_Level", "Engine_RPM", "Brake_Pressure",
                         "Hydraulic_Pressure", "Oil_Temperature", "Vibration", "Throttle_Position", "Speed"])

# Simulate 1 row per second
print("🟢 Starting NEW real-time sensor simulation...\n")
while True:
    new_row = generate_new_sensor_row()
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    row_data = [timestamp] + list(new_row.values())

    with open(sim_input_file, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(row_data)

    print(f"✅ Simulated input at {timestamp}")
    time.sleep(1)


🟢 Starting NEW real-time sensor simulation...

✅ Simulated input at 2025-07-17 20:09:02
✅ Simulated input at 2025-07-17 20:09:03
✅ Simulated input at 2025-07-17 20:09:04
✅ Simulated input at 2025-07-17 20:09:05
✅ Simulated input at 2025-07-17 20:09:06
✅ Simulated input at 2025-07-17 20:09:07
✅ Simulated input at 2025-07-17 20:09:08
✅ Simulated input at 2025-07-17 20:09:09
✅ Simulated input at 2025-07-17 20:09:10
✅ Simulated input at 2025-07-17 20:09:11
✅ Simulated input at 2025-07-17 20:09:12
✅ Simulated input at 2025-07-17 20:09:13
✅ Simulated input at 2025-07-17 20:09:14
✅ Simulated input at 2025-07-17 20:09:15
✅ Simulated input at 2025-07-17 20:09:16
✅ Simulated input at 2025-07-17 20:09:17
✅ Simulated input at 2025-07-17 20:09:18
✅ Simulated input at 2025-07-17 20:09:19
✅ Simulated input at 2025-07-17 20:09:20


KeyboardInterrupt: 

In [None]:
import numpy as np
import pandas as pd
import time
from tensorflow.keras.models import load_model
import joblib
import csv
import os

# Load model & scaler
model = load_model("lstm_forecast_model.h5", compile=False)
scaler = joblib.load("scaler.save")
window_size = 10

# 📍 Desktop paths
desktop_path = "C:/Users/kcsat/Desktop"
sim_input_file = os.path.join(desktop_path, "simulated_input_stream.csv")
output_log_file = os.path.join(desktop_path, "simulated_output_log.csv")

# Create output log with header
if not os.path.exists(output_log_file):
    with open(output_log_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Timestamp", "Engine_Temperature", "Fuel_Level", "Engine_RPM", "Brake_Pressure",
                         "Hydraulic_Pressure", "Oil_Temperature", "Vibration", "Throttle_Position", "Speed",
                         "MSE", "Anomaly?", "Anomaly Type"])

buffer = []
last_line = 0

print("🧠 Monitoring new simulated sensor data...\n")

while True:
    df = pd.read_csv(sim_input_file)

    if len(df) > last_line:
        new_rows = df.iloc[last_line:]

        for _, row in new_rows.iterrows():
            features = row[1:].values.astype(float)
            buffer.append(features)

            if len(buffer) >= window_size:
                input_window = np.array(buffer[-window_size:]).reshape(1, window_size, -1)
                prediction = model.predict(input_window, verbose=0)

                actual_scaled = scaler.transform([features])
                pred_scaled = scaler.transform(prediction)
                mse = np.mean((actual_scaled - pred_scaled) ** 2)

                is_anomaly = mse > 0.03

                # Anomaly classifier
                def classify_anomaly(actual):
                    Engine_Temperature, Fuel_Level, Engine_RPM, Brake_Pressure, Hydraulic_Pressure, Oil_Temperature, Vibration, Throttle_Position, Speed = actual
                    if Engine_Temperature > 95: return "Engine Overheating"
                    elif Fuel_Level < 10: return "Low Fuel"
                    elif Engine_RPM < 400 and Throttle_Position > 40: return "Excessive Idling"
                    elif Brake_Pressure > 90: return "Brake System Stress"
                    elif Hydraulic_Pressure > 85: return "Hydraulic Overload"
                    elif Oil_Temperature > 110: return "Lubrication Overheating"
                    elif Vibration > 1.5: return "High Vibration"
                    elif Throttle_Position > 90 and Speed < 5: return "Power Loss"
                    elif Speed > 60 and Brake_Pressure < 10: return "Brake Failure Risk"
                    elif Engine_RPM > 3200: return "RPM Surge"
                    else: return "Normal"

                anomaly_type = classify_anomaly(features) if is_anomaly else "Normal"
                timestamp = row["Timestamp"]

                # Log to Desktop
                with open(output_log_file, mode='a', newline='') as file:
                    writer = csv.writer(file)
                    writer.writerow([timestamp] + list(features) + [mse, "Yes" if is_anomaly else "No", anomaly_type])

                if is_anomaly:
                    print(f"🚨 SOS: {anomaly_type} at {timestamp}")

        last_line = len(df)

    time.sleep(1)


🧠 Monitoring new simulated sensor data...



ValueError: Exception encountered when calling LSTMCell.call().

[1mDimensions must be equal, but are 9 and 24 for '{{node sequential_1/lstm_1/lstm_cell_1/MatMul}} = MatMul[T=DT_FLOAT, grad_a=false, grad_b=false, transpose_a=false, transpose_b=false](sequential_1/lstm_1/strided_slice_1, sequential_1/lstm_1/lstm_cell_1/Cast/ReadVariableOp)' with input shapes: [1,9], [24,256].[0m

Arguments received by LSTMCell.call():
  • inputs=tf.Tensor(shape=(1, 9), dtype=float32)
  • states=('tf.Tensor(shape=(1, 64), dtype=float32)', 'tf.Tensor(shape=(1, 64), dtype=float32)')
  • training=False

In [None]:
import pandas as pd
import random
import time
from datetime import datetime
import csv
import os

def generate_sensor_row():
    return {
        "Engine_Temperature": random.uniform(70, 115),
        "Fuel_Level": random.uniform(0, 100),
        "Engine_RPM": random.uniform(300, 3500),
        "Brake_Pressure": random.uniform(0, 100),
        "Hydraulic_Pressure": random.uniform(10, 100),
        "Oil_Temperature": random.uniform(60, 130),
        "Vibration": random.uniform(0.1, 2.5),
        "Throttle_Position": random.uniform(0, 100),
        "Speed": random.uniform(0, 70),
        "Machine_Operating_Mode": random.choice([0, 1]),  # 1 = Active Mode, 0 = Idle
        "Hydraulic_Oil_Temp": random.uniform(50, 120),
        "Coolant_Temperature": random.uniform(70, 115),
        "Exhaust_Temperature": random.uniform(100, 250),
        "Transmission_Gear_State": random.randint(1, 6),
        "Transmission_Oil_Temp": random.uniform(60, 140),
        "Water_in_Fuel": random.uniform(0, 10),  # % Water
        "Fuel_Pressure": random.uniform(20, 70),
        "Air_Filter_Pressure_Drop": random.uniform(0, 5),
        "Battery_Voltage": random.uniform(10.5, 15.5),
        "Engine_Load": random.uniform(0, 100),
        "Fuel_Consumption": random.uniform(1, 20),
        "PTO_RPM": random.uniform(0, 1000)
    }

# Paths
desktop_path = "C:/Users/kcsat/Desktop"
sim_input_file = os.path.join(desktop_path, "simulated_input_stream.csv")

# Write headers
if not os.path.exists(sim_input_file):
    with open(sim_input_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Timestamp"] + list(generate_sensor_row().keys()))

# Start generating data
print("🟢 Generating full-sensor data stream...\n")
while True:
    row = generate_sensor_row()
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    row_data = [timestamp] + list(row.values())

    with open(sim_input_file, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(row_data)

    print(f"✅ Logged row at {timestamp}")
    time.sleep(1)


🟢 Generating full-sensor data stream...

✅ Logged row at 2025-07-17 20:49:55
✅ Logged row at 2025-07-17 20:49:56
✅ Logged row at 2025-07-17 20:49:57
✅ Logged row at 2025-07-17 20:49:58
✅ Logged row at 2025-07-17 20:49:59


KeyboardInterrupt: 

In [None]:
import numpy as np
import pandas as pd
import time
from tensorflow.keras.models import load_model
import joblib
import csv
import os

# Load Model and Scaler
model = load_model("lstm_forecast_model.h5", compile=False)
scaler = joblib.load("scaler.save")
window_size = 10

# Paths
desktop_path = "C:/Users/kcsat/Desktop"
sim_input_file = os.path.join(desktop_path, "simulated_input_stream.csv")
output_log_file = os.path.join(desktop_path, "simulated_output_log.csv")

# Prepare Output File
if not os.path.exists(output_log_file):
    with open(output_log_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        header = ["Timestamp"] + list(pd.read_csv(sim_input_file, nrows=1).columns[1:]) + ["MSE", "Anomaly?", "Anomaly Type"]
        writer.writerow(header)

buffer = []
last_line = 0

# Anomaly Classifier
def classify_anomaly(features_dict):
    f = features_dict
    if f["Engine_RPM"] < 500 and f["Machine_Operating_Mode"] == 1:
        return "Excessive Idling"
    if f["Hydraulic_Pressure"] > 90 and f["Hydraulic_Oil_Temp"] > 100:
        return "Hydraulic Overload"
    if any([f["Engine_Temperature"] > 105, f["Coolant_Temperature"] > 105, f["Exhaust_Temperature"] > 240]):
        return "Engine Overheating"
    if f["Transmission_Oil_Temp"] > 120 and f["Transmission_Gear_State"] in [3, 4, 5, 6]:
        return "Transmission Failure Risk"
    if f["Brake_Pressure"] < 5 and f["Machine_Operating_Mode"] == 1:
        return "Brake System Failure"
    if f["Water_in_Fuel"] > 5 and f["Fuel_Pressure"] < 30:
        return "Fuel System Contamination"
    if f["Air_Filter_Pressure_Drop"] > 4:
        return "Air Filter Blockage"
    if f["Battery_Voltage"] < 11.5 or f["Battery_Voltage"] > 15:
        return "Electrical Fault"
    if f["Engine_Load"] > 80 and f["Engine_RPM"] < 600:
        return "Overloading or Engine Stress"
    if f["PTO_RPM"] > 0 and f["Machine_Operating_Mode"] == 0:
        return "PTO System Misuse or Malfunction"
    return "Normal"

print("🧠 Anomaly Monitor Active...\n")

while True:
    df = pd.read_csv(sim_input_file)
    if len(df) > last_line:
        new_rows = df.iloc[last_line:]

        for _, row in new_rows.iterrows():
            features = row[1:].astype(float).values
            buffer.append(features)

            if len(buffer) >= window_size:
                input_seq = np.array(buffer[-window_size:]).reshape(1, window_size, -1)
                prediction = model.predict(input_seq, verbose=0)

                actual_scaled = scaler.transform([features])
                pred_scaled = scaler.transform(prediction)
                mse = np.mean((actual_scaled - pred_scaled) ** 2)
                is_anomaly = mse > 0.03

                features_dict = dict(zip(df.columns[1:], features))
                anomaly_type = classify_anomaly(features_dict) if is_anomaly else "Normal"
                timestamp = row["Timestamp"]

                with open(output_log_file, mode='a', newline='') as file:
                    writer = csv.writer(file)
                    writer.writerow([timestamp] + list(features) + [mse, "Yes" if is_anomaly else "No", anomaly_type])

                if is_anomaly:
                    print(f"🚨 Anomaly: {anomaly_type} at {timestamp}")

        last_line = len(df)

    time.sleep(1)


🧠 Anomaly Monitor Active...



ParserError: Error tokenizing data. C error: Expected 10 fields in line 25, saw 23


In [None]:
import pandas as pd
import json
import random
import time
import os

# Path for live stream (shared between both scripts)
stream_path = os.path.expanduser("~/Desktop/live_stream.jsonl")

# Remove existing stream file if exists
if os.path.exists(stream_path):
    os.remove(stream_path)

def generate_data():
    return {
        "Engine_Temperature": round(random.uniform(60, 115), 2),
        "Fuel_Level": round(random.uniform(5, 100), 2),
        "Fuel_Pressure": round(random.uniform(15, 40), 2),
        "Water_in_Fuel": round(random.uniform(0, 1), 2),
        "Engine_Oil_Pressure": round(random.uniform(15, 80), 2),
        "Engine_RPM": round(random.uniform(600, 2500), 2),
        "Hydraulic_Oil_Temp": round(random.uniform(40, 110), 2),
        "Hydraulic_Pressure": round(random.uniform(1000, 3500), 2),
        "Transmission_Oil_Temp": round(random.uniform(60, 130), 2),
        "Brake_Pressure": round(random.uniform(50, 150), 2),
        "Coolant_Temperature": round(random.uniform(60, 115), 2),
        "Air_Filter_Pressure_Drop": round(random.uniform(0.2, 1.2), 2),
        "Battery_Voltage": round(random.uniform(11, 14.8), 2),
        "PTO_RPM": round(random.uniform(0, 1000), 2),
        "Engine_Load": round(random.uniform(10, 100), 2),
        "Exhaust_Temperature": round(random.uniform(150, 700), 2),
        "Transmission_Gear_State": random.choice(["Neutral", "Drive", "Reverse"]),
        "Machine_Operating_Mode": random.choice(["Idle", "Working", "Transit"]),
        "Condition_Label": "Normal"
    }

print("🚜 Real-time data generation started... Logging to live_stream.jsonl\n")

while True:
    data = generate_data()
    with open(stream_path, 'a') as f:
        f.write(json.dumps(data) + '\n')
    time.sleep(1)

# Live stream source
stream_path = os.path.expanduser("~/Desktop/live_stream.jsonl")

# Log CSV output
log_path = os.path.expanduser("~/Desktop/anomaly_log.csv")

# Set of already processed lines to avoid rechecking
seen_lines = set()

# Initialize log file with headers
if not os.path.exists(log_path):
    with open(log_path, 'w') as f:
        headers = [
            "Timestamp", "Anomaly_Types"
        ] + [
            "Engine_Temperature", "Fuel_Level", "Fuel_Pressure", "Water_in_Fuel",
            "Engine_Oil_Pressure", "Engine_RPM", "Hydraulic_Oil_Temp", "Hydraulic_Pressure",
            "Transmission_Oil_Temp", "Brake_Pressure", "Coolant_Temperature",
            "Air_Filter_Pressure_Drop", "Battery_Voltage", "PTO_RPM", "Engine_Load",
            "Exhaust_Temperature", "Transmission_Gear_State", "Machine_Operating_Mode", "Condition_Label"
        ]
        f.write(','.join(headers) + '\n')

def check_anomalies(data):
    anomalies = []

    if data["Engine_Temperature"] > 105:
        anomalies.append("Engine Overheating")
    if data["Engine_RPM"] < 800 and data["Machine_Operating_Mode"] == "Idle":
        anomalies.append("Excessive Idling")
    if data["Hydraulic_Pressure"] > 3200:
        anomalies.append("Hydraulic Overload")
    if data["Fuel_Level"] < 10:
        anomalies.append("Low Fuel Warning")
    if data["Brake_Pressure"] < 70:
        anomalies.append("Brake Failure Risk")
    if data["Coolant_Temperature"] > 110:
        anomalies.append("Coolant Overheat")
    if data["Battery_Voltage"] < 11.5:
        anomalies.append("Low Battery")
    if data["Air_Filter_Pressure_Drop"] > 1.0:
        anomalies.append("Air Filter Clogging")
    if data["Water_in_Fuel"] > 0.7:
        anomalies.append("Fuel Contamination")
    if data["Engine_Load"] > 95 and data["Engine_RPM"] > 2200:
        anomalies.append("Overloading")

    return anomalies if anomalies else ["Normal"]

print("🧠 Anomaly detection started... Logging to anomaly_log.csv\n")

while True:
    if os.path.exists(stream_path):
        with open(stream_path, 'r') as f:
            lines = f.readlines()
        
        for line in lines:
            if line not in seen_lines:
                seen_lines.add(line)
                data = json.loads(line)
                timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
                anomaly_result = check_anomalies(data)

                row = [timestamp, "; ".join(anomaly_result)]
                for key in [
                    "Engine_Temperature", "Fuel_Level", "Fuel_Pressure", "Water_in_Fuel",
                    "Engine_Oil_Pressure", "Engine_RPM", "Hydraulic_Oil_Temp", "Hydraulic_Pressure",
                    "Transmission_Oil_Temp", "Brake_Pressure", "Coolant_Temperature",
                    "Air_Filter_Pressure_Drop", "Battery_Voltage", "PTO_RPM", "Engine_Load",
                    "Exhaust_Temperature", "Transmission_Gear_State", "Machine_Operating_Mode", "Condition_Label"
                ]:
                    row.append(data[key])
                
                # Append row to log file
                with open(log_path, 'a') as f:
                    f.write(','.join(map(str, row)) + '\n')

    time.sleep(1)
