In [5]:
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import MinMaxScaler

# Define the column names as per the dataset's documentation
col_names = ['unit_number', 'cycle', 'setting1', 'setting2', 'setting3']
col_names += ['sensor' + str(i) for i in range(1, 22)]

# Load the training data
df_train = pd.read_csv('train_FD001.txt', sep=' ', header=None, names=col_names)

# Drop the last two columns (they are empty/NaN)
df_train.dropna(axis=1, inplace=True)

print("Data loaded successfully:")
print(df_train.head())

Data loaded successfully:
     unit_number   cycle  setting1  setting2  setting3  sensor1  sensor2  \
1 1      -0.0007 -0.0004     100.0    518.67    641.82  1589.70  1400.60   
  2       0.0019 -0.0003     100.0    518.67    642.15  1591.82  1403.14   
  3      -0.0043  0.0003     100.0    518.67    642.35  1587.99  1404.20   
  4       0.0007  0.0000     100.0    518.67    642.35  1582.79  1401.87   
  5      -0.0019 -0.0002     100.0    518.67    642.37  1582.85  1406.22   

     sensor3  sensor4  sensor5  ...  sensor10  sensor11  sensor12  sensor13  \
1 1    14.62    21.61   554.36  ...    521.66   2388.02   8138.62    8.4195   
  2    14.62    21.61   553.75  ...    522.28   2388.07   8131.49    8.4318   
  3    14.62    21.61   554.26  ...    522.42   2388.03   8133.23    8.4178   
  4    14.62    21.61   554.45  ...    522.86   2388.08   8133.83    8.3682   
  5    14.62    21.61   554.00  ...    522.19   2388.04   8133.80    8.4294   

     sensor14  sensor15  sensor16  sensor1

In [6]:
sensor_cols = [col for col in df_train.columns if col.startswith('sensor')]

In [7]:
# Find the last cycle (failure point) for each engine
max_cycles = df_train.groupby('unit_number')['cycle'].max().to_frame(name='max_cycle')

# Merge this back into the main dataframe
df_train = df_train.merge(max_cycles, left_on='unit_number', right_index=True)

# Calculate the RUL for each row
df_train['RUL'] = df_train['max_cycle'] - df_train['cycle']

In [8]:
# Define our "failure window"
FAILURE_WINDOW = 30

# Create the label: 1 if RUL <= 30, 0 otherwise
df_train['label'] = (df_train['RUL'] <= FAILURE_WINDOW).astype(int)

# We don't need these columns anymore
df_train.drop(columns=['max_cycle', 'RUL'], inplace=True)

print("Data with labels:")
print(df_train.tail()) # .tail() shows the end of an engine's life

Data with labels:
         unit_number   cycle  setting1  setting2  setting3  sensor1  sensor2  \
100 196      -0.0004 -0.0003     100.0    518.67    643.49  1597.98  1428.63   
    197      -0.0016 -0.0005     100.0    518.67    643.54  1604.50  1433.58   
    198       0.0004  0.0000     100.0    518.67    643.42  1602.46  1428.18   
    199      -0.0011  0.0003     100.0    518.67    643.23  1605.26  1426.53   
    200      -0.0032 -0.0005     100.0    518.67    643.85  1600.38  1432.14   

         sensor3  sensor4  sensor5  ...  sensor11  sensor12  sensor13  \
100 196    14.62    21.61   551.43  ...   2388.26   8137.60    8.4956   
    197    14.62    21.61   550.86  ...   2388.22   8136.50    8.5139   
    198    14.62    21.61   550.94  ...   2388.24   8141.05    8.5646   
    199    14.62    21.61   550.68  ...   2388.23   8139.29    8.5389   
    200    14.62    21.61   550.79  ...   2388.26   8137.33    8.5036   

         sensor14  sensor15  sensor16  sensor17  sensor18  sen

In [9]:
scaler = MinMaxScaler()

# Fit and transform the sensor columns
df_train[sensor_cols] = scaler.fit_transform(df_train[sensor_cols])

# Save this scaler object to a file. We NEED this for the test data and our app.
joblib.dump(scaler, 'sensor_scaler.pkl')
print("Sensor data scaled and scaler saved to 'sensor_scaler.pkl'")

Sensor data scaled and scaler saved to 'sensor_scaler.pkl'


In [10]:
SEQUENCE_LENGTH = 50

X_train_list = []
y_train_list = []

# Iterate over each unique engine
for unit in df_train['unit_number'].unique():
    # Get all data for this one engine
    engine_data = df_train[df_train['unit_number'] == unit]

    # Get this engine's sensor data and labels as numpy arrays
    sensor_data = engine_data[sensor_cols].values
    labels = engine_data['label'].values

    # Create sliding windows
    for i in range(len(sensor_data) - SEQUENCE_LENGTH + 1):
        # The sequence of features (X)
        X_train_list.append(sensor_data[i : i + SEQUENCE_LENGTH])

        # The label (y) for that sequence (taken from the LAST step)
        y_train_list.append(labels[i + SEQUENCE_LENGTH - 1])

# Convert lists to numpy arrays
X_train = np.array(X_train_list)
y_train = np.array(y_train_list)

# Check the final 3D shape
print(f"X_train shape: {X_train.shape}") # (num_samples, 50, num_sensors)
print(f"y_train shape: {y_train.shape}") # (num_samples,)

X_train shape: (15426, 50, 19)
y_train shape: (15426,)


In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [12]:
# Get the number of features (e.g., 14 sensors)
num_features = X_train.shape[2] 

model = Sequential()

# Input Layer (LSTM)
# 50 units, return_sequences=True because we stack another LSTM
model.add(LSTM(units=50, return_sequences=True, input_shape=(SEQUENCE_LENGTH, num_features)))
model.add(Dropout(0.2)) # Prevents overfitting

# Hidden Layer (LSTM)
model.add(LSTM(units=50))
model.add(Dropout(0.2))

# Output Layer (Dense)
# 1 unit (probability of failure)
# 'sigmoid' activation for binary classification (output between 0 and 1)
model.add(Dense(units=1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', 
              loss='binary_crossentropy', 
              metrics=['accuracy'])

model.summary()

  super().__init__(**kwargs)


In [13]:
print("Starting model training...")
history = model.fit(
    X_train, 
    y_train, 
    epochs=10,        # 10 epochs is a good start
    batch_size=64,
    validation_split=0.2 # Use 20% of data for validation
)
print("Training complete.")

# Save the trained model to a file
model.save('engine_failure_model.h5')
print("Model saved to 'engine_failure_model.h5'")

Starting model training...
Epoch 1/10
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 57ms/step - accuracy: 0.9957 - loss: 0.0195 - val_accuracy: 1.0000 - val_loss: 1.5782e-04
Epoch 2/10
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 58ms/step - accuracy: 1.0000 - loss: 1.4675e-04 - val_accuracy: 1.0000 - val_loss: 7.3870e-05
Epoch 3/10
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 55ms/step - accuracy: 1.0000 - loss: 8.3337e-05 - val_accuracy: 1.0000 - val_loss: 4.7983e-05
Epoch 4/10
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 54ms/step - accuracy: 1.0000 - loss: 5.9213e-05 - val_accuracy: 1.0000 - val_loss: 3.5204e-05
Epoch 5/10
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 56ms/step - accuracy: 1.0000 - loss: 4.5873e-05 - val_accuracy: 1.0000 - val_loss: 2.6934e-05
Epoch 6/10
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 54ms/step - accuracy: 1.0000 - loss: 3.



Training complete.
Model saved to 'engine_failure_model.h5'
