# 🚀 Anomaly Detection with LSTM Autoencoder on NASA C-MAPSS Dataset

In [None]:
# Step 1: Install dependencies (Colab usually has these)
!pip install pandas numpy scikit-learn tensorflow


In [None]:
# Step 2: Import Libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, RepeatVector
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt


In [None]:
# Step 3: Upload the dataset
# `google.colab` is imported here rather than in Step 2, so only this cell
# depends on it.
# NOTE(review): presumably this module is only importable inside a Colab
# runtime -- confirm before running the notebook elsewhere.
from google.colab import files
# The return value is kept in `uploaded`, but no later cell in this notebook
# reads it; Step 4 opens the file by name instead.
# NOTE(review): presumably the upload is saved to the working directory under
# its original name, which is what Step 4 relies on -- confirm.
uploaded = files.upload()


In [None]:
# Step 4: Load the dataset (adjust file name if different)
file_name = 'train_FD001.txt'

# The file has no header row, so the column names are supplied here:
# two id columns, three operational settings, then 21 sensor readings.
columns = (
    ['unit_number', 'time_in_cycles']
    + [f'operational_setting_{i}' for i in range(1, 4)]
    + [f'sensor_{i}' for i in range(1, 22)]
)

# The separator is a regex, so it must be a raw string: a plain '\s+' contains
# an invalid escape sequence and triggers a warning on modern Python.
df = pd.read_csv(file_name, sep=r'\s+', header=None, names=columns)


In [None]:
# Step 5: Normalize sensor data
# Every column whose name contains "sensor" is rescaled by a MinMaxScaler that
# is fitted on the full frame; the scaled values overwrite the originals.
sensor_cols = [name for name in df.columns if "sensor" in name]
scaler = MinMaxScaler()
scaled_sensors = scaler.fit_transform(df[sensor_cols])
df[sensor_cols] = scaled_sensors


In [None]:
# Step 6: Prepare training sequences from early healthy cycles
# For each engine unit, keep only the rows with time_in_cycles <= 50 and cut
# them into overlapping windows of `sequence_length` consecutive cycles.
sequence_length = 30
normal_data = []

# groupby splits the frame once instead of re-filtering it for every unit;
# sort=False keeps units in order of first appearance, like .unique() did.
for unit, unit_data in df.groupby('unit_number', sort=False):
    unit_data = unit_data.sort_values('time_in_cycles')
    # Convert to a NumPy array once so each window is a cheap array slice.
    early_cycles = unit_data.loc[
        unit_data['time_in_cycles'] <= 50, sensor_cols
    ].to_numpy()
    # "+ 1" keeps the final full window: N rows yield N - sequence_length + 1
    # windows. Units with fewer than sequence_length rows yield none.
    for start in range(len(early_cycles) - sequence_length + 1):
        normal_data.append(early_cycles[start:start + sequence_length])

# Stack into one array of shape (num_windows, sequence_length, num_sensors).
normal_data = np.array(normal_data)
normal_data.shape


In [None]:
# Step 7: Build and train the Autoencoder
# Imported in this cell so it runs without changes to the Step 2 import cell.
from tensorflow.keras.layers import Dense, TimeDistributed

n_features = len(sensor_cols)

# Encoder: compress each (sequence_length, n_features) window into one vector.
input_layer = Input(shape=(sequence_length, n_features))
encoded = LSTM(64, activation='relu', return_sequences=False)(input_layer)

# Decoder: repeat the encoding once per timestep and unroll it with an LSTM.
bottleneck = RepeatVector(sequence_length)(encoded)
decoder_hidden = LSTM(64, activation='relu', return_sequences=True)(bottleneck)

# Project the 64 hidden units back to n_features at every timestep. Without
# this layer the model outputs (sequence_length, 64), which cannot be compared
# against the (sequence_length, n_features) target by the MSE loss.
decoded = TimeDistributed(Dense(n_features))(decoder_hidden)

autoencoder = Model(inputs=input_layer, outputs=decoded)
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

# The input is also the target: the model learns to reconstruct the windows
# built in Step 6. 10% of the samples are held out for validation.
history = autoencoder.fit(normal_data, normal_data,
                          epochs=20,
                          batch_size=32,
                          validation_split=0.1,
                          verbose=1)


In [None]:
# Step 8: Plot loss
# Draw the per-epoch training and validation loss recorded by fit() on one
# set of axes.
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    plt.plot(history.history[history_key], label=curve_label)

plt.title('Autoencoder Training Loss')
plt.legend()
plt.grid()
plt.show()
