<a href="https://colab.research.google.com/github/DGuilherme/PMTese/blob/main/LSTMAllDatasets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports


In [4]:
!pip install tensorflow

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam



In [5]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Compare all datasets

In [13]:
def run_predictive_maintenance(dataset_name, scaler_type='minmax', seq_length=50):
  # Load data
  column_names = ['id', 'cycle', 'setting1', 'setting2', 'setting3'] + [f'sensor{i}' for i in range(1, 22)]
  train_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/train_{dataset_name}.txt/train_{dataset_name}.txt', delim_whitespace=True, header=None)
  test_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/test_{dataset_name}.txt/test_{dataset_name}.txt', delim_whitespace=True, header=None)
  rul_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/RUL_{dataset_name}.txt/RUL_{dataset_name}.txt', delim_whitespace=True, header=None)


  train_data.columns = column_names
  test_data.columns = column_names
  rul_data.columns = ['RUL']

  # Generate RUL for training data
  max_cycle = train_data.groupby('id')['cycle'].max().reset_index().rename(columns={'cycle': 'cycle_max'})
  train_data = train_data.merge(max_cycle, on='id')
  train_data['RUL'] = train_data['cycle_max'] - train_data['cycle']
  train_data.drop(columns=['cycle_max'], inplace=True)

  # Generate RUL for test data
  max_cycle_test = test_data.groupby('id')['cycle'].max().reset_index().rename(columns={'cycle': 'cycle_max'})
  # Merge last observed cycle and ground-truth RUL on 'id'
  test_data = test_data.merge(max_cycle_test, on='id')
  test_data = test_data.merge(rul_data, on='id')

  test_data['RUL'] = test_data['RUL'] + test_data['max_cycle'] - test_data['cycle']
  test_data.drop(columns=['max_cycle'], inplace=True)

  # Select useful features (as suggested in papers)
  useful_sensor_cols = ['setting1', 'setting2', 'setting3'] + \
      [f'sensor{i}' for i in [2, 3, 4, 7, 8, 11, 12, 13, 14, 15, 17, 20, 21]]

  train_data = train_data[['id', 'cycle'] + useful_sensor_cols + ['RUL']]
  test_data = test_data[['id', 'cycle'] + useful_sensor_cols + ['RUL']]

  # Normalize sensor values
  scaler = MinMaxScaler()
  train_data[useful_sensor_cols] = scaler.fit_transform(train_data[useful_sensor_cols])
  test_data[useful_sensor_cols] = scaler.transform(test_data[useful_sensor_cols])

  # Windowing function
  def create_sequences(data, sequence_length=30):
      sequences = []
      labels = []
      engines = data['id'].unique()
      for engine_id in engines:
          engine_data = data[data['id'] == engine_id]
          feature_data = engine_data[useful_sensor_cols].values
          label_data = engine_data['RUL'].values
          for i in range(len(feature_data) - sequence_length + 1):
              sequences.append(feature_data[i:i+sequence_length])
              labels.append(label_data[i+sequence_length-1])
      return np.array(sequences), np.array(labels)

  # Create sequences
  X_train, y_train = create_sequences(train_data)
  X_test, y_test = create_sequences(test_data)

  print("Training set:", X_train.shape, y_train.shape)
  print("Testing set:", X_test.shape, y_test.shape)

  # code LSTM

  # HERE






  return


In [7]:
def compare_datasets(datasets, scaler_type='minmax', seq_length=50):
  results = []
  for dataset_name in datasets:
    run_predictive_maintenance(dataset_name, scaler_type, seq_length)
    #rmse = np.sqrt(mse)  # Calculate RMSE
    #results.append([dataset_name, rmse, mae])

  # Create a pandas DataFrame for the results
  #results_df = pd.DataFrame(results, columns=['Dataset', 'RMSE', 'MAE'])
  return

In [14]:
# Define the list of datasets
datasets = ['FD001', 'FD002', 'FD003', 'FD004']  # Add your dataset names here

# Run the comparison
compare_datasets(datasets, scaler_type='minmax', seq_length=60)

# Display the results table
#display(results_df)

  train_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/train_{dataset_name}.txt/train_{dataset_name}.txt', delim_whitespace=True, header=None)
  test_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/test_{dataset_name}.txt/test_{dataset_name}.txt', delim_whitespace=True, header=None)
  rul_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/RUL_{dataset_name}.txt/RUL_{dataset_name}.txt', delim_whitespace=True, header=None)


---------------------------------- Started RUL calculation ----------------------------------
id              1.0000
cycle           6.0000
setting1       -0.0043
setting2       -0.0001
setting3      100.0000
sensor1       518.6700
sensor2       642.1000
sensor3      1584.4700
sensor4      1398.3700
sensor5        14.6200
sensor6        21.6100
sensor7       554.6700
sensor8      2388.0200
sensor9      9049.6800
sensor10        1.3000
sensor11       47.1600
sensor12      521.6800
sensor13     2388.0300
sensor14     8132.8500
sensor15        8.4108
sensor16        0.0300
sensor17      391.0000
sensor18     2388.0000
sensor19      100.0000
sensor20       38.9800
sensor21       23.3669
cycle_max     192.0000
Name: 5, dtype: float64
---------------------------------- MAX RUL - Cycle ----------------------------------
id              1.0000
cycle           6.0000
setting1       -0.0043
setting2       -0.0001
setting3      100.0000
sensor1       518.6700
sensor2       642.1000
sensor3      1

  train_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/train_{dataset_name}.txt/train_{dataset_name}.txt', delim_whitespace=True, header=None)
  test_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/test_{dataset_name}.txt/test_{dataset_name}.txt', delim_whitespace=True, header=None)
  rul_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/RUL_{dataset_name}.txt/RUL_{dataset_name}.txt', delim_whitespace=True, header=None)


---------------------------------- Started RUL calculation ----------------------------------
id              1.0000
cycle           6.0000
setting1       25.0045
setting2        0.6205
setting3       60.0000
sensor1       462.5400
sensor2       537.0200
sensor3      1266.3800
sensor4      1048.7200
sensor5         7.0500
sensor6         9.0300
sensor7       175.1700
sensor8      1915.1500
sensor9      7996.1000
sensor10        0.9400
sensor11       36.7800
sensor12      164.2700
sensor13     2028.0100
sensor14     7868.8700
sensor15       10.8912
sensor16        0.0200
sensor17      306.0000
sensor18     1915.0000
sensor19       84.9300
sensor20       14.2800
sensor21        8.5590
cycle_max     149.0000
Name: 5, dtype: float64
---------------------------------- MAX RUL - Cycle ----------------------------------
id              1.0000
cycle           6.0000
setting1       25.0045
setting2        0.6205
setting3       60.0000
sensor1       462.5400
sensor2       537.0200
sensor3      1

  train_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/train_{dataset_name}.txt/train_{dataset_name}.txt', delim_whitespace=True, header=None)
  test_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/test_{dataset_name}.txt/test_{dataset_name}.txt', delim_whitespace=True, header=None)
  rul_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/RUL_{dataset_name}.txt/RUL_{dataset_name}.txt', delim_whitespace=True, header=None)


---------------------------------- Started RUL calculation ----------------------------------
id              1.0000
cycle           6.0000
setting1        0.0011
setting2       -0.0005
setting3      100.0000
sensor1       518.6700
sensor2       642.2400
sensor3      1584.0900
sensor4      1400.0100
sensor5        14.6200
sensor6        21.6100
sensor7       554.7500
sensor8      2388.0000
sensor9      9074.9800
sensor10        1.3000
sensor11       47.0700
sensor12      522.4200
sensor13     2388.0200
sensor14     8144.9200
sensor15        8.4152
sensor16        0.0300
sensor17      393.0000
sensor18     2388.0000
sensor19      100.0000
sensor20       38.9200
sensor21       23.4281
cycle_max     259.0000
Name: 5, dtype: float64
---------------------------------- MAX RUL - Cycle ----------------------------------
id              1.0000
cycle           6.0000
setting1        0.0011
setting2       -0.0005
setting3      100.0000
sensor1       518.6700
sensor2       642.2400
sensor3      1

  train_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/train_{dataset_name}.txt/train_{dataset_name}.txt', delim_whitespace=True, header=None)
  test_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/test_{dataset_name}.txt/test_{dataset_name}.txt', delim_whitespace=True, header=None)
  rul_data = pd.read_csv(f'/content/drive/MyDrive/Python/predictive-maintenance-main/datasets/cmapss/RUL_{dataset_name}.txt/RUL_{dataset_name}.txt', delim_whitespace=True, header=None)


---------------------------------- Started RUL calculation ----------------------------------
id              1.0000
cycle           6.0000
setting1       34.9996
setting2        0.8400
setting3      100.0000
sensor1       449.4400
sensor2       554.7700
sensor3      1352.8700
sensor4      1117.0100
sensor5         5.4800
sensor6         7.9700
sensor7       193.8200
sensor8      2222.7700
sensor9      8340.0000
sensor10        1.0200
sensor11       41.4400
sensor12      181.9000
sensor13     2387.8700
sensor14     8054.1000
sensor15        9.3346
sensor16        0.0200
sensor17      330.0000
sensor18     2223.0000
sensor19      100.0000
sensor20       14.9100
sensor21        8.9057
cycle_max     321.0000
Name: 5, dtype: float64
---------------------------------- MAX RUL - Cycle ----------------------------------
id              1.0000
cycle           6.0000
setting1       34.9996
setting2        0.8400
setting3      100.0000
sensor1       449.4400
sensor2       554.7700
sensor3      1