In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.fft import fft
import matplotlib.dates as mdates
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit

### Reading the data

In [2]:
df = pd.read_csv("/mnt/d/ML-Datasets/MetroPT/clean_data.csv")
df.head()

Unnamed: 0,timestamp,TP2,TP3,H1,DV_pressure,Reservoirs,Oil_temperature,Flowmeter,Motor_current,COMP,...,Pressure_switch,Oil_level,Caudal_impulses,failure_type_No Failure,failure_type_Air Leak,failure_type_Oil Leak,failure_component_No Failur Component,failure_component_Clients,failure_component_Air Dryer,failure_component_Compressor
0,2022-01-01 06:00:00,-0.012,9.76,9.76,-0.028,1.576,63.34,19.05,3.955,1,...,0,0,0,1,0,0,1,0,0,0
1,2022-01-01 06:00:01,-0.012,9.76,9.76,-0.028,1.578,63.25,19.05,4.027,1,...,0,0,0,1,0,0,1,0,0,0
2,2022-01-01 06:00:02,-0.01,9.76,9.76,-0.028,1.578,63.3,19.05,3.945,1,...,0,0,0,1,0,0,1,0,0,0
3,2022-01-01 06:00:03,-0.012,9.76,9.76,-0.03,1.576,63.2,19.05,3.93,1,...,0,0,0,1,0,0,1,0,0,0
4,2022-01-01 06:00:04,-0.012,9.76,9.76,-0.03,1.578,63.16,19.05,3.994,1,...,0,0,0,1,0,0,1,0,0,0


In [3]:
# Convert the timestamp column to datetime if it's not already
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Select the desired intervals
interval1 = df[(df['timestamp'] >= '2022-02-20 21:53:00') & (df['timestamp'] <= '2022-03-04 02:00:00')]
interval2 = df[(df['timestamp'] >= '2022-03-20 14:54:00') & (df['timestamp'] <= '2022-03-26 15:24:00')]
interval3 = df[df['timestamp'] >= '2022-05-24 12:00:00']

# Concatenate the selected intervals into a new DataFrame
df = pd.concat([interval1, interval2, interval3])

In [4]:
len(df)

1853029

In [5]:
df.columns

Index(['timestamp', 'TP2', 'TP3', 'H1', 'DV_pressure', 'Reservoirs',
       'Oil_temperature', 'Flowmeter', 'Motor_current', 'COMP', 'DV_eletric',
       'Towers', 'MPG', 'LPS', 'Pressure_switch', 'Oil_level',
       'Caudal_impulses', 'failure_type_No Failure', 'failure_type_Air Leak',
       'failure_type_Oil Leak', 'failure_component_No Failur Component',
       'failure_component_Clients', 'failure_component_Air Dryer',
       'failure_component_Compressor'],
      dtype='object')

In [6]:
sensor_columns = ['TP2', 'TP3', 'H1', 'DV_pressure', 'Reservoirs',
                  'Oil_temperature', 'Flowmeter', 'Motor_current', 'COMP', 'DV_eletric',
                  'Towers', 'MPG', 'LPS', 'Pressure_switch', 'Oil_level',
                  'Caudal_impulses']

# Find the data types of the sensor columns
print("Data types of sensor columns:")
print(df[sensor_columns].dtypes)

Data types of sensor columns:
TP2                float64
TP3                float64
H1                 float64
DV_pressure        float64
Reservoirs         float64
Oil_temperature    float64
Flowmeter          float64
Motor_current      float64
COMP                 int64
DV_eletric           int64
Towers               int64
MPG                  int64
LPS                  int64
Pressure_switch      int64
Oil_level            int64
Caudal_impulses      int64
dtype: object


In [7]:
# Change data types to float16 and int16
df[sensor_columns] = df[sensor_columns].astype({
    'TP2': 'float16',
    'TP3': 'float16',
    'H1': 'float16',
    'DV_pressure': 'float16',
    'Reservoirs': 'float16',
    'Oil_temperature': 'float16',
    'Flowmeter': 'float16',
    'Motor_current': 'float16',
    'COMP': 'int16',
    'DV_eletric': 'int16',
    'Towers': 'int16',
    'MPG': 'int16',
    'LPS': 'int16',
    'Pressure_switch': 'int16',
    'Oil_level': 'int16',
    'Caudal_impulses': 'int16'
})

# Find the updated data types of the sensor columns
print("Updated data types of sensor columns:")
print(df[sensor_columns].dtypes)

Updated data types of sensor columns:
TP2                float16
TP3                float16
H1                 float16
DV_pressure        float16
Reservoirs         float16
Oil_temperature    float16
Flowmeter          float16
Motor_current      float16
COMP                 int16
DV_eletric           int16
Towers               int16
MPG                  int16
LPS                  int16
Pressure_switch      int16
Oil_level            int16
Caudal_impulses      int16
dtype: object


In [8]:
# Filter the integer columns
integer_columns = df[sensor_columns].select_dtypes(include=['int16']).columns

# Get the minimum and maximum values for the integer columns
print("Minimum values of integer columns:")
print(df[integer_columns].min())

print("Maximum values of integer columns:")
print(df[integer_columns].max())

Minimum values of integer columns:
COMP               0
DV_eletric         0
Towers             0
MPG                0
LPS                0
Pressure_switch    0
Oil_level          0
Caudal_impulses    0
dtype: int16
Maximum values of integer columns:
COMP               1
DV_eletric         1
Towers             1
MPG                1
LPS                1
Pressure_switch    0
Oil_level          0
Caudal_impulses    1
dtype: int16


In [9]:
# Remove the 'Pressure_switch' column
df = df.drop(['Pressure_switch', 'Oil_level'], axis=1)

# Specify the columns to be scaled
columns_to_scale = ['TP2', 'TP3', 'H1', 'DV_pressure', 'Reservoirs',
                    'Oil_temperature', 'Flowmeter', 'Motor_current']

# Create a scaler object
scaler = MinMaxScaler()

# Fit the scaler on the specified columns
scaler.fit(df[columns_to_scale])

# Transform the specified columns using the fitted scaler
df[columns_to_scale] = scaler.transform(df[columns_to_scale])

# Print the updated DataFrame
df.head()

Unnamed: 0,timestamp,TP2,TP3,H1,DV_pressure,Reservoirs,Oil_temperature,Flowmeter,Motor_current,COMP,...,MPG,LPS,Caudal_impulses,failure_type_No Failure,failure_type_Air Leak,failure_type_Oil Leak,failure_component_No Failur Component,failure_component_Clients,failure_component_Air Dryer,failure_component_Compressor
3656176,2022-02-20 21:53:00,0.001839,0.804199,0.804688,0.001432,0.34375,0.506836,0.007812,0.002178,1,...,1,0,0,1,0,0,1,0,0,0
3656177,2022-02-20 21:53:01,0.001656,0.804199,0.804688,0.001194,0.341797,0.505859,0.007812,0.001635,1,...,1,0,0,1,0,0,1,0,0,0
3656178,2022-02-20 21:53:02,0.001839,0.803711,0.804688,0.001432,0.341797,0.506836,0.007812,0.001635,1,...,1,0,0,1,0,0,1,0,0,0
3656179,2022-02-20 21:53:03,0.001839,0.803711,0.804688,0.001194,0.341797,0.506836,0.007812,0.001635,1,...,1,0,0,1,0,0,1,0,0,0
3656180,2022-02-20 21:53:04,0.002024,0.803711,0.804688,0.001194,0.34375,0.506836,0.007812,0.001635,1,...,1,0,0,1,0,0,1,0,0,0


In [10]:
# Assuming X and y are already defined
X = df[['TP2', 'TP3', 'H1', 'DV_pressure', 'Reservoirs', 'Oil_temperature', 'Flowmeter', 'Motor_current', 'COMP', 'DV_eletric', 'Towers', 'MPG', 'LPS', 'Caudal_impulses']]
y = df[['failure_type_No Failure', 'failure_type_Air Leak', 'failure_type_Oil Leak', 'failure_component_No Failur Component', 'failure_component_Clients', 'failure_component_Air Dryer', 'failure_component_Compressor']]

# Calculate the size of the training set (100% of the data)
train_set_size = int(X.shape[0] * 0.6)  # Use 80% of the data for training

# Split the data into training and validation sets
X_train, X_val = X.iloc[:train_set_size], X.iloc[train_set_size:]
y_train, y_val = y.iloc[:train_set_size], y.iloc[train_set_size:]

# Reshape the input data for LSTM
X_train = X_train.values.reshape((-1, X_train.shape[1], 1))
X_val = X_val.values.reshape((-1, X_val.shape[1], 1))


In [11]:
X_train.shape

(1111817, 14, 1)

In [12]:
y_train.shape

(1111817, 7)

In [14]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense



# Define the LSTM model
model = Sequential()
model.add(LSTM(64, input_shape=(X_train.shape[1], 1)))
model.add(Dense(y_train.shape[1], activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))




2024-05-21 22:14:48.428970: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  """Return the value for the smallest normal.
2024-05-21 22:14:52.090892: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Epoch 1/10


2024-05-21 22:14:52.349525: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-05-21 22:14:52.351061: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-05-21 22:14:52.352152: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



KeyboardInterrupt: 