<a href="https://colab.research.google.com/github/KingT5M/NARENDRA-SINGLE-FAULT/blob/main/NARENDRA_SINGLE_FAULT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [50]:
#import packages
import tensorflow as tf
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt 
import keras_tuner as kt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.metrics import precision_score, recall_score, f1_score
from scipy.stats import randint, uniform 
from tensorflow import keras
from keras import layers
from keras.models import Sequential
from keras.layers import Conv1D, BatchNormalization, MaxPooling1D, LSTM, Flatten, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [51]:
# Data is stored in CSV files
healthy_data_path = r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_002-rpm-healthy.csv'
gain_fault_path = r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_004-gain-2.csv'
offset_fault_path = r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_007-offset-1000.csv'
stuck_fault_path = r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_008-stuck.csv'
noise_fault_path = r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_009-noise.csv'
drift_fault_path = r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_010-drift.csv'
hard_over_fault_path = r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_010-hard-over.csv'
delay_fault_path = r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_012-delay-2.0.csv'
spike_fault_path = r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_012-spike.csv'

# Make sure time column has 3 decimal places and all data is upto a uniform time stamp
def process_file(file_path):
    # Read CSV file into a pandas DataFrame
    df = pd.read_csv(file_path)

    # Round off values in the first column to 3 decimal places
    df['time'] = df['time'].round(3)

    # Find the index where the 'time' column is greater than 300.66
    threshold_index = df[df['time'] > 300.66].index.min()

    # If the threshold is found, delete rows after it
    if not pd.isnull(threshold_index):
        df = df.loc[:threshold_index]

    # Save the modified DataFrame back to the CSV file
    df.to_csv(file_path, index=False)

# Apply the function to each file path
process_file(healthy_data_path)
process_file(gain_fault_path)
process_file(spike_fault_path)
process_file(hard_over_fault_path)
process_file(noise_fault_path)


In [None]:

# Visualize data
def visualize_data(file_path, fault_type):
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path)

    # Plot 'Engine-RPM'
    plt.figure(figsize=(12, 6))
    plt.subplot(2, 1, 2)
    plt.plot(df['time'], label='time', color='blue')
    plt.title(f'time Plot - {fault_type}')
    plt.legend()
    
    plt.figure(figsize=(12, 6))
    plt.subplot(2, 1, 2)
    plt.plot(df['Engine-RPM'], label='Engine-RPM', color='orange')
    plt.title(f'Engine-RPM Plot - {fault_type}')
    plt.legend()

    plt.tight_layout()
    plt.show()

# Visualize data for each file
for file_path, fault_type in [(healthy_data_path, 'healthy'), (gain_fault_path, 'gain'),
                              (spike_fault_path, 'spike'),
                              (hard_over_fault_path, 'hard over'),
                              (noise_fault_path, 'noise')]:
    visualize_data(file_path, fault_type)

In [52]:
# File paths
file_paths = [
    r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_002-rpm-healthy.csv',
    r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_004-gain-2.csv',
    r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_007-offset-1000.csv',
    r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_008-stuck.csv',
    r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_009-noise.csv',
    r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_010-drift.csv',
    r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_010-hard-over.csv',
    r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_012-delay-2.0.csv',
    r'C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_012-spike.csv'
]

# Fault types
fault_types = [
    'healthy', 'gain', 'offset', 'stuck-at', 'noise', 'drift', 'hard-over', 'delay-time', 'spike'
]

# Iterate over each file
for file_path, fault_type in zip(file_paths, fault_types):
    # Read CSV file
    df = pd.read_csv(file_path)
    
    # Add Fault Type column and fill with respective data
    df['Fault Type'] = fault_type
    
    # Write back to CSV
    df.to_csv(file_path, index=False)

    print(f"Fault Type column added to {file_path} and filled with {fault_type} data.")


Fault Type column added to C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_002-rpm-healthy.csv and filled with healthy data.
Fault Type column added to C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_004-gain-2.csv and filled with gain data.
Fault Type column added to C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_007-offset-1000.csv and filled with offset data.
Fault Type column added to C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_008-stuck.csv and filled with stuck-at data.
Fault Type column added to C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_009-noise.csv and filled with noise data.
Fault Type column added to C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_010-drift.csv and filled with drift data.
Fault Type column added to C:\Users\T5M\Desktop\NARENDRA-SINGLE FAULT\FAULT DATASET\rec3_010-hard-over.csv and filled with hard-over data.
Fault Type column added to C:\Users\T5M\Desktop\NARENDRA-

In [53]:
# List to store DataFrames
dfs = []

# Read each CSV file and append to the list
for file_path in file_paths:
    df = pd.read_csv(file_path)
    dfs.append(df)

# Concatenate all DataFrames into a single DataFrame
concatenated_df = pd.concat(dfs, ignore_index=True)

# Display the concatenated DataFrame
print(concatenated_df)


            time  Engine-RPM Fault Type
0          0.001         0.0    healthy
1          0.002         2.0    healthy
2          0.003         2.0    healthy
3          0.004         2.0    healthy
4          0.005         2.0    healthy
...          ...         ...        ...
2284159  300.657       758.0      spike
2284160  300.658       758.0      spike
2284161  300.659       758.0      spike
2284162  300.660       758.0      spike
2284163  300.661       762.0      spike

[2284164 rows x 3 columns]


In [54]:
# Drop missing values
concatenated_df.dropna(inplace=True)

# Extract features and labels
X = concatenated_df.iloc[:, :2].values  # Features
y = concatenated_df.iloc[:, 2].values   # Labels

# Normalize the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# One-hot encode the labels
encoder = OneHotEncoder(sparse_output=False)
y_encoded = encoder.fit_transform(y.reshape(-1, 1))

# Create sequences of 30
sequence_length = 30
X_sequences = []
y_sequences = []
for i in range(len(X_scaled) - sequence_length + 1):
    X_sequences.append(X_scaled[i:i+sequence_length])
    y_sequences.append(y_encoded[i+sequence_length-1])

# Convert sequences into arrays
X_array = np.array(X_sequences)
y_array = np.array(y_sequences)

print("Preprocessing completed successfully.")

# Now, you can use X_array and y_array for training your deep neural network.


Preprocessing completed successfully.


In [55]:
# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_array, y_array, test_size=0.2, random_state=42)

# Further split the training data into training and validation sets (80% train, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

print("Data split into training, validation, and testing sets successfully.")

print("Shapes of the datasets:")
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_val shape:", X_val.shape)
print("y_val shape:", y_val.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)


Data split into training, validation, and testing sets successfully.
Shapes of the datasets:
X_train shape: (1461846, 30, 2)
y_train shape: (1461846, 9)
X_val shape: (365462, 30, 2)
y_val shape: (365462, 9)
X_test shape: (456827, 30, 2)
y_test shape: (456827, 9)


In [56]:
def model_builder(hp):
    model = Sequential()

    # Hyperparameters
    cnn_layers = hp.Int('cnn_layers', min_value=0, max_value=5, default=5)
    lstm_layers = hp.Int('lstm_layers', min_value=0, max_value=5, default=4)
    dense_layers = hp.Int('dense_layers', min_value=0, max_value=5, default=0)
    max_pooling = hp.Int('max_pooling', min_value=0, max_value=1, default=1)
    dropout = hp.Int('dropout', min_value=0, max_value=2, default=0)
    batch_norm = hp.Int('batch_norm', min_value=0, max_value=2, default=2)
    batch_size = hp.Int('batch_size', min_value=64, max_value=150, default=64)
    learning_rate = hp.Float('learning_rate', min_value=0.0001, max_value=0.001, default=0.0005, sampling='log')
    epochs = hp.Int('epochs', min_value=50, max_value=900, default=850)


    # CNN Layers
    for i in range(cnn_layers):
        model.add(Conv1D(filters=8, kernel_size=2, activation='relu', padding='same', input_shape=(30, 2)))
        if batch_norm:
            model.add(BatchNormalization())

    # Max Pooling Layer
    if max_pooling:
        model.add(MaxPooling1D(pool_size=2))

    # LSTM Layers
    for i in range(lstm_layers):
        model.add(LSTM(units=64, activation='relu', return_sequences=True))
        if batch_norm:
            model.add(BatchNormalization())

    # Flatten Layer
    model.add(Flatten())

    # Dense Layers
    for i in range(dense_layers):
        model.add(Dense(units=64, activation='relu'))
        if dropout:
            model.add(Dropout(0.5))
        if batch_norm:
            model.add(BatchNormalization())

    # Output Layer
    model.add(Dense(units=9, activation='softmax'))

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    #Print model structure
    model.summary()

    return model


In [46]:
class MetricsCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        y_pred = self.model.predict(X_val)
        y_pred_classes = np.argmax(y_pred, axis=1)
        y_true = np.argmax(y_val, axis=1)

        precision = precision_score(y_true, y_pred_classes, average='weighted')
        recall = recall_score(y_true, y_pred_classes, average='weighted')
        f1 = f1_score(y_true, y_pred_classes, average='weighted')

        print(f"Validation Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

        if precision >= 0.9886 and recall >= 0.9890 and f1 >= 0.9888:
            print("Achieved desired metrics. Stopping training.")
            self.model.stop_training = True

# Define the tuner
tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=50,
                     factor=3,
                     directory='hyperparameters_tuning',
                     project_name='fault_detection')

# Define metrics callback
metrics_callback = MetricsCallback()

# Perform the hyperparameter search
tuner.search(X_train, y_train, validation_data=(X_val, y_val), callbacks=[metrics_callback], batch_size=64, epochs=50)

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best hyperparameters found: {best_hps}")

# Build the model with the best hyperparameters
best_model = tuner.hypermodel.build(best_hps)

# Train the model
best_model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=best_hps.get('batch_size'), epochs=best_hps.get('epochs'), callbacks=[metrics_callback])

# Save the best model
best_model.save('best_model.h5')

# Calculate metrics on the validation set
y_pred = best_model.predict(X_val)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_val, axis=1)

precision = precision_score(y_true, y_pred_classes, average=None)
recall = recall_score(y_true, y_pred_classes, average=None)
f1 = f1_score(y_true, y_pred_classes, average=None)

# Sensor fault types
fault_types = ['healthy', 'gain', 'offset', 'stuck-at', 'noise', 'drift', 'hard-over', 'delay-time', 'spike']

# Plotting precision
plt.figure(figsize=(10, 5))
plt.bar(fault_types, precision)
plt.title('Precision')
plt.xlabel('Fault Types')
plt.ylabel('Precision (%)')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.tight_layout()
plt.show()

# Plotting recall
plt.figure(figsize=(10, 5))
plt.bar(fault_types, recall)
plt.title('Recall')
plt.xlabel('Fault Types')
plt.ylabel('Recall (%)')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.tight_layout()
plt.show()

# Plotting F1-score
plt.figure(figsize=(10, 5))
plt.bar(fault_types, f1)
plt.title('F1-score')
plt.xlabel('Fault Types')
plt.ylabel('F1-score (%)')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.tight_layout()
plt.show()



Reloading Tuner from hyperparameters_tuning\fault_detection\tuner0.json

Search: Running Trial #4

Value             |Best Value So Far |Hyperparameter
4                 |2                 |cnn_layers
4                 |3                 |lstm_layers
3                 |3                 |dense_layers
1                 |1                 |max_pooling
2                 |2                 |dropout
1                 |1                 |batch_norm
121               |135               |batch_size
0.0001            |0.0001            |learning_rate
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
3                 |3                 |tuner/bracket
0                 |0                 |tuner/round

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 30, 8)             40        
                      

KeyboardInterrupt: 