In [None]:
pip install ctgan

Collecting ctgan
  Downloading ctgan-0.10.2-py3-none-any.whl.metadata (10 kB)
Collecting rdt>=1.11.0 (from ctgan)
  Downloading rdt-1.14.0-py3-none-any.whl.metadata (10 kB)
Collecting Faker>=17 (from rdt>=1.11.0->ctgan)
  Downloading Faker-36.1.1-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->ctgan)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->ctgan)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->ctgan)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->ctgan)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from tor

In [None]:

pip install keras-tuner


Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from ctgan import CTGAN

In [None]:
# Read the raw CSV (expected in the working directory) into a DataFrame.
csv_path = "Timestamp_Data.csv"
data = pd.read_csv(csv_path)

In [None]:

# Parse the 'MM:SS.f' strings, then collapse each value to a float number of
# seconds: minutes * 60 + seconds + fractional part.
parsed_ts = pd.to_datetime(data['Timestamp'], format='%M:%S.%f')
ts_fields = parsed_ts.dt
data['Timestamp'] = ts_fields.minute * 60 + ts_fields.second + ts_fields.microsecond / 1e6

print(data.head())  # Verify changes


   Timestamp  Pressure (kPa)  Voltage (V)     Vehicle State  \
0      371.1       64.065424     5.000000      Acceleration   
1     2003.7      119.041313     3.940121        Engine OFF   
2     2604.1       16.200441     3.582632  Throttle Release   
3     2586.2       19.689008     1.547226  Throttle Release   
4     3557.5       21.739076     5.000000  Throttle Release   

           Fault Type  
0  Power Supply Issue  
1        Sensor Fault  
2              Normal  
3  Power Supply Issue  
4  Power Supply Issue  


In [None]:
# Integer-encode the two categorical columns. Each column keeps its own fitted
# encoder so the integer codes can be mapped back to the original strings.
label_encoder_vehicle, label_encoder_fault = LabelEncoder(), LabelEncoder()

for column, encoder in (('Vehicle State', label_encoder_vehicle),
                        ('Fault Type', label_encoder_fault)):
    data[column] = encoder.fit_transform(data[column])

print(data.head())  # Verify encoding


   Timestamp  Pressure (kPa)  Voltage (V)  Vehicle State  Fault Type
0      371.1       64.065424     5.000000              0           1
1     2003.7      119.041313     3.940121              1           2
2     2604.1       16.200441     3.582632              3           0
3     2586.2       19.689008     1.547226              3           1
4     3557.5       21.739076     5.000000              3           1


In [None]:

# Train a CTGAN on the encoded frame, then draw a synthetic table with twice as
# many rows as the original data.
# NOTE(review): sample() is called without a condition column, so the class mix
# of 'Fault Type' presumably follows what the generator learned rather than
# being forced into balance -- confirm before relying on this for balancing.
synthesizer_epochs = 2000
categorical_columns = ['Vehicle State', 'Fault Type']

ctgan = CTGAN(epochs=synthesizer_epochs)
ctgan.fit(data, discrete_columns=categorical_columns)
synthetic_data = ctgan.sample(2 * len(data))


In [None]:

# 1. Adding Gaussian Noise: perturb each sensor reading with zero-mean noise.
# The same absolute standard deviation is applied to both channels.
noise_std = 0.02
for sensor_col in ('Pressure (kPa)', 'Voltage (V)'):
    noise = np.random.normal(0, noise_std, size=len(synthetic_data))
    synthetic_data[sensor_col] = synthetic_data[sensor_col] + noise


In [None]:
# 2. Jittering: multiply every reading by an independent random factor drawn
# uniformly from [1 - jitter_factor, 1 + jitter_factor).
jitter_factor = 0.01
for sensor_col in ('Pressure (kPa)', 'Voltage (V)'):
    jitter = np.random.uniform(-jitter_factor, jitter_factor, size=len(synthetic_data))
    synthetic_data[sensor_col] = synthetic_data[sensor_col] * (1 + jitter)


In [None]:
# 3. Time Warping (shifting data points slightly over time)
# Both sensor channels are rolled by the SAME offset so that the pressure and
# voltage on a given row still come from the same sample; rolling them by
# independent offsets would pair readings from different rows.
# np.random.randint excludes its upper bound, hence the "+ 1" to make the
# range symmetric: [-shift_range, shift_range].
# NOTE(review): the remaining columns (Timestamp, Vehicle State, Fault Type)
# are not shifted, so sensor readings still move by up to `shift_range` rows
# relative to them -- confirm this is the intended augmentation.
shift_range = 3
row_shift = np.random.randint(-shift_range, shift_range + 1)
for sensor_col in ('Pressure (kPa)', 'Voltage (V)'):
    synthetic_data[sensor_col] = np.roll(synthetic_data[sensor_col].to_numpy(), row_shift)


In [None]:
# 4. Feature Scaling Variations: stretch both sensor channels by a constant 5%.
# NOTE(review): a uniform positive factor is cancelled by min-max scaling, so
# if these columns are min-max normalized afterwards this step has no effect
# on the model inputs -- confirm whether it is still wanted.
scale_factor = 1.05
sensor_cols = ['Pressure (kPa)', 'Voltage (V)']
synthetic_data[sensor_cols] = synthetic_data[sensor_cols] * scale_factor

In [None]:
# Derived features: row-to-row difference and 3-row moving average for each
# sensor channel. Columns are appended in the same order as before
# (both diffs, then both moving averages) so positional feature order is stable.
pressure = synthetic_data['Pressure (kPa)']
voltage = synthetic_data['Voltage (V)']

# The first row has no predecessor, so its difference is defined as 0.
synthetic_data['Pressure_Diff'] = pressure.diff().fillna(0)
synthetic_data['Voltage_Diff'] = voltage.diff().fillna(0)

# min_periods=1 averages over however many rows are available at the start of
# the table. Filling the first two rows with 0 instead would plant artificial
# values far outside the sensor range and stretch any later min-max scaling.
synthetic_data['Pressure_MA'] = pressure.rolling(window=3, min_periods=1).mean()
synthetic_data['Voltage_MA'] = voltage.rolling(window=3, min_periods=1).mean()


In [None]:
# Normalize numerical features to [0, 1].
# 'Timestamp' is included because it is passed to the model as a feature
# (create_sequences takes every column except the last) and, left in raw
# seconds, would be orders of magnitude larger than the other scaled inputs.
# NOTE(review): the scaler is fitted on the full table before the train/test
# split, so test-set statistics leak into the scaling -- consider fitting on
# the training portion only.
scaler = MinMaxScaler()
numeric_cols = ['Timestamp', 'Pressure (kPa)', 'Voltage (V)', 'Pressure_Diff', 'Voltage_Diff', 'Pressure_MA', 'Voltage_MA']
synthetic_data[numeric_cols] = scaler.fit_transform(synthetic_data[numeric_cols])


In [None]:
# Define target variable: binary flag that is 1 where the encoded
# 'Fault Type' equals 2 and 0 otherwise.
# NOTE(review): in the printed head of the encoded data, code 2 corresponds to
# 'Sensor Fault' -- confirm that is the class this label is meant to detect.
is_target_fault = synthetic_data['Fault Type'].eq(2)
synthetic_data['Fault Label'] = is_target_fault.astype(int)


In [None]:
def create_sequences(df, seq_length=20, feature_cols=None, label_col=None):
    """Slice a table into overlapping fixed-length windows with next-row labels.

    Window ``i`` covers rows ``i .. i + seq_length - 1`` and is labelled with
    the label value of row ``i + seq_length`` (the row right after the window).

    Args:
        df: pandas DataFrame holding features and the label.
        seq_length: Number of consecutive rows per window; must be >= 1.
        feature_cols: Optional list of column names to use as features.
            When None, every column except the last one is used.
        label_col: Optional name of the label column. When None, the last
            column is used.

    Returns:
        A tuple ``(sequences, labels)`` of NumPy arrays with shapes
        ``(n_windows, seq_length, n_features)`` and ``(n_windows,)`` where
        ``n_windows = max(len(df) - seq_length, 0)``.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError("seq_length must be a positive integer")

    feature_frame = df.iloc[:, :-1] if feature_cols is None else df[list(feature_cols)]
    label_series = df.iloc[:, -1] if label_col is None else df[label_col]

    # Convert once up front instead of calling .iloc/.values for every window.
    feature_values = feature_frame.to_numpy()
    label_values = label_series.to_numpy()

    n_windows = len(df) - seq_length
    if n_windows <= 0:
        # Keep the 3-D layout even when no window fits, so callers that read
        # shape[1] / shape[2] do not fail on an empty result.
        empty_sequences = np.empty((0, seq_length, feature_values.shape[1]), dtype=feature_values.dtype)
        empty_labels = np.empty((0,), dtype=label_values.dtype)
        return empty_sequences, empty_labels

    sequences = np.stack([feature_values[start:start + seq_length] for start in range(n_windows)])
    # Label k belongs to the row immediately following window k.
    labels = label_values[seq_length:].copy()
    return sequences, labels


In [None]:
# Slice the synthetic table into overlapping 20-row windows; each window's
# target is the last-column value ('Fault Label') of the row right after it.
window_rows = 20
X, y = create_sequences(synthetic_data, seq_length=window_rows)

In [None]:
# Train-test split: hold out 20% of the windows, stratified on the label and
# seeded for repeatability.
# NOTE(review): the windows overlap heavily, so a shuffled split puts
# near-duplicate windows on both sides -- consider a split along the row axis.
split_parts = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [None]:
def build_model(hp):
    """Build and compile a stacked bidirectional-LSTM binary classifier.

    The input shape is read from the module-level ``X_train`` array
    (``X_train.shape[1]`` timesteps by ``X_train.shape[2]`` features).

    Args:
        hp: KerasTuner hyperparameter container used to sample layer widths,
            dropout rates and the learning rate.

    Returns:
        A compiled Keras ``Sequential`` model with a single sigmoid output,
        trained with binary cross-entropy and reporting accuracy.
    """
    timesteps, n_features = X_train.shape[1], X_train.shape[2]

    model = Sequential([
        # Declare the input explicitly: an `input_shape` kwarg on an LSTM that
        # is wrapped in Bidirectional does not define the Sequential's input.
        tf.keras.Input(shape=(timesteps, n_features)),

        Bidirectional(LSTM(hp.Int('lstm_units1', 128, 512, step=64), return_sequences=True)),
        Dropout(hp.Choice('dropout1', [0.2, 0.3, 0.4])),

        Bidirectional(LSTM(hp.Int('lstm_units2', 64, 256, step=64), return_sequences=True)),
        Dropout(hp.Choice('dropout2', [0.2, 0.3, 0.4])),

        # Last recurrent layer emits only its final state for the dense head.
        Bidirectional(LSTM(hp.Int('lstm_units3', 64, 256, step=64), return_sequences=False)),
        Dropout(hp.Choice('dropout3', [0.2, 0.3, 0.4])),

        Dense(hp.Int('dense_units1', 64, 256, step=64), activation='relu', kernel_regularizer=l2(0.01)),
        Dense(hp.Int('dense_units2', 64, 128, step=64), activation='relu', kernel_regularizer=l2(0.01)),
        Dense(1, activation='sigmoid')
    ])

    # The learning rate is tuned as well, so it must be sampled from `hp` here.
    optimizer = tf.keras.optimizers.AdamW(learning_rate=hp.Choice('learning_rate', [1e-3, 5e-4, 1e-4]))

    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model


In [None]:
# Random search over the hyperparameters declared in build_model, ranked by
# validation accuracy.
# NOTE(review): trials are stored under directory/project_name; an existing
# folder from an earlier run is presumably reused -- confirm before comparing
# scores across runs.
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    directory='hyperparameter_tuning',
    project_name='LSTM_Tuning'
)

# Search for the best hyperparameters.
# Validation data is carved out of the training set (validation_split) so the
# held-out test set is not used for model selection and stays an unbiased
# estimate for the final evaluation.
tuner.search(
    X_train,
    y_train,
    epochs=50,
    validation_split=0.2,
    batch_size=64,
    callbacks=[EarlyStopping(monitor='val_loss', patience=5)],
)

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

Trial 6 Complete [00h 59m 15s]
val_accuracy: 0.8320820927619934

Best val_accuracy So Far: 0.8320820927619934
Total elapsed time: 09h 39m 16s

Search: Running Trial #7

Value             |Best Value So Far |Hyperparameter
192               |256               |lstm_units1
0.4               |0.2               |dropout1
256               |64                |lstm_units2
0.3               |0.2               |dropout2
128               |256               |lstm_units3
0.2               |0.2               |dropout3
256               |192               |dense_units1
128               |128               |dense_units2
0.0001            |0.0001            |learning_rate

Epoch 1/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 636ms/step - accuracy: 0.8325 - loss: 4.1974 - val_accuracy: 0.8321 - val_loss: 2.8055
Epoch 2/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 639ms/step - accuracy: 0.8303 - loss: 2.4857 - val_accuracy: 0.8321 - val_loss: 1.6930


In [None]:
# Rebuild a fresh model from the best hyperparameters found by the tuner.
final_model = tuner.hypermodel.build(best_hps)

# Callbacks: stop once validation loss stalls (restoring the best weights) and
# halve the learning rate on shorter plateaus.
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)

# HyperParameters.get() accepts only a name and raises for unknown ones, and
# 'batch_size' is never registered in build_model. Read it from the plain
# `values` dict instead so the fallback of 64 actually applies.
batch_size = best_hps.values.get('batch_size', 64)

# Train the final model.
# Early stopping / LR scheduling monitor a split of the training data so the
# test set is only touched by the evaluation below.
final_model.fit(
    X_train,
    y_train,
    epochs=150,
    batch_size=batch_size,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr],
)

# Evaluate model on the held-out test set
loss, accuracy = final_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Display dataset head
print(synthetic_data.head())