In [2]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, save_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Function to get confirmed input with optional validation
def get_confirmed_input(prompt, validation_func=None, error_message="Invalid input, please try again."):
    while True:
        user_input = input(prompt).strip()
        
        if validation_func:
            try:
                # Validate the input if a validation function is provided
                user_input = validation_func(user_input)
            except ValueError:
                print(error_message)
                continue
        
        confirm = input(f"You entered '{user_input}'. Is this correct? (y/n): ").strip().lower()
        if confirm == 'y':
            return user_input
        else:
            print("Let's try again.")

# Validation functions
def validate_integer(user_input):
    return int(user_input)  # Will raise ValueError if not an integer

def validate_single_column(user_input):
    if ',' in user_input or ' ' in user_input:
        raise ValueError("Only one column is allowed.")
    return user_input

# Step 1: Load the dataset and define the no fault and fault 1 subsets
file_name = get_confirmed_input("Enter the CSV filename (with .csv extension) to load the dataset: ")
data = pd.read_csv(file_name)
data = data.drop(0)  # Dropping the header row if necessary
data.reset_index(drop=True, inplace=True)  # Reset the index after dropping the row

# Step 2: Ask for data split indices for "no fault" and "fault" data
no_fault_end = get_confirmed_input("Enter the end row index for 'No Fault' data: ", validate_integer, "Please enter a valid integer.")
fault_1_end = get_confirmed_input("Enter the end row index for 'Fault 1' data: ", validate_integer, "Please enter a valid integer.")
fault_1_start = no_fault_end

# Divide the dataset into "no fault" and "fault 1" data based on user inputs
no_fault_data = data[:no_fault_end]
fault_1_data = data[fault_1_start:fault_1_end]

# Step 3: Separating features and labels
fault_columns = get_confirmed_input("Enter all the output coloumns (comma-separated, e.g., Fault_1,Fault_2,Fault_3): ").split(',')

# Get the target column with single column validation
target_column = get_confirmed_input("Enter the desired output fault column name (e.g., Fault_1): ", validate_single_column)

# Split function
def split_data(category_data, train_percent=0.6, val_percent=0.2, test_percent=0.2):
    train_end = int(train_percent * len(category_data))
    val_end = train_end + int(val_percent * len(category_data))
    train_data = category_data[:train_end]
    val_data = category_data[train_end:val_end]
    test_data = category_data[val_end:]
    return train_data, val_data, test_data

# Applying the split to both categories
no_fault_train, no_fault_val, no_fault_test = split_data(no_fault_data)
fault_1_train, fault_1_val, fault_1_test = split_data(fault_1_data)

# Concatenate each split part for final train, validation, and test sets
train_data = pd.concat([no_fault_train, fault_1_train], ignore_index=True)
val_data = pd.concat([no_fault_val, fault_1_val], ignore_index=True)
test_data = pd.concat([no_fault_test, fault_1_test], ignore_index=True)

# Separate features and labels based on user inputs
X_train = train_data.drop(columns=fault_columns)
# Save the number of columns (features) to a file for later validation
np.save("num_features.npy", X_train.shape[1])

y_train = train_data[target_column]

X_val = val_data.drop(columns=fault_columns)
y_val = val_data[target_column]

X_test = test_data.drop(columns=fault_columns)
y_test = test_data[target_column]

# Save the test data to a new .dat file
X_test.to_csv("X_test.dat", header=False, index=False, sep=" ")

# Standardizing the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Train and save the model
def train_and_save_model():
    model = Sequential([
        Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    history = model.fit(
        X_train_scaled, y_train,
        epochs=30,
        batch_size=64,
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=1
    )
    
    test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test, verbose=1)
    print(f"\nTest Accuracy: {test_accuracy:.4f}")
    
    save_model(model, "fault_detection_nn2.keras")
    np.save("scaler.npy", scaler.mean_)
    np.save("scaler_var.npy", scaler.var_)
    
    print("Model and scaler saved.")
    return history

# Run training
history = train_and_save_model()


Enter the CSV filename (with .csv extension) to load the dataset:  TEP_Fault_dataset_modified.csv
You entered 'TEP_Fault_dataset_modified.csv'. Is this correct? (y/n):  y
Enter the end row index for 'No Fault' data:  7201
You entered '7201'. Is this correct? (y/n):  y
Enter the end row index for 'Fault 1' data:  14402
You entered '14402'. Is this correct? (y/n):  y
Enter the fault columns to drop (comma-separated, e.g., Fault_1,Fault_2,Fault_3):  Fault_1,Fault_2,Fault_3
You entered 'Fault_1,Fault_2,Fault_3'. Is this correct? (y/n):  y
Enter the desired output fault column name (e.g., Fault_1):  Fault_1
You entered 'Fault_1'. Is this correct? (y/n):  y


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9025 - loss: 0.3093 - val_accuracy: 1.0000 - val_loss: 0.0030
Epoch 2/30
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9986 - loss: 0.0100 - val_accuracy: 1.0000 - val_loss: 5.4325e-04
Epoch 3/30
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9988 - loss: 0.0069 - val_accuracy: 1.0000 - val_loss: 2.0836e-04
Epoch 4/30
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9993 - loss: 0.0041 - val_accuracy: 1.0000 - val_loss: 8.2802e-05
Epoch 5/30
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9989 - loss: 0.0038 - val_accuracy: 1.0000 - val_loss: 6.4030e-05
Epoch 6/30
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9994 - loss: 0.0031 - val_accuracy: 1.0000 - val_loss: 1.6781e-05
Epoch 7/