This section imports all necessary libraries and loads your data. It also creates the two different target variables:

y_stage1: Binary (0 for 'Normal', 1 for 'Fault')

y_stage2: Multiclass (0 for 'Normal', 1-20 for each fault type)

In [5]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout, LSTM
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# --- HELPER FUNCTIONS ---

def get_factors(n):
    """Split ``n`` into a pair of integer factors ``(larger, smaller)``.

    The search starts at ``int(sqrt(n))`` and walks downward, so the first
    divisor found gives the most "square" factorisation (e.g. 51 -> (17, 3)).
    A prime ``n`` comes out as ``(n, 1)`` because 1 always divides it.
    """
    candidates = range(int(np.sqrt(n)), 0, -1)
    divisor = next((d for d in candidates if n % d == 0), None)
    if divisor is None:
        # Only reachable when the search range is empty (e.g. n == 0).
        return (n, 1)
    return (n // divisor, divisor)

def create_cnn_dataset(X, img_rows, img_cols):
    """Turn each flat feature row into a single-channel 2D "image".

    Returns an array of shape ``(n_samples, img_rows, img_cols, 1)``.
    Raises ``ValueError`` when the number of columns in ``X`` is not exactly
    ``img_rows * img_cols``.
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    if n_features == img_rows * img_cols:
        return X.reshape((n_samples, img_rows, img_cols, 1))
    raise ValueError(f"Cannot reshape {n_features} features into ({img_rows}, {img_cols})")

def create_lstm_dataset(X, y, time_steps=10):
    """Build overlapping fixed-length windows for a sequence model.

    Window ``i`` is rows ``X[i : i + time_steps]`` and its target is
    ``y[i + time_steps]`` -- the label of the row immediately *after* the
    window, not of the last row inside it.

    Args:
        X: Array-like with samples along axis 0.
        y: Array-like of targets aligned with ``X`` (1-D labels or 2-D
            one-hot rows). It is indexed positionally.
        time_steps: Number of consecutive rows per window.

    Returns:
        ``(Xs, ys)`` where ``Xs`` has shape
        ``(n_windows, time_steps, *X.shape[1:])`` and ``ys`` has
        ``n_windows`` rows, with ``n_windows = max(len(X) - time_steps, 0)``.
        When there are too few rows the results are empty but keep that
        rank, so downstream shape checks still work.

    Raises:
        ValueError: If ``time_steps`` is negative, or ``y`` has fewer rows
            than ``X`` while at least one window is produced.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if time_steps < 0:
        raise ValueError(f"time_steps must be non-negative, got {time_steps}")

    n_windows = max(len(X) - time_steps, 0)
    if n_windows and len(y) < len(X):
        raise ValueError(
            f"y has {len(y)} rows but X has {len(X)}; they must be aligned"
        )

    # NOTE(review): windows are cut from consecutive rows with no notion of
    # run/group boundaries; if X concatenates several independent runs, some
    # windows will straddle two of them -- confirm this is acceptable.
    starts = np.arange(n_windows)[:, None]
    offsets = np.arange(time_steps)[None, :]
    # One gather builds every window at once (and always returns a copy).
    Xs = X[starts + offsets]
    ys = y[time_steps:time_steps + n_windows].copy()
    return Xs, ys
    
def pad_features(X, target_features):
    """Return ``X`` with exactly ``target_features`` columns.

    Extra columns of zeros are appended on the right when ``X`` is too
    narrow; when ``X`` is too wide the surplus right-hand columns are cut.
    """
    n_rows, n_cols = X.shape[0], X.shape[1]
    missing = target_features - n_cols

    if missing >= 0:
        # Zero block that fills the gap up to the target width.
        zero_block = np.zeros((n_rows, missing))
        return np.concatenate([X, zero_block], axis=1)

    # More columns than requested: keep only the leading ones.
    return X[:, :target_features]

# --- 1. DATA PREPARATION (Using your code) ---
print("Loading data...")
train_data = pd.read_csv('modified_data_train.csv')
test_data  = pd.read_csv('modified_data_test.csv')

# Stage-1 target: 0 where faultNumber is 0, otherwise 1. The vectorized
# comparison gives the same labels as a per-row lambda without the Python loop.
train_data['binary_fault'] = (train_data['faultNumber'] != 0).astype(int)
test_data['binary_fault'] = (test_data['faultNumber'] != 0).astype(int)

# Columns excluded from the feature matrix (targets and, presumably,
# run/sample identifiers). errors='ignore' tolerates any that are absent.
drop_cols = ['faultNumber', 'simulationRun', 'sample','binary_fault']

X_train = train_data.drop(columns=drop_cols, errors='ignore')
y_train = train_data['binary_fault']

X_test = test_data.drop(columns=drop_cols, errors='ignore')
y_test = test_data['binary_fault']

# Keep only the columns present in both splits, in a deterministic order,
# so train and test feature matrices line up column-for-column.
common_cols = sorted(set(X_train.columns) & set(X_test.columns))

X_train = X_train[common_cols]
X_test  = X_test[common_cols]

y_train = y_train.astype(int)
y_test = y_test.astype(int)

print(f"Data prepared with {len(common_cols)} common features.")

# --- 2. SCALING ---
print("Scaling data...")
# Fit the scaler on the training split only, then reuse it for the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

N_FEATURES = X_train_scaled.shape[1]

# --- 3. MODEL 1: CNN FOR STAGE 1 (DETECTION) ---

print("\n--- Preparing CNN Data ---")

# Side length of the smallest square grid that holds every feature
# (e.g. 37 features -> 7 x 7 = 49 cells); the spare cells are zero-padded.
TARGET_DIM = int(np.ceil(np.sqrt(N_FEATURES)))
IMG_ROWS, IMG_COLS = TARGET_DIM, TARGET_DIM
TARGET_FEATURES = TARGET_DIM * TARGET_DIM

print(f"Original features: {N_FEATURES}. Padding to {TARGET_FEATURES} for a ({IMG_ROWS}, {IMG_COLS}) image.")

# Pad the scaled data
X_train_padded = pad_features(X_train_scaled, TARGET_FEATURES)
X_test_padded = pad_features(X_test_scaled, TARGET_FEATURES)

CNN_INPUT_SHAPE = (IMG_ROWS, IMG_COLS, 1)

# Use the *padded* data to create the CNN dataset
X_train_cnn = create_cnn_dataset(X_train_padded, IMG_ROWS, IMG_COLS)
X_test_cnn = create_cnn_dataset(X_test_padded, IMG_ROWS, IMG_COLS)


def build_cnn_stage1(input_shape):
    """Create and compile the binary fault-detection CNN.

    Architecture: one 3x3 convolution (32 filters, ReLU), 2x2 max pooling
    and dropout, then a 128-unit ReLU dense layer with dropout and a single
    sigmoid output unit. Compiled with binary cross-entropy, Adam and the
    accuracy metric.
    """
    layers = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        # Single probability output for the binary target.
        Dense(1, activation='sigmoid'),
    ]
    cnn = Sequential(layers)
    cnn.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy'])
    return cnn

print("\n--- Training CNN Stage 1 (Detection) ---")
cnn_model_s1 = build_cnn_stage1(CNN_INPUT_SHAPE)
cnn_model_s1.summary()

# Fit on the padded CNN data.
# NOTE(review): Keras takes validation_split from the *last* rows before
# shuffling; if the CSV is ordered by fault/run the validation set may not
# be representative -- confirm.
cnn_model_s1.fit(X_train_cnn, y_train,
                 batch_size=128,
                 epochs=10,
                 validation_split=0.1)

print("\n--- CNN Stage 1 Evaluation ---")
# Threshold the sigmoid output at 0.5 to get hard 0/1 predictions.
y_pred_cnn_s1 = (cnn_model_s1.predict(X_test_cnn) > 0.5).astype("int32")
# zero_division=0 reports 0.0 for a class that is never predicted instead of
# emitting UndefinedMetricWarning (same convention as the Stage 2 reports).
print(classification_report(y_test, y_pred_cnn_s1, zero_division=0))

# --- 4. MODEL 2: LSTM FOR STAGE 1 (DETECTION) ---

print("\n--- Preparing LSTM Data ---")
TIME_STEPS = 10
# The LSTM consumes the original (unpadded) feature width.
LSTM_INPUT_SHAPE = (TIME_STEPS, N_FEATURES)

# Create sequences from the *original, unpadded* scaled data
X_train_lstm, y_train_lstm = create_lstm_dataset(X_train_scaled, y_train.values, TIME_STEPS)
X_test_lstm, y_test_lstm = create_lstm_dataset(X_test_scaled, y_test.values, TIME_STEPS)

def build_lstm_stage1(input_shape):
    """Create and compile the binary fault-detection LSTM.

    Two stacked 50-unit LSTM layers (the first returns the full sequence so
    the second can consume it), a 50-unit ReLU dense layer and a single
    sigmoid output. Compiled with Adam, binary cross-entropy and accuracy.
    """
    layers = [
        LSTM(50, activation='relu', input_shape=input_shape, return_sequences=True),
        LSTM(50, activation='relu'),
        Dense(50, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    net = Sequential(layers)
    net.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])
    return net

print("\n--- Training LSTM Stage 1 (Detection) ---")
lstm_model_s1 = build_lstm_stage1(LSTM_INPUT_SHAPE)
lstm_model_s1.summary()

lstm_model_s1.fit(X_train_lstm, y_train_lstm,
                  batch_size=128,
                  epochs=10,
                  validation_split=0.1)

print("\n--- LSTM Stage 1 Evaluation ---")
# Threshold the sigmoid output at 0.5 to get hard 0/1 predictions.
y_pred_lstm_s1 = (lstm_model_s1.predict(X_test_lstm) > 0.5).astype("int32")
# zero_division=0 avoids UndefinedMetricWarning when a class is never
# predicted and matches the Stage 2 reports.
print(classification_report(y_test_lstm, y_pred_lstm_s1, zero_division=0))

Loading data...
Data prepared with 37 common features.
Scaling data...

--- Preparing CNN Data ---
Original features: 37. Padding to 49 for a (7, 7) image.

--- Training CNN Stage 1 (Detection) ---
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 5, 5, 32)          320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 2, 2, 32)         0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 2, 2, 32)          0         
                                                                 
 flatten (Flatten)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 128)              

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00      2820
           1       0.94      1.00      0.97     47940

    accuracy                           0.94     50760
   macro avg       0.47      0.50      0.49     50760
weighted avg       0.89      0.94      0.92     50760


--- Preparing LSTM Data ---

--- Training LSTM Stage 1 (Detection) ---
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10, 50)            17600     
                                                                 
 lstm_1 (LSTM)               (None, 50)                20200     
                                                                 
 dense_2 (Dense)             (None, 50)                2550      
                                                                 
 dense_3 (Dense)             (None, 1)             

We can see that the class imbalance in the data results in low precision and F1-score for class 0. To tackle this we could use oversampling, undersampling, or SMOTE, but because the model architecture is complex, these techniques may slow the model down. We therefore use class weights here.

In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout, LSTM
from tensorflow.keras.metrics import AUC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.utils import class_weight

# --- HELPER FUNCTIONS ---

def get_factors(n):
    """Return ``(n // d, d)`` for the largest divisor ``d <= int(sqrt(n))``.

    Falls back to ``(n, 1)`` when no candidate divisor exists.
    """
    candidates = range(int(np.sqrt(n)), 0, -1)
    divisor = next((d for d in candidates if n % d == 0), None)
    if divisor is None:
        return (n, 1)
    return (n // divisor, divisor)

def create_cnn_dataset(X, img_rows, img_cols):
    """Reshape flat feature rows to ``(n_samples, img_rows, img_cols, 1)``.

    Raises ``ValueError`` if the column count is not ``img_rows * img_cols``.
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    if n_features == img_rows * img_cols:
        return X.reshape((n_samples, img_rows, img_cols, 1))
    raise ValueError(f"Cannot reshape {n_features} features into ({img_rows}, {img_cols})")

def create_lstm_dataset(X, y, time_steps=10):
    """Build overlapping fixed-length windows for a sequence model.

    Window ``i`` is rows ``X[i : i + time_steps]``; its target is
    ``y[i + time_steps]``, i.e. the label of the row right after the window.

    Args:
        X: Array-like with samples along axis 0.
        y: Array-like of targets aligned with ``X``; indexed positionally.
        time_steps: Number of consecutive rows per window.

    Returns:
        ``(Xs, ys)`` with ``Xs`` of shape
        ``(n_windows, time_steps, *X.shape[1:])`` and ``ys`` holding
        ``n_windows`` rows, where ``n_windows = max(len(X) - time_steps, 0)``.
        Empty results keep that rank rather than collapsing to shape ``(0,)``.

    Raises:
        ValueError: If ``time_steps`` is negative, or ``y`` has fewer rows
            than ``X`` while at least one window is produced.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if time_steps < 0:
        raise ValueError(f"time_steps must be non-negative, got {time_steps}")

    n_windows = max(len(X) - time_steps, 0)
    if n_windows and len(y) < len(X):
        raise ValueError(
            f"y has {len(y)} rows but X has {len(X)}; they must be aligned"
        )

    # Row indices of every window, built by broadcasting starts + offsets.
    starts = np.arange(n_windows)[:, None]
    offsets = np.arange(time_steps)[None, :]
    Xs = X[starts + offsets]
    ys = y[time_steps:time_steps + n_windows].copy()
    return Xs, ys
    
def pad_features(X, target_features):
    """Zero-pad (or truncate) the columns of ``X`` to ``target_features``."""
    n_rows, n_cols = X.shape[0], X.shape[1]
    missing = target_features - n_cols
    if missing >= 0:
        zero_block = np.zeros((n_rows, missing))
        return np.concatenate([X, zero_block], axis=1)
    # Wider than requested: keep only the leading columns.
    return X[:, :target_features]

# --- 1. DATA PREPARATION ---
print("Loading data...")
train_data = pd.read_csv('modified_data_train.csv')
test_data  = pd.read_csv('modified_data_test.csv')

# Stage-1 target: 0 where faultNumber is 0, otherwise 1 (vectorized rather
# than a per-row lambda; the resulting labels are the same).
train_data['binary_fault'] = (train_data['faultNumber'] != 0).astype(int)
test_data['binary_fault'] = (test_data['faultNumber'] != 0).astype(int)

# Columns excluded from the feature matrix; errors='ignore' tolerates any
# that are absent.
drop_cols = ['faultNumber', 'simulationRun', 'sample','binary_fault']

X_train = train_data.drop(columns=drop_cols, errors='ignore')
y_train = train_data['binary_fault']

X_test = test_data.drop(columns=drop_cols, errors='ignore')
y_test = test_data['binary_fault']

# Keep only columns present in both splits, in a deterministic order.
common_cols = sorted(set(X_train.columns) & set(X_test.columns))
X_train = X_train[common_cols]
X_test  = X_test[common_cols]

y_train = y_train.astype(int)
y_test = y_test.astype(int)

print(f"Data prepared with {len(common_cols)} common features.")

# --- 2. SCALING ---
print("Scaling data...")
# Fit on the training split only, then apply the same transform to test.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

N_FEATURES = X_train_scaled.shape[1]

# --- CALCULATE CLASS WEIGHTS ---
print("Calculating class weights...")
# Compute the distinct labels once and reuse them for both the weight
# computation and the label -> weight mapping.
unique_labels = np.unique(y_train)
weights = class_weight.compute_class_weight(
    'balanced',
    classes=unique_labels,
    y=y_train
)
class_weights_dict = dict(zip(unique_labels, weights))
print(f"Class weights calculated: {class_weights_dict}")

# --- 3. MODEL 1: CNN FOR STAGE 1 (DETECTION) ---

print("\n--- Preparing CNN Data ---")
# Side length of the smallest square grid that can hold every feature; the
# spare cells are filled with zeros by pad_features.
TARGET_DIM = int(np.ceil(np.sqrt(N_FEATURES)))
IMG_ROWS, IMG_COLS = TARGET_DIM, TARGET_DIM
TARGET_FEATURES = TARGET_DIM * TARGET_DIM

print(f"Original features: {N_FEATURES}. Padding to {TARGET_FEATURES} for a ({IMG_ROWS}, {IMG_COLS}) image.")

X_train_padded = pad_features(X_train_scaled, TARGET_FEATURES)
X_test_padded = pad_features(X_test_scaled, TARGET_FEATURES)

CNN_INPUT_SHAPE = (IMG_ROWS, IMG_COLS, 1)

X_train_cnn = create_cnn_dataset(X_train_padded, IMG_ROWS, IMG_COLS)
X_test_cnn = create_cnn_dataset(X_test_padded, IMG_ROWS, IMG_COLS)

def build_cnn_stage1(input_shape):
    """Create and compile the binary fault-detection CNN.

    Same layer stack as a plain conv -> pool -> dense classifier: a 3x3
    convolution (32 filters), 2x2 max pooling, dropout, a 128-unit dense
    layer with dropout, and one sigmoid output. Tracks accuracy and AUC.
    """
    layers = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    cnn = Sequential(layers)
    cnn.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy', AUC(name='auc')])
    return cnn

print("\n--- Training CNN Stage 1 (Detection) ---")
cnn_model_s1 = build_cnn_stage1(CNN_INPUT_SHAPE)
cnn_model_s1.summary()

# class_weight rescales each sample's loss by the weight of its class.
# NOTE(review): Keras takes validation_split from the *last* rows before
# shuffling; if the CSV is ordered by fault/run the validation set may not
# be representative -- confirm.
cnn_model_s1.fit(X_train_cnn, y_train,
                 batch_size=128,
                 epochs=10,
                 validation_split=0.1,
                 class_weight=class_weights_dict)

print("\n--- CNN Stage 1 Evaluation ---")
y_pred_cnn_s1 = (cnn_model_s1.predict(X_test_cnn) > 0.5).astype("int32")
# zero_division=0 reports 0.0 for a never-predicted class instead of raising
# UndefinedMetricWarning (same convention as the Stage 2 reports).
print(classification_report(y_test, y_pred_cnn_s1, zero_division=0))

# --- 4. MODEL 2: LSTM FOR STAGE 1 (DETECTION) ---

print("\n--- Preparing LSTM Data ---")
TIME_STEPS = 10
LSTM_INPUT_SHAPE = (TIME_STEPS, N_FEATURES)

X_train_lstm, y_train_lstm = create_lstm_dataset(X_train_scaled, y_train.values, TIME_STEPS)
X_test_lstm, y_test_lstm = create_lstm_dataset(X_test_scaled, y_test.values, TIME_STEPS)

# Recompute the weights on the windowed labels, since windowing drops the
# first TIME_STEPS targets. The distinct labels are computed once and reused.
unique_labels_lstm = np.unique(y_train_lstm)
weights_lstm = class_weight.compute_class_weight(
    'balanced',
    classes=unique_labels_lstm,
    y=y_train_lstm
)
class_weights_dict_lstm = dict(zip(unique_labels_lstm, weights_lstm))

def build_lstm_stage1(input_shape):
    """Create and compile the binary fault-detection LSTM.

    Two stacked 50-unit LSTM layers (the first returns full sequences for
    the second), a 50-unit ReLU dense layer and one sigmoid output.
    Tracks accuracy and AUC.
    """
    layers = [
        LSTM(50, activation='relu', input_shape=input_shape, return_sequences=True),
        LSTM(50, activation='relu'),
        Dense(50, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    net = Sequential(layers)
    net.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy', AUC(name='auc')])
    return net

print("\n--- Training LSTM Stage 1 (Detection) ---")
lstm_model_s1 = build_lstm_stage1(LSTM_INPUT_SHAPE)
lstm_model_s1.summary()

# Class weights here are the ones computed on the windowed training labels.
lstm_model_s1.fit(X_train_lstm, y_train_lstm,
                  batch_size=128,
                  epochs=10,
                  validation_split=0.1,
                  class_weight=class_weights_dict_lstm)

print("\n--- LSTM Stage 1 Evaluation ---")
y_pred_lstm_s1 = (lstm_model_s1.predict(X_test_lstm) > 0.5).astype("int32")
# zero_division=0 avoids UndefinedMetricWarning when a class is never
# predicted and matches the Stage 2 reports.
print(classification_report(y_test_lstm, y_pred_lstm_s1, zero_division=0))

Loading data...
Data prepared with 37 common features.
Scaling data...
Calculating class weights...
Class weights calculated: {0: 9.0, 1: 0.5294117647058824}

--- Preparing CNN Data ---
Original features: 37. Padding to 49 for a (7, 7) image.

--- Training CNN Stage 1 (Detection) ---
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 5, 5, 32)          320       
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 2, 2, 32)         0         
 2D)                                                             
                                                                 
 dropout_2 (Dropout)         (None, 2, 2, 32)          0         
                                                                 
 flatten_1 (Flatten)         (None, 128)               0         
                                 

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout, LSTM
from tensorflow.keras.metrics import AUC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.utils import class_weight
from tensorflow.keras.callbacks import EarlyStopping # <--- IMPORT

# --- HELPER FUNCTIONS ---

def get_factors(n):
    """Return ``(n // d, d)`` for the largest divisor ``d <= int(sqrt(n))``.

    Falls back to ``(n, 1)`` when no candidate divisor exists.
    """
    d = int(np.sqrt(n))
    # Walk downward until a divisor is hit; d == 1 always divides n.
    while d > 0:
        if n % d == 0:
            return (n // d, d)
        d -= 1
    return (n, 1)

def create_cnn_dataset(X, img_rows, img_cols):
    """Reshape flat feature rows to ``(n_samples, img_rows, img_cols, 1)``.

    Raises ``ValueError`` if the column count is not ``img_rows * img_cols``.
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    if n_features == img_rows * img_cols:
        return X.reshape((n_samples, img_rows, img_cols, 1))
    raise ValueError(f"Cannot reshape {n_features} features into ({img_rows}, {img_cols})")

def create_lstm_dataset(X, y, time_steps=10):
    """Build overlapping fixed-length windows for a sequence model.

    Window ``i`` is rows ``X[i : i + time_steps]``; its target is
    ``y[i + time_steps]``, i.e. the label of the row right after the window.

    Args:
        X: Array-like with samples along axis 0.
        y: Array-like of targets aligned with ``X``; indexed positionally.
        time_steps: Number of consecutive rows per window.

    Returns:
        ``(Xs, ys)`` with ``Xs`` of shape
        ``(n_windows, time_steps, *X.shape[1:])`` and ``ys`` holding
        ``n_windows`` rows, where ``n_windows = max(len(X) - time_steps, 0)``.
        Empty results keep that rank rather than collapsing to shape ``(0,)``.

    Raises:
        ValueError: If ``time_steps`` is negative, or ``y`` has fewer rows
            than ``X`` while at least one window is produced.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if time_steps < 0:
        raise ValueError(f"time_steps must be non-negative, got {time_steps}")

    n_windows = max(len(X) - time_steps, 0)
    if n_windows and len(y) < len(X):
        raise ValueError(
            f"y has {len(y)} rows but X has {len(X)}; they must be aligned"
        )

    # Row indices of every window, built by broadcasting starts + offsets.
    starts = np.arange(n_windows)[:, None]
    offsets = np.arange(time_steps)[None, :]
    Xs = X[starts + offsets]
    ys = y[time_steps:time_steps + n_windows].copy()
    return Xs, ys
    
def pad_features(X, target_features):
    """Zero-pad (or truncate) the columns of ``X`` to ``target_features``."""
    n_rows, n_cols = X.shape[0], X.shape[1]
    missing = target_features - n_cols
    if missing >= 0:
        zero_block = np.zeros((n_rows, missing))
        return np.concatenate([X, zero_block], axis=1)
    # Wider than requested: keep only the leading columns.
    return X[:, :target_features]

# --- 1. DATA PREPARATION ---
print("Loading data...")
train_data = pd.read_csv('modified_data_train.csv')
test_data  = pd.read_csv('modified_data_test.csv')

# Stage-1 target: 0 where faultNumber is 0, otherwise 1 (vectorized rather
# than a per-row lambda; the resulting labels are the same).
train_data['binary_fault'] = (train_data['faultNumber'] != 0).astype(int)
test_data['binary_fault'] = (test_data['faultNumber'] != 0).astype(int)

# Columns excluded from the feature matrix; errors='ignore' tolerates any
# that are absent.
drop_cols = ['faultNumber', 'simulationRun', 'sample','binary_fault']

X_train = train_data.drop(columns=drop_cols, errors='ignore')
y_train = train_data['binary_fault']

X_test = test_data.drop(columns=drop_cols, errors='ignore')
y_test = test_data['binary_fault']

# Keep only columns present in both splits, in a deterministic order.
common_cols = sorted(set(X_train.columns) & set(X_test.columns))
X_train = X_train[common_cols]
X_test  = X_test[common_cols]

y_train = y_train.astype(int)
y_test = y_test.astype(int)

print(f"Data prepared with {len(common_cols)} common features.")

# --- 2. SCALING ---
print("Scaling data...")
# Fit on the training split only, then apply the same transform to test.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

N_FEATURES = X_train_scaled.shape[1]

# --- CALCULATE CLASS WEIGHTS ---
print("Calculating class weights...")
# Compute the distinct labels once and reuse them for both the weight
# computation and the label -> weight mapping.
unique_labels = np.unique(y_train)
weights = class_weight.compute_class_weight(
    'balanced',
    classes=unique_labels,
    y=y_train
)
class_weights_dict = dict(zip(unique_labels, weights))
print(f"Class weights calculated: {class_weights_dict}")

# --- DEFINE EARLY STOPPING ---
early_stopper = EarlyStopping(
    monitor='val_loss',        # Watch the validation loss
    patience=3,                # Stop after 3 epochs without improvement
    verbose=1,
    restore_best_weights=True  # Roll back to the best epoch's weights
)

# --- 3. MODEL 1: CNN FOR STAGE 1 (DETECTION) ---

print("\n--- Preparing CNN Data ---")
# Side length of the smallest square grid that can hold every feature; the
# spare cells are filled with zeros by pad_features.
TARGET_DIM = int(np.ceil(np.sqrt(N_FEATURES)))
IMG_ROWS, IMG_COLS = TARGET_DIM, TARGET_DIM
TARGET_FEATURES = TARGET_DIM * TARGET_DIM

print(f"Original features: {N_FEATURES}. Padding to {TARGET_FEATURES} for a ({IMG_ROWS}, {IMG_COLS}) image.")

X_train_padded = pad_features(X_train_scaled, TARGET_FEATURES)
X_test_padded = pad_features(X_test_scaled, TARGET_FEATURES)

CNN_INPUT_SHAPE = (IMG_ROWS, IMG_COLS, 1)

X_train_cnn = create_cnn_dataset(X_train_padded, IMG_ROWS, IMG_COLS)
X_test_cnn = create_cnn_dataset(X_test_padded, IMG_ROWS, IMG_COLS)

def build_cnn_stage1(input_shape):
    """Create and compile the binary fault-detection CNN.

    A 3x3 convolution (32 filters), 2x2 max pooling, dropout, a 128-unit
    dense layer with dropout, and one sigmoid output. Tracks accuracy and AUC.
    """
    layers = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    cnn = Sequential(layers)
    cnn.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy', AUC(name='auc')])
    return cnn

print("\n--- Training CNN Stage 1 (Detection) ---")
cnn_model_s1 = build_cnn_stage1(CNN_INPUT_SHAPE)
cnn_model_s1.summary()

# epochs is an upper bound; the EarlyStopping callback ends training sooner
# once val_loss stops improving.
cnn_model_s1.fit(X_train_cnn, y_train,
                 batch_size=128,
                 epochs=20,
                 validation_split=0.1,
                 class_weight=class_weights_dict,
                 callbacks=[early_stopper])

print("\n--- CNN Stage 1 Evaluation ---")
y_pred_cnn_s1 = (cnn_model_s1.predict(X_test_cnn) > 0.5).astype("int32")
# zero_division=0 reports 0.0 for a never-predicted class instead of raising
# UndefinedMetricWarning (same convention as the Stage 2 reports).
print(classification_report(y_test, y_pred_cnn_s1, zero_division=0))

# --- 4. MODEL 2: LSTM FOR STAGE 1 (DETECTION) ---

print("\n--- Preparing LSTM Data ---")
TIME_STEPS = 10
LSTM_INPUT_SHAPE = (TIME_STEPS, N_FEATURES)

X_train_lstm, y_train_lstm = create_lstm_dataset(X_train_scaled, y_train.values, TIME_STEPS)
X_test_lstm, y_test_lstm = create_lstm_dataset(X_test_scaled, y_test.values, TIME_STEPS)

# Recompute the weights on the windowed labels, since windowing drops the
# first TIME_STEPS targets. The distinct labels are computed once and reused.
unique_labels_lstm = np.unique(y_train_lstm)
weights_lstm = class_weight.compute_class_weight(
    'balanced',
    classes=unique_labels_lstm,
    y=y_train_lstm
)
class_weights_dict_lstm = dict(zip(unique_labels_lstm, weights_lstm))

def build_lstm_stage1(input_shape):
    """Create and compile the binary fault-detection LSTM.

    Two stacked 50-unit LSTM layers (the first returns full sequences for
    the second), a 50-unit ReLU dense layer and one sigmoid output.
    Tracks accuracy and AUC.
    """
    layers = [
        LSTM(50, activation='relu', input_shape=input_shape, return_sequences=True),
        LSTM(50, activation='relu'),
        Dense(50, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    net = Sequential(layers)
    net.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy', AUC(name='auc')])
    return net

print("\n--- Training LSTM Stage 1 (Detection) ---")
lstm_model_s1 = build_lstm_stage1(LSTM_INPUT_SHAPE)
lstm_model_s1.summary()

# epochs is an upper bound; the EarlyStopping callback ends training sooner
# once val_loss stops improving.
lstm_model_s1.fit(X_train_lstm, y_train_lstm,
                  batch_size=128,
                  epochs=20,
                  validation_split=0.1,
                  class_weight=class_weights_dict_lstm,
                  callbacks=[early_stopper])

print("\n--- LSTM Stage 1 Evaluation ---")
y_pred_lstm_s1 = (lstm_model_s1.predict(X_test_lstm) > 0.5).astype("int32")
# zero_division=0 avoids UndefinedMetricWarning when a class is never
# predicted and matches the Stage 2 reports.
print(classification_report(y_test_lstm, y_pred_lstm_s1, zero_division=0))

The EarlyStopping callback monitored the validation loss (val_loss) and prevented both models from overfitting.

For the CNN: It stopped training after Epoch 4 and restored the weights from Epoch 1, which had the best val_loss (0.6686). This tells us the CNN model wasn't improving after the very first epoch.

For the LSTM: It trained for 12 epochs, but it restored the weights from Epoch 9, which had the best val_loss (0.5089). This is exactly what we wanted. It found the "sweet spot" before the model started to overfit (which we saw in your previous run).

## Stage 2 - Multiclass Classification

In [13]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout, LSTM
from tensorflow.keras.metrics import AUC
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.utils import class_weight

# --- HELPER FUNCTIONS ---

def get_factors(n):
    """Return ``(n // d, d)`` for the largest divisor ``d <= int(sqrt(n))``.

    Falls back to ``(n, 1)`` when no candidate divisor exists.
    """
    d = int(np.sqrt(n))
    # Walk downward until a divisor is hit; d == 1 always divides n.
    while d > 0:
        if n % d == 0:
            return (n // d, d)
        d -= 1
    return (n, 1)

def create_cnn_dataset(X, img_rows, img_cols):
    """Reshape flat feature rows to ``(n_samples, img_rows, img_cols, 1)``.

    Raises ``ValueError`` if the column count is not ``img_rows * img_cols``.
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    if n_features == img_rows * img_cols:
        return X.reshape((n_samples, img_rows, img_cols, 1))
    raise ValueError(f"Cannot reshape {n_features} features into ({img_rows}, {img_cols})")

def create_lstm_dataset(X, y, time_steps=10):
    """Build overlapping fixed-length windows for a sequence model.

    Window ``i`` is rows ``X[i : i + time_steps]``; its target is
    ``y[i + time_steps]``, i.e. the label of the row right after the window.

    Args:
        X: Array-like with samples along axis 0.
        y: Array-like of targets aligned with ``X`` (1-D labels or 2-D
            one-hot rows); indexed positionally.
        time_steps: Number of consecutive rows per window.

    Returns:
        ``(Xs, ys)`` with ``Xs`` of shape
        ``(n_windows, time_steps, *X.shape[1:])`` and ``ys`` holding
        ``n_windows`` rows, where ``n_windows = max(len(X) - time_steps, 0)``.
        Empty results keep that rank rather than collapsing to shape ``(0,)``.

    Raises:
        ValueError: If ``time_steps`` is negative, or ``y`` has fewer rows
            than ``X`` while at least one window is produced.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if time_steps < 0:
        raise ValueError(f"time_steps must be non-negative, got {time_steps}")

    n_windows = max(len(X) - time_steps, 0)
    if n_windows and len(y) < len(X):
        raise ValueError(
            f"y has {len(y)} rows but X has {len(X)}; they must be aligned"
        )

    # Row indices of every window, built by broadcasting starts + offsets.
    starts = np.arange(n_windows)[:, None]
    offsets = np.arange(time_steps)[None, :]
    Xs = X[starts + offsets]
    ys = y[time_steps:time_steps + n_windows].copy()
    return Xs, ys
    
def pad_features(X, target_features):
    """Zero-pad (or truncate) the columns of ``X`` to ``target_features``."""
    n_rows, n_cols = X.shape[0], X.shape[1]
    missing = target_features - n_cols
    if missing >= 0:
        zero_block = np.zeros((n_rows, missing))
        return np.concatenate([X, zero_block], axis=1)
    # Wider than requested: keep only the leading columns.
    return X[:, :target_features]

# --- 1. DATA PREPARATION (for Multiclass) ---
print("Loading data...")
train_data = pd.read_csv('modified_data_train.csv')
test_data  = pd.read_csv('modified_data_test.csv')

# Columns excluded from the feature matrix; errors='ignore' tolerates any
# that are absent.
drop_cols = ['faultNumber', 'simulationRun', 'sample']

# Labels are cast to int so they can be used as class indices below.
X_train = train_data.drop(columns=drop_cols, errors='ignore')
y_train_labels = train_data['faultNumber'].astype(int)

X_test = test_data.drop(columns=drop_cols, errors='ignore')
y_test_labels = test_data['faultNumber'].astype(int)

# Keep only columns present in both splits, in a deterministic order.
common_cols = sorted(set(X_train.columns) & set(X_test.columns))

X_train = X_train[common_cols]
X_test  = X_test[common_cols]

print(f"Data prepared with {len(common_cols)} common features.")

# Determine number of classes. The output layer is sized by the largest
# label value seen in either split, so label ids that never occur still get
# an output slot.
all_labels = pd.concat([y_train_labels, y_test_labels])
N_CLASSES = int(all_labels.max()) + 1

print(f"Found {len(all_labels.unique())} unique fault types. Setting N_CLASSES to {N_CLASSES}.")

# Convert labels to categorical (one-hot encoding)
y_train_cat = to_categorical(y_train_labels, num_classes=N_CLASSES)
y_test_cat = to_categorical(y_test_labels, num_classes=N_CLASSES)


# --- 2. SCALING ---
print("Scaling data...")
# Fit on the training split only, then apply the same transform to test.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

N_FEATURES = X_train_scaled.shape[1]

# --- CLASS WEIGHTS (one entry for every class index) ---
print("Calculating class weights...")

# Get the unique integer labels from the training set
unique_labels = np.unique(y_train_labels)

# 'balanced' weights can only be computed for classes that actually occur.
weights = class_weight.compute_class_weight(
    'balanced',
    classes=unique_labels,
    y=y_train_labels
)

# Start from a neutral weight of 1.0 for every index 0..N_CLASSES-1, then
# overwrite the entries for the classes present in the training labels.
class_weights_dict = {i: 1.0 for i in range(N_CLASSES)}
calculated_dict = dict(zip(unique_labels, weights))
class_weights_dict.update(calculated_dict)

print(f"Class weights calculated for {len(unique_labels)} classes and applied to all {N_CLASSES} classes.")

# --- DEFINE EARLY STOPPING ---
early_stopper = EarlyStopping(
    monitor='val_loss',
    patience=3,
    verbose=1,
    restore_best_weights=True
)


# --- 3. MODEL 1: CNN FOR STAGE 2 (CLASSIFICATION) ---

print("\n--- Preparing CNN Data ---")
# Side length of the smallest square grid that can hold every feature; the
# spare cells are filled with zeros by pad_features.
TARGET_DIM = int(np.ceil(np.sqrt(N_FEATURES)))
IMG_ROWS, IMG_COLS = TARGET_DIM, TARGET_DIM
TARGET_FEATURES = TARGET_DIM * TARGET_DIM

print(f"Original features: {N_FEATURES}. Padding to {TARGET_FEATURES} for a ({IMG_ROWS}, {IMG_COLS}) image.")

X_train_padded = pad_features(X_train_scaled, TARGET_FEATURES)
X_test_padded = pad_features(X_test_scaled, TARGET_FEATURES)

CNN_INPUT_SHAPE = (IMG_ROWS, IMG_COLS, 1)

X_train_cnn = create_cnn_dataset(X_train_padded, IMG_ROWS, IMG_COLS)
X_test_cnn = create_cnn_dataset(X_test_padded, IMG_ROWS, IMG_COLS)


def build_cnn_stage2(input_shape, num_classes):
    """Create and compile the multiclass fault-classification CNN.

    A 3x3 convolution (32 filters), 2x2 max pooling, dropout, a 128-unit
    dense layer with dropout, and a softmax output with ``num_classes``
    units. Compiled with categorical cross-entropy (one-hot targets), Adam
    and the accuracy metric.
    """
    layers = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        # One probability per class.
        Dense(num_classes, activation='softmax'),
    ]
    cnn = Sequential(layers)
    cnn.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return cnn

print("\n--- Training CNN Stage 2 (Classification) ---")
cnn_model_s2 = build_cnn_stage2(CNN_INPUT_SHAPE, N_CLASSES)
cnn_model_s2.summary()

# class_weights_dict has an entry for every class index 0..N_CLASSES-1.
cnn_model_s2.fit(X_train_cnn, y_train_cat,
                 batch_size=128,
                 epochs=20,
                 validation_split=0.1,
                 class_weight=class_weights_dict,
                 callbacks=[early_stopper])

print("\n--- CNN Stage 2 Evaluation ---")
y_pred_cnn_s2_probs = cnn_model_s2.predict(X_test_cnn)
# Predicted class = index of the largest softmax probability.
y_pred_cnn_s2_labels = np.argmax(y_pred_cnn_s2_probs, axis=1)
print(classification_report(y_test_labels, y_pred_cnn_s2_labels, zero_division=0))


# --- 4. MODEL 2: LSTM FOR STAGE 2 (CLASSIFICATION) ---

print("\n--- Preparing LSTM Data ---")
TIME_STEPS = 10
LSTM_INPUT_SHAPE = (TIME_STEPS, N_FEATURES)

X_train_lstm, y_train_lstm_cat = create_lstm_dataset(X_train_scaled, y_train_cat, TIME_STEPS)
X_test_lstm, y_test_lstm_cat = create_lstm_dataset(X_test_scaled, y_test_cat, TIME_STEPS)

# Recover the integer labels of the windowed targets from their one-hot
# rows. This avoids building the full window tensors a second time just to
# obtain the labels.
y_test_lstm_labels = np.argmax(y_test_lstm_cat, axis=1)
y_train_lstm_labels = np.argmax(y_train_lstm_cat, axis=1)

# Get unique integer labels from the time-stepped training data
unique_labels_lstm = np.unique(y_train_lstm_labels)

weights_lstm = class_weight.compute_class_weight(
    'balanced',
    classes=unique_labels_lstm,
    y=y_train_lstm_labels
)

# Neutral weight of 1.0 for every class index, overwritten with the
# 'balanced' weight for each class present in the windowed training labels.
class_weights_dict_lstm = {i: 1.0 for i in range(N_CLASSES)}
calculated_dict_lstm = dict(zip(unique_labels_lstm, weights_lstm))
class_weights_dict_lstm.update(calculated_dict_lstm)


def build_lstm_stage2(input_shape, num_classes):
    """Create and compile the multiclass fault-classification LSTM.

    Two stacked 50-unit LSTM layers (the first returns full sequences for
    the second), a 50-unit ReLU dense layer and a softmax output with
    ``num_classes`` units. Compiled with Adam, categorical cross-entropy
    (one-hot targets) and the accuracy metric.
    """
    layers = [
        LSTM(50, activation='relu', input_shape=input_shape, return_sequences=True),
        LSTM(50, activation='relu'),
        Dense(50, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    net = Sequential(layers)
    net.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])
    return net

print("\n--- Training LSTM Stage 2 (Classification) ---")
lstm_model_s2 = build_lstm_stage2(LSTM_INPUT_SHAPE, N_CLASSES)
lstm_model_s2.summary()

# Train on the windowed data with one-hot targets. class_weights_dict_lstm
# carries an entry for every class index; epochs=20 is an upper bound because
# the early_stopper callback can end training sooner.
lstm_model_s2.fit(X_train_lstm, y_train_lstm_cat,
                  batch_size=128,
                  epochs=20,
                  validation_split=0.1,
                  class_weight=class_weights_dict_lstm,
                  callbacks=[early_stopper])
                  
print("\n--- LSTM Stage 2 Evaluation ---")
y_pred_lstm_s2_probs = lstm_model_s2.predict(X_test_lstm)
# Predicted class = index of the largest softmax probability per window.
y_pred_lstm_s2_labels = np.argmax(y_pred_lstm_s2_probs, axis=1)

# Compare against the integer labels of the windowed test targets;
# zero_division=0 reports 0.0 for classes that are never predicted.
print(classification_report(y_test_lstm_labels, y_pred_lstm_s2_labels, zero_division=0))

Loading data...
Data prepared with 37 common features.
Found 18 unique fault types. Setting N_CLASSES to 21.
Scaling data...
Calculating class weights...
Class weights calculated for 18 classes and applied to all 21 classes.

--- Preparing CNN Data ---
Original features: 37. Padding to 49 for a (7, 7) image.

--- Training CNN Stage 2 (Classification) ---
Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 5, 5, 32)          320       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 2, 2, 32)         0         
 2D)                                                             
                                                                 
 dropout_8 (Dropout)         (None, 2, 2, 32)          0         
                                                                 
 flatten_4 (Flatten)       