In [None]:
# Importing necessary packages
import pandas as pd
import numpy as np
import sklearn
from sklearn import linear_model
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score,f1_score
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC,SVC
from sklearn.neighbors import KNeighborsClassifier

In [None]:
import pandas as pd

# Read both CSV files from Colab's default upload directory.
# dropna(axis='columns') removes every column that contains at least one
# missing value, so only fully populated columns remain.
detection_train = (
    pd.read_csv('/content/Dataset.csv')
    .dropna(axis='columns')
)
class_train = (
    pd.read_csv('/content/Dataset_2.csv')
    .dropna(axis='columns')
)

# Column names shared by the cells below.
features = ['Va', 'Vb', 'Vc', 'Ia', 'Ib', 'Ic']  # model inputs
class_target = ['G', 'C', 'B', 'A']              # classification label columns

In [None]:
  #Defining the inputs and outputs
# Model inputs: the same six feature columns are used for both tasks.
detection_data_X = detection_train[features]
class_data_X = class_train[features]

# Detection label column. This cell originally read column 'l', while every
# later cell in the notebook reads 'Output(S)' from the same frame. Keep 'l'
# when it exists (unchanged behaviour) and otherwise fall back to the column
# name used by the rest of the notebook instead of failing with a KeyError.
detection_label = 'l' if 'l' in detection_train.columns else 'Output(S)'
detection_data_Y = detection_train[detection_label]

# Classification labels: the four label columns, still as a DataFrame here.
class_data_Y = class_train[class_target]

In [None]:
# Empty containers intended to collect accuracy and error values
# for the detection and classification models.
detect_accuracy, detect_error = [], []
class_accuracy, class_error = [], []

In [None]:
# Hold out 33% of each dataset for testing, using a fixed seed (1).
# Later cells re-split the data with their own settings.
class_train_X, class_test_X, class_train_Y, class_test_Y = train_test_split(
    class_data_X,
    class_data_Y,
    test_size=0.33,
    random_state=1,
)
detection_train_X, detection_test_X, detection_train_Y, detection_test_Y = train_test_split(
    detection_data_X,
    detection_data_Y,
    test_size=0.33,
    random_state=1,
)

**RBM**

In [None]:
import numpy as np
import pandas as pd
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split

# ===================== ✅ CHECK DATA SHAPES ✅ ===================== #
# Local import: MinMaxScaler is needed below to map the RBM inputs into [0, 1].
from sklearn.preprocessing import MinMaxScaler

print(f"detection_data_X shape: {detection_data_X.shape}")
print(f"detection_data_Y shape: {detection_data_Y.shape}")

print(f"class_data_X shape: {class_data_X.shape}")
print(f"class_data_Y shape: {class_data_Y.shape}")

# ✅ Ensure Features (X) and Labels (Y) have the SAME number of samples
assert detection_data_X.shape[0] == detection_data_Y.shape[0], "❌ Mismatch in Detection Dataset!"
assert class_data_X.shape[0] == class_data_Y.shape[0], "❌ Mismatch in Classification Dataset!"

# ✅ Convert Multi-Column Target (Y) to 1D Array
# The isinstance guards make this conversion safe to re-run: once a target is a
# 1-D array it is left alone.
# NOTE(review): argmax assumes exactly one label column is set per row. If rows
# can have several flags set (or none), different label patterns collapse into
# the same class id - confirm against the dataset.
if isinstance(detection_data_Y, pd.DataFrame) and detection_data_Y.shape[1] > 1:
    detection_data_Y = detection_data_Y.values.argmax(axis=1)  # Convert One-Hot Encoding to Single Column

if isinstance(class_data_Y, pd.DataFrame) and class_data_Y.shape[1] > 1:
    class_data_Y = class_data_Y.values.argmax(axis=1)

# ✅ Train-Test Split (stratified so both splits keep the class proportions)
detection_train_X, detection_test_X, detection_train_Y, detection_test_Y = train_test_split(
    detection_data_X, detection_data_Y, test_size=0.2, random_state=42, stratify=detection_data_Y)

class_train_X, class_test_X, class_train_Y, class_test_Y = train_test_split(
    class_data_X, class_data_Y, test_size=0.2, random_state=42, stratify=class_data_Y)

# ✅ Convert Y to 1D NumPy arrays.
# np.asarray(...).ravel() works for both pandas Series and ndarrays; calling
# .ravel() directly on a Series is deprecated in recent pandas releases.
detection_train_Y = np.asarray(detection_train_Y).ravel()
detection_test_Y = np.asarray(detection_test_Y).ravel()
class_train_Y = np.asarray(class_train_Y).ravel()
class_test_Y = np.asarray(class_test_Y).ravel()

# ===================== RBM + LOGISTIC REGRESSION FOR DETECTION ===================== #

# Each pipeline gets its OWN scaler instance. Pipeline.fit refits its steps in
# place, so sharing one StandardScaler between the two pipelines would leave
# the detection pipeline with statistics learned from the classification data
# as soon as the second pipeline is fitted.
scaler = StandardScaler()
pca_detection = PCA(n_components=min(5, detection_train_X.shape[1]))  # Prevent PCA errors

rbm_detection = BernoulliRBM(n_components=128, learning_rate=0.05, n_iter=200, random_state=42)
logistic_detection = LogisticRegression(max_iter=5000, solver='saga', random_state=42)

rbm_detection_pipeline = Pipeline([
    ('scaler', scaler),
    ('pca', pca_detection),
    # BernoulliRBM models binary visible units and expects inputs in [0, 1];
    # standardized / PCA-projected values are unbounded, so rescale them first.
    ('minmax', MinMaxScaler()),
    ('rbm', rbm_detection),
    ('logistic', logistic_detection)
])

rbm_detection_pipeline.fit(detection_train_X, detection_train_Y)
detection_preds = rbm_detection_pipeline.predict(detection_test_X)

detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
detection_mse = mean_squared_error(detection_test_Y, detection_preds)

print(f" Detection Accuracy: {detection_accuracy:.4f} | MSE: {detection_mse:.4f}")

# =====================  RBM + LOGISTIC REGRESSION FOR CLASSIFICATION ===================== #

class_scaler = StandardScaler()  # separate instance, see note above
pca_classification = PCA(n_components=min(10, class_train_X.shape[1]))  # Prevent PCA errors

rbm_class = BernoulliRBM(n_components=256, learning_rate=0.05, n_iter=200, random_state=42)
logistic_class = LogisticRegression(max_iter=5000, solver='saga', random_state=42)

rbm_class_pipeline = Pipeline([
    ('scaler', class_scaler),
    ('pca', pca_classification),
    ('minmax', MinMaxScaler()),  # keep RBM inputs inside [0, 1]
    ('rbm', rbm_class),
    ('logistic', logistic_class)
])

rbm_class_pipeline.fit(class_train_X, class_train_Y)
class_preds = rbm_class_pipeline.predict(class_test_X)

classification_accuracy = accuracy_score(class_test_Y, class_preds)
# The MSE is computed on integer class ids, so its value depends on how the
# classes happen to be numbered; accuracy is the meaningful figure here.
classification_mse = mean_squared_error(class_test_Y, class_preds)

print(f"Classification Accuracy: {classification_accuracy:.4f} | MSE: {classification_mse:.4f}")


**DBN**

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error
from sklearn.model_selection import train_test_split

# ===================== DATA PREPROCESSING ===================== #

# Seed TensorFlow so weight initialisation and dropout are repeatable.
tf.random.set_seed(42)

# Make the cell independent of which earlier cells were run: the networks below
# need 1-D integer targets. If the classification target is still the
# multi-column label frame, collapse it with argmax (the same conversion the
# RBM cell applies).
# NOTE(review): argmax assumes exactly one label column is set per row - confirm.
if getattr(class_data_Y, 'ndim', 1) > 1:
    class_data_Y = np.asarray(class_data_Y).argmax(axis=1)
detection_data_Y = np.asarray(detection_data_Y).ravel()

# Standardize Data
scaler = StandardScaler()

# Train-Test Split (stratified, consistent with the RBM cell)
detection_train_X, detection_test_X, detection_train_Y, detection_test_Y = train_test_split(
    detection_data_X, detection_data_Y, test_size=0.2, random_state=42, stratify=detection_data_Y)

class_train_X, class_test_X, class_train_Y, class_test_Y = train_test_split(
    class_data_X, class_data_Y, test_size=0.2, random_state=42, stratify=class_data_Y)

# Standardizing Features: statistics are learned on the training split only.
# The single scaler is refit for the classification data after the detection
# arrays have been transformed, so the two datasets do not share statistics.
detection_train_X = scaler.fit_transform(detection_train_X)
detection_test_X = scaler.transform(detection_test_X)

class_train_X = scaler.fit_transform(class_train_X)
class_test_X = scaler.transform(class_test_X)

# ===================== DEEP BELIEF NETWORK (DBN) FOR DETECTION ===================== #
# NOTE: the model below is a plain feed-forward network trained end to end with
# backpropagation; no layer-wise RBM pre-training takes place in this cell.

# Define Model for Detection
model_detection = Sequential([
    Dense(256, activation='relu', input_shape=(detection_train_X.shape[1],)),
    BatchNormalization(),
    Dropout(0.2),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile Model
model_detection.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train Model
model_detection.fit(detection_train_X, detection_train_Y, epochs=70, batch_size=32, verbose=0)

# Predictions: threshold the sigmoid output at 0.5 and flatten the (n, 1)
# column vector so the predictions have the same 1-D shape as the labels.
detection_preds = (model_detection.predict(detection_test_X) > 0.5).astype(int).ravel()

# Metrics
detection_mse = mean_squared_error(detection_test_Y, detection_preds)
detection_accuracy = accuracy_score(detection_test_Y, detection_preds)

# Print Detection Model Metrics
print("\n🔹 Optimized Detection Model Performance")
print("---------------------------------------------------")
print(f"Detection Model Mean Squared Error: {detection_mse:.4f}")
print(f"DBN Detection Model Accuracy: {detection_accuracy:.4f}")
print("---------------------------------------------------")

# ===================== DEEP BELIEF NETWORK (DBN) FOR CLASSIFICATION ===================== #

# Define Model for Classification (Multi-Class)
# The output width follows the number of distinct class ids in the training labels.
num_classes = len(np.unique(class_train_Y))

model_class = Sequential([
    Dense(512, activation='relu', input_shape=(class_train_X.shape[1],)),
    BatchNormalization(),
    Dropout(0.2),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax')  # Multi-class classification
])

# Compile Model (sparse loss: targets are integer class ids, not one-hot rows)
model_class.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train Model
model_class.fit(class_train_X, class_train_Y, epochs=80, batch_size=32, verbose=0)

# Predictions: most probable class per row
class_preds = np.argmax(model_class.predict(class_test_X), axis=1)

# Metrics (the MSE is taken over integer class ids and depends on their numbering)
classification_mse = mean_squared_error(class_test_Y, class_preds)
classification_accuracy = accuracy_score(class_test_Y, class_preds)

# Print Classification Model Metrics
print("\n🔹 Optimized Classification Model Performance")
print("---------------------------------------------------")
print(f"Classification Model Mean Squared Error: {classification_mse:.4f}")
print(f"DBN Classification Model Accuracy: {classification_accuracy:.4f}")
print("---------------------------------------------------")

# Precision, Recall, F1-Score, and Support Count
print("\nClassification Report")
print(classification_report(class_test_Y, class_preds))


**CNN**

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, BatchNormalization, Add, GlobalAveragePooling1D, Dense, Dropout, Activation
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import random
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error
# ===================== DATA PREPROCESSING ===================== #
# Input columns and label columns used by this cell.
features = ['Va', 'Vb', 'Vc', 'Ia', 'Ib', 'Ic']
class_target = ['G', 'C', 'B', 'A']

# Feature matrices and targets for both tasks
detection_data_X = detection_train[features]
class_data_X = class_train[features]
detection_data_Y = detection_train['Output(S)']
# Collapse the label columns into one integer per row (index of the first
# maximum along the column axis).
class_data_Y = class_train[class_target].values.argmax(axis=1)

# Train/test partitions, both seeded with 0
class_train_X, class_test_X, class_train_Y, class_test_Y = train_test_split(
    class_data_X,
    class_data_Y,
    test_size=0.1,
    random_state=0,
)

# NOTE(review): the detection split holds out 90% of the rows, whereas the
# classification split above holds out 10% - confirm this is intended.
detection_train_X, detection_test_X, detection_train_Y, detection_test_Y = train_test_split(
    detection_data_X,
    detection_data_Y,
    test_size=0.9,
    random_state=0,
)

# Standardize each dataset with statistics fitted on its own training split,
# then append a trailing channel axis so Conv1D receives (samples, steps, 1).
scaler = StandardScaler()
class_train_X = scaler.fit_transform(class_train_X)[..., np.newaxis]
class_test_X = scaler.transform(class_test_X)[..., np.newaxis]
detection_train_X = scaler.fit_transform(detection_train_X)[..., np.newaxis]
detection_test_X = scaler.transform(detection_test_X)[..., np.newaxis]
# ===================== DATA AUGMENTATION FUNCTION ===================== #
def augment_data(X, Y, noise_std=0.02, rng=None):
    """Return a noise-perturbed copy of ``X`` together with a copy of ``Y``.

    Zero-mean Gaussian noise is added independently to every element of ``X``.
    The inputs are not modified; the caller decides whether to concatenate the
    result with the original data.

    Parameters
    ----------
    X : array-like
        Samples, indexed along the first axis.
    Y : array-like
        One label per sample. Labels are taken by position, so a pandas Series
        with a shuffled index is handled correctly.
    noise_std : float, default 0.02
        Standard deviation of the added noise.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, so ``np.random.seed`` controls the result.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The perturbed samples (same shape as ``X``) and a copy of the labels.

    Raises
    ------
    ValueError
        If ``X`` and ``Y`` do not contain the same number of samples.
    """
    samples = np.asarray(X, dtype=float)
    # np.asarray makes the label lookup positional; indexing a pandas Series
    # with Y[i] would be label-based and break after a shuffled split.
    labels = np.asarray(Y)
    if len(samples) != len(labels):
        raise ValueError(
            f"X and Y must have the same number of samples, got {len(samples)} and {len(labels)}"
        )

    sampler = np.random if rng is None else rng
    # A single vectorized draw replaces the per-sample Python loop and keeps
    # the input shape even when X is empty.
    noise = sampler.normal(0, noise_std, samples.shape)
    return samples + noise, labels.copy()

# ===================== VALIDATION SPLIT + AUGMENTATION ===================== #
# Seed NumPy (used by the noise augmentation) and TensorFlow so runs are repeatable.
np.random.seed(0)
tf.random.set_seed(0)

# Carve a validation set out of the TRAINING data before augmenting.
# The callbacks below pick the learning rate and the restored weights from the
# validation metrics; monitoring the test set instead would tune the model on
# the very data used for the final score and make that score optimistic.
class_fit_X, class_val_X, class_fit_Y, class_val_Y = train_test_split(
    class_train_X, class_train_Y, test_size=0.1, random_state=0)

# Augment Training Data (only the part used for fitting, so no noisy copy of a
# validation sample ends up in the training set)
aug_X, aug_Y = augment_data(class_fit_X, class_fit_Y)
class_train_X = np.concatenate((class_fit_X, aug_X), axis=0)
class_train_Y = np.concatenate((class_fit_Y, aug_Y), axis=0)

# ===================== RESIDUAL CNN MODEL FOR CLASSIFICATION ===================== #
def build_classification_model(input_shape=(6, 1), num_classes=None):
    """Build the (uncompiled) residual 1-D CNN used for classification.

    Architecture: one 128-filter convolution block, a 128-filter residual
    block with an identity skip connection, a 256-filter residual block whose
    skip path goes through a 1x1 convolution to match the channel count, global
    average pooling, and a dense head (512 -> dropout 0.4 -> 256 -> softmax).

    Parameters
    ----------
    input_shape : tuple, default (6, 1)
        Shape of one sample as (steps, channels).
    num_classes : int, optional
        Width of the softmax output. Defaults to ``len(class_target)``.

    Returns
    -------
    tensorflow.keras.Model
    """
    if num_classes is None:
        num_classes = len(class_target)

    inputs = Input(shape=input_shape)

    # Initial Conv Block
    x = Conv1D(128, kernel_size=3, padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Residual Block 1 (channel count unchanged, identity skip)
    res = Conv1D(128, kernel_size=3, padding='same')(x)
    res = BatchNormalization()(res)
    res = Activation('relu')(res)
    res = Conv1D(128, kernel_size=3, padding='same')(res)
    res = BatchNormalization()(res)

    x = Add()([x, res])  # Skip Connection
    x = Activation('relu')(x)

    # Residual Block 2 (128 -> 256 channels)
    res = Conv1D(256, kernel_size=3, padding='same')(x)
    res = BatchNormalization()(res)
    res = Activation('relu')(res)
    res = Conv1D(256, kernel_size=3, padding='same')(res)
    res = BatchNormalization()(res)

    # Projection Layer to Match Shapes
    x = Conv1D(256, kernel_size=1, padding="same")(x)  # 1x1 Conv to align shape
    x = Add()([x, res])  # Skip Connection
    x = Activation('relu')(x)

    x = GlobalAveragePooling1D()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.4)(x)  # Prevent overfitting
    x = Dense(256, activation='relu')(x)
    outputs = Dense(num_classes, activation='softmax')(x)  # Multi-Class Output

    model = Model(inputs, outputs)
    return model

# Build & Compile Model (input shape taken from the prepared training array)
model_classification = build_classification_model(input_shape=class_train_X.shape[1:])
model_classification.compile(optimizer=AdamW(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Learning Rate & Early Stopping (both driven by the validation split)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True, verbose=1)

# Train Model
model_classification.fit(class_train_X, class_train_Y, epochs=15, batch_size=64, validation_data=(class_val_X, class_val_Y), callbacks=[lr_scheduler, early_stopping], verbose=1)

# Predictions on the untouched test set
class_preds = np.argmax(model_classification.predict(class_test_X), axis=1)

# Accuracy
classification_accuracy = accuracy_score(class_test_Y, class_preds)
print(f"\n🔹 CNN Classification Model Accuracy: {classification_accuracy:.4f}")
print("\nClassification Report")
print(classification_report(class_test_Y, class_preds))


In [None]:
!pip install pytorch-tabnet


In [None]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
import shap

# ===================== DATA PREPROCESSING ===================== #
features = ['Va', 'Vb', 'Vc', 'Ia', 'Ib', 'Ic']
class_target = ['G', 'C', 'B', 'A']

# Splitting Data
detection_data_X = detection_train[features]
class_data_X = class_train[features]
detection_data_Y = detection_train['Output(S)']
class_data_Y = class_train[class_target]

# Convert One-Hot Encoding to Integer Labels
# LabelEncoder re-maps the argmax ids to consecutive integers 0..k-1.
# NOTE(review): argmax assumes exactly one label column is set per row - confirm.
encoder = LabelEncoder()
class_data_Y = encoder.fit_transform(np.argmax(class_data_Y.values, axis=1))
# Number of classes that actually occur; this may be smaller than the number
# of label columns, so it is what the booster must be configured with.
n_classes = len(encoder.classes_)

# Train-Test Split
class_train_X, class_test_X, class_train_Y, class_test_Y = train_test_split(
    class_data_X, class_data_Y, test_size=0.2, random_state=1, stratify=class_data_Y)

detection_train_X, detection_test_X, detection_train_Y, detection_test_Y = train_test_split(
    detection_data_X, detection_data_Y, test_size=0.2, random_state=1, stratify=detection_data_Y)

# Standardizing Data (statistics fitted on each training split only)
scaler = StandardScaler()
class_train_X = scaler.fit_transform(class_train_X)
class_test_X = scaler.transform(class_test_X)
detection_train_X = scaler.fit_transform(detection_train_X)
detection_test_X = scaler.transform(detection_test_X)

# ===================== DATA BALANCING WITH SMOTE ===================== #
# Oversampling is applied to the training split only; the test split stays untouched.
smote = SMOTE(sampling_strategy='auto', random_state=1)
class_train_X_smote, class_train_Y_smote = smote.fit_resample(class_train_X, class_train_Y)

# ===================== FEATURE SELECTION USING SHAP ===================== #
xgb_temp = xgb.XGBClassifier(n_estimators=10, random_state=42)
xgb_temp.fit(class_train_X_smote, class_train_Y_smote)

explainer = shap.Explainer(xgb_temp)
shap_values = explainer(class_train_X_smote)

# For a multi-class model the SHAP values are 3-D: (samples, features, classes).
# Averaging over axis 0 alone would leave a (features, classes) matrix, and the
# argsort below would then return a 2-D index that turns the feature matrix
# into a 3-D array. Reduce over every axis except the feature axis so the
# importance is one number per feature for both 2-D and 3-D SHAP outputs.
abs_shap = np.abs(shap_values.values)
reduce_axes = tuple(axis for axis in range(abs_shap.ndim) if axis != 1)
shap_importance = abs_shap.mean(axis=reduce_axes)

# Selecting top features
top_features = np.argsort(shap_importance)[-4:]  # Select best 4 features
class_train_X_smote = class_train_X_smote[:, top_features]
class_test_X = class_test_X[:, top_features]

# ===================== XGBOOST MODEL WITH GRID SEARCH ===================== #
xgb_model = xgb.XGBClassifier(objective='multi:softmax', num_class=n_classes, random_state=42)

# Hyperparameter tuning
param_grid = {
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [4, 6, 8],
    'n_estimators': [30, 40, 50],
}

# NOTE(review): the cross-validation folds are cut from the SMOTE-resampled
# data, so a fold can contain synthetic points interpolated from rows in other
# folds; the CV scores may be optimistic. The held-out test score is unaffected.
grid_search = GridSearchCV(xgb_model, param_grid, cv=3, scoring='accuracy', verbose=1)
grid_search.fit(class_train_X_smote, class_train_Y_smote)

# ===================== FINAL MODEL WITH OPTIMIZED PARAMETERS ===================== #
# GridSearchCV (refit=True by default) has already retrained the best
# configuration on the full training data, so no extra fit call is needed.
best_xgb = grid_search.best_estimator_

# Predictions
xgb_preds = best_xgb.predict(class_test_X)

# Accuracy Calculation
xgb_accuracy = accuracy_score(class_test_Y, xgb_preds)
print(f"XGBoost Optimized Accuracy: {xgb_accuracy:.4f}")

# ===================== RESULT ===================== #
if xgb_accuracy >= 0.99:
    print(" Achieved near-perfect accuracy!")
elif xgb_accuracy >= 0.95:
    print("Accuracy is very high! Further tuning might be unnecessary.")
else:
    print("⚠️ Still below 95%, consider more hyperparameter tuning.")



In [None]:
!pip install optuna


In [None]:
pip install catboost

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# ===================== DATA PREPROCESSING ===================== #
features = ['Va', 'Vb', 'Vc', 'Ia', 'Ib', 'Ic']
class_target = ['G', 'C', 'B', 'A']

# Splitting Data
detection_data_X = detection_train[features]
class_data_X = class_train[features]
detection_data_Y = detection_train['Output(S)']
class_data_Y = class_train[class_target]

# Convert One-Hot Encoding to Integer Labels
# NOTE(review): argmax assumes exactly one label column is set per row - confirm.
encoder = LabelEncoder()
class_data_Y = encoder.fit_transform(np.argmax(class_data_Y.values, axis=1))

# Train-Test Split
class_train_X, class_test_X, class_train_Y, class_test_Y = train_test_split(
    class_data_X, class_data_Y, test_size=0.2, random_state=42, stratify=class_data_Y)

# Standardizing Data (statistics fitted on the training split only)
scaler = StandardScaler()
class_train_X = scaler.fit_transform(class_train_X)
class_test_X = scaler.transform(class_test_X)

# ===================== DATA BALANCING WITH SMOTE ===================== #
# Oversampling touches the training split only.
smote = SMOTE(sampling_strategy='auto', random_state=42)
class_train_X_smote, class_train_Y_smote = smote.fit_resample(class_train_X, class_train_Y)

# ===================== FEATURE ENGINEERING ===================== #
# Project the six standardized features onto four principal components; the
# projection is fitted on the training data and re-used for the test data.
pca = PCA(n_components=4)
class_train_X_smote = pca.fit_transform(class_train_X_smote)
class_test_X = pca.transform(class_test_X)

# ===================== BASE MODELS ===================== #
# Every estimator is seeded so repeated runs give the same result.
# `use_label_encoder` is no longer passed: it is deprecated and ignored by
# current XGBoost releases, and the labels are already integer-encoded above.
base_models = [
    ('xgb', XGBClassifier(n_estimators=100, learning_rate=0.05, max_depth=6, eval_metric='mlogloss', random_state=42)),
    ('lgbm', LGBMClassifier(n_estimators=100, learning_rate=0.05, max_depth=6, random_state=42)),
    ('rf', RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)),
    ('cat', CatBoostClassifier(iterations=200, depth=6, learning_rate=0.05, verbose=0, random_seed=42))
]

# ===================== STACKING CLASSIFIER ===================== #
stacking_model = StackingClassifier(
    estimators=base_models,
    final_estimator=XGBClassifier(n_estimators=50, learning_rate=0.05, max_depth=6, eval_metric='mlogloss', random_state=42),
    # passthrough=True feeds the ORIGINAL input features to the final estimator
    # in addition to the base models' predictions (which are always passed on).
    passthrough=True
)

# Train Stacking Model
# NOTE(review): the stacker's internal cross-validation runs on SMOTE-resampled
# data, so its out-of-fold predictions may be optimistic; the test score below
# is computed on untouched data.
stacking_model.fit(class_train_X_smote, class_train_Y_smote)

# Predictions
stacking_preds = stacking_model.predict(class_test_X)

# Accuracy Score
stacking_accuracy = accuracy_score(class_test_Y, stacking_preds)
print(f"FINAL STACKING CLASSIFICATION ACCURACY: {stacking_accuracy:.4f}")

# ===================== CHECK ACCURACY ===================== #
if stacking_accuracy >= 0.95:
    print("Achieved 95%+ Accuracy! 🎉")
else:
    print("⚠️ Still below 95%, consider further fine-tuning.")


**Explainable AI (XAI)**

In [None]:
!pip install interpret shap


In [None]:
import numpy as np
import pandas as pd
import xgboost as xgb
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

# ===================== DATA PREPROCESSING ===================== #
# Seed TensorFlow so the MLP's initial weights and dropout masks are repeatable.
tf.random.set_seed(42)

features = ['Va', 'Vb', 'Vc', 'Ia', 'Ib', 'Ic']
class_target = ['G', 'C', 'B', 'A']

# Splitting Data
detection_data_X = detection_train[features]
class_data_X = class_train[features]
detection_data_Y = detection_train['Output(S)']
class_data_Y = class_train[class_target]

# Convert One-Hot Encoding to Integer Labels
# NOTE(review): argmax assumes exactly one label column is set per row - confirm.
encoder = LabelEncoder()
class_data_Y = encoder.fit_transform(np.argmax(class_data_Y.values, axis=1))
# Both models are sized with the number of classes that actually occur, so
# their probability outputs have the same number of columns.
n_classes = len(encoder.classes_)

# Train-Test Split
class_train_X, class_test_X, class_train_Y, class_test_Y = train_test_split(
    class_data_X, class_data_Y, test_size=0.2, random_state=42, stratify=class_data_Y)

# Standardizing Data (statistics fitted on the training split only)
scaler = StandardScaler()
class_train_X = scaler.fit_transform(class_train_X)
class_test_X = scaler.transform(class_test_X)

# ===================== DATA BALANCING WITH SMOTE ===================== #
smote = SMOTE(sampling_strategy='auto', random_state=42)
class_train_X_smote, class_train_Y_smote = smote.fit_resample(class_train_X, class_train_Y)

# ===================== XGBOOST MODEL ===================== #
xgb_model = xgb.XGBClassifier(
    n_estimators=180,  # Number of boosting rounds
    learning_rate=0.03,
    max_depth=10,  # Maximum tree depth
    colsample_bytree=0.9,  # Fraction of features sampled per tree
    subsample=0.9,  # Fraction of rows sampled per tree
    objective='multi:softmax',
    num_class=n_classes,
    random_state=42
)

# Train XGBoost
xgb_model.fit(class_train_X_smote, class_train_Y_smote)

# XGBoost Predictions
xgb_preds = xgb_model.predict(class_test_X)
xgb_accuracy = accuracy_score(class_test_Y, xgb_preds)
print(f"XGBoost Accuracy: {xgb_accuracy:.4f}")

# ===================== MULTI-LAYER PERCEPTRON (MLP) ===================== #
def build_mlp_model(num_classes=None):
    """Build the (uncompiled) MLP classifier.

    Three hidden layers (512, 256, 128 units, ReLU) with batch normalization
    and 30% dropout after the first two, followed by a softmax output.

    Parameters
    ----------
    num_classes : int, optional
        Width of the softmax output. Defaults to the number of encoded classes.

    Returns
    -------
    tensorflow.keras.Sequential
    """
    if num_classes is None:
        num_classes = n_classes
    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(class_train_X.shape[1],)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))  # Multi-class output
    return model

# Compile Model
mlp_model = build_mlp_model()
mlp_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train Model
# The test split is passed as validation data for monitoring only; no callback
# uses it, so it does not influence the trained weights.
mlp_model.fit(class_train_X_smote, class_train_Y_smote, epochs=50, batch_size=32, validation_data=(class_test_X, class_test_Y), verbose=1)

# MLP Predictions (keep the probabilities for the ensemble below)
mlp_proba = mlp_model.predict(class_test_X)
mlp_preds = np.argmax(mlp_proba, axis=1)
mlp_accuracy = accuracy_score(class_test_Y, mlp_preds)
print(f"MLP Accuracy: {mlp_accuracy:.4f}")

# ===================== ENSEMBLE (SOFT VOTING) ===================== #
# Class ids are nominal, so they must not be averaged: (0 + 3) // 2 would
# produce class 1, which neither model predicted. Average the predicted class
# probabilities instead and pick the most probable class.
xgb_proba = xgb_model.predict_proba(class_test_X)
final_preds = np.argmax((xgb_proba + mlp_proba) / 2.0, axis=1)

# Final Accuracy
final_accuracy = accuracy_score(class_test_Y, final_preds)
print(f"FINAL STACKING CLASSIFICATION ACCURACY: {final_accuracy:.4f}")

# ===================== CHECK ACCURACY ===================== #
if final_accuracy >= 0.95:
    print("✅ Achieved 95%+ Accuracy! 🎉")
else:
    print("Still below 95%, consider more tuning.")


**LSTM**

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE

# ===================== DATA PREPROCESSING ===================== #
# Input columns and label columns used by this cell.
features = ['Va', 'Vb', 'Vc', 'Ia', 'Ib', 'Ic']
class_target = ['G', 'C', 'B', 'A']

# Feature matrices and targets
detection_data_X = detection_train[features]
class_data_X = class_train[features]
detection_data_Y = detection_train['Output(S)']
class_data_Y = class_train[class_target]

# Collapse the label columns with argmax, then re-encode as consecutive ids.
encoder = LabelEncoder()
class_data_Y = encoder.fit_transform(np.argmax(class_data_Y.values, axis=1))

# Stratified train/test partition.
# NOTE(review): test_size=0.9 leaves only 10% of the rows for training, unlike
# the 0.1-0.2 hold-out used by the other cells - confirm this is intended.
class_train_X, class_test_X, class_train_Y, class_test_Y = train_test_split(
    class_data_X,
    class_data_Y,
    test_size=0.9,
    random_state=1,
    stratify=class_data_Y,
)

# z-score the features with statistics from the training split
scaler = StandardScaler()
class_train_X = scaler.fit_transform(class_train_X)
class_test_X = scaler.transform(class_test_X)

# ===================== DATA BALANCING WITH SMOTE ===================== #
# SMOTE needs a 2-D matrix, so it is run on the flat training data before the
# arrays are given their time-step axis.
smote = SMOTE(sampling_strategy='auto', random_state=42)
class_train_X_smote, class_train_Y_smote = smote.fit_resample(class_train_X, class_train_Y)

# Add a single time-step axis: (samples, features) -> (samples, 1, features)
class_train_X = class_train_X[:, np.newaxis, :]
class_test_X = class_test_X[:, np.newaxis, :]
class_train_X_smote = class_train_X_smote[:, np.newaxis, :]

# ===================== LSTM MODEL ===================== #
def build_lstm_model():
    """Build the (uncompiled) stacked bidirectional LSTM classifier.

    Two bidirectional LSTM layers (128 then 64 units, ReLU activation), each
    followed by 30% dropout and batch normalization, then a 64-unit dense layer
    with 20% dropout and a softmax output with one unit per label column.
    The input is a one-step sequence whose width is read from the global
    ``class_train_X``.
    """
    n_inputs = class_train_X.shape[2]
    return Sequential([
        Bidirectional(LSTM(128, return_sequences=True, activation='relu'), input_shape=(1, n_inputs)),
        Dropout(0.3),
        BatchNormalization(),

        Bidirectional(LSTM(64, return_sequences=False, activation='relu')),
        Dropout(0.3),
        BatchNormalization(),

        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(len(class_target), activation='softmax'),  # one unit per label column
    ])

# Build and compile (sparse loss: targets are integer class ids)
lstm_model = build_lstm_model()
lstm_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the balanced training data; the test split is passed for per-epoch monitoring.
lstm_model.fit(
    class_train_X_smote,
    class_train_Y_smote,
    epochs=100,
    batch_size=32,
    validation_data=(class_test_X, class_test_Y),
    verbose=1,
)

# Most probable class per test row, then accuracy
lstm_preds = np.argmax(lstm_model.predict(class_test_X), axis=1)
lstm_accuracy = accuracy_score(class_test_Y, lstm_preds)
print(f"LSTM Classification Accuracy: {lstm_accuracy:.4f}")

# ===================== CHECK ACCURACY ===================== #
print("Achieved 95%+ Accuracy! 🎉" if lstm_accuracy >= 0.95
      else "⚠️ Still below 95%, consider deeper tuning.")


**LSTM + DWT**

In [None]:
!pip install PyWavelets


In [None]:
import numpy as np
import pandas as pd
import pywt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE

# ===================== DATA PREPROCESSING ===================== #
# Input columns and label columns used by this cell.
features = ['Va', 'Vb', 'Vc', 'Ia', 'Ib', 'Ic']
class_target = ['G', 'C', 'B', 'A']

# Feature matrices and targets
detection_data_X = detection_train[features]
class_data_X = class_train[features]
detection_data_Y = detection_train['Output(S)']
class_data_Y = class_train[class_target]

# Collapse the label columns with argmax, then re-encode as consecutive ids.
encoder = LabelEncoder()
class_data_Y = encoder.fit_transform(np.argmax(class_data_Y.values, axis=1))

def apply_dwt(signal):
    """Return the flattened level-3 'db4' wavelet decomposition of a 1-D signal.

    The coefficient arrays returned by ``pywt.wavedec`` are concatenated, in
    the order returned, into one 1-D vector.
    """
    bands = pywt.wavedec(signal, 'db4', level=3)  # Daubechies-4 wavelet
    return np.concatenate(list(map(np.ravel, bands)))

# Apply the transform along axis=1, i.e. to each ROW of the feature matrix:
# the six values of one sample are treated as the signal.
# NOTE(review): this transforms across the six feature columns of a single row,
# not along each column over time - confirm that this is the intended signal.
class_data_X_dwt = np.apply_along_axis(apply_dwt, axis=1, arr=class_data_X)

# Stratified 80/20 train/test partition
class_train_X, class_test_X, class_train_Y, class_test_Y = train_test_split(
    class_data_X_dwt,
    class_data_Y,
    test_size=0.2,
    random_state=42,
    stratify=class_data_Y,
)

# z-score the wavelet features with statistics from the training split
scaler = StandardScaler()
class_train_X = scaler.fit_transform(class_train_X)
class_test_X = scaler.transform(class_test_X)

# Balance the classes with SMOTE. It needs a 2-D matrix, so it is run on the
# flat training data before the arrays are given their time-step axis.
smote = SMOTE(sampling_strategy='auto', random_state=42)
class_train_X_smote, class_train_Y_smote = smote.fit_resample(class_train_X, class_train_Y)

# Add a single time-step axis: (samples, features) -> (samples, 1, features)
class_train_X = class_train_X[:, np.newaxis, :]
class_test_X = class_test_X[:, np.newaxis, :]
class_train_X_smote = class_train_X_smote[:, np.newaxis, :]

# ===================== LSTM MODEL ===================== #
def build_lstm_model():
    """Build the (uncompiled) stacked bidirectional LSTM classifier.

    Two bidirectional LSTM layers (128 then 64 units, ReLU activation), each
    followed by 30% dropout and batch normalization, then a 64-unit dense layer
    with 20% dropout and a softmax output with one unit per label column.
    The input is a one-step sequence whose width is read from the global
    ``class_train_X``.
    """
    n_inputs = class_train_X.shape[2]
    return Sequential([
        Bidirectional(LSTM(128, return_sequences=True, activation='relu'), input_shape=(1, n_inputs)),
        Dropout(0.3),
        BatchNormalization(),

        Bidirectional(LSTM(64, return_sequences=False, activation='relu')),
        Dropout(0.3),
        BatchNormalization(),

        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(len(class_target), activation='softmax'),  # one unit per label column
    ])

# Build and compile (sparse loss: targets are integer class ids)
lstm_model = build_lstm_model()
lstm_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the balanced training data; the test split is passed for per-epoch monitoring.
lstm_model.fit(
    class_train_X_smote,
    class_train_Y_smote,
    epochs=100,
    batch_size=32,
    validation_data=(class_test_X, class_test_Y),
    verbose=1,
)

# Most probable class per test row, then accuracy
lstm_preds = np.argmax(lstm_model.predict(class_test_X), axis=1)
lstm_accuracy = accuracy_score(class_test_Y, lstm_preds)
print(f"LSTM + DWT Classification Accuracy: {lstm_accuracy:.4f}")

# ===================== CHECK ACCURACY ===================== #
print("Achieved 95%+ Accuracy! 🎉" if lstm_accuracy >= 0.95
      else "⚠️ Still below 95%, consider deeper tuning.")


**ANN**

In [None]:
# Import Libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, LSTM, Flatten, Dropout, BatchNormalization
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import xgboost as Xgb
import matplotlib.pyplot as plt
import seaborn as sns

# ===================== Load Data ===================== #
# `class_train` is expected to have been loaded by an earlier cell.
print("Columns in dataset:", class_train.columns)  # Debugging Step

features = ['Va', 'Vb', 'Vc', 'Ia', 'Ib', 'Ic']
target = 'Fault_Type'

# Fail fast with a regular exception when the label column is missing.
# Calling exit() inside a notebook terminates the kernel/runtime and throws
# away every variable already computed; raising only stops this cell.
if target not in class_train.columns:
    raise KeyError(
        f"Column '{target}' not found! Check column names. "
        f"Available columns: {list(class_train.columns)}"
    )

# Split Features & Labels
X = class_train[features]
y = class_train[target]

# Encode Labels (Convert to Numeric)
encoder = LabelEncoder()
y = encoder.fit_transform(y)

# Train-Test Split (80/20, stratified on the encoded label)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Standardize Data: statistics are learned on the training split only and
# then re-applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# ===================== XGBoost for Detection ===================== #
# Boosted-tree classifier; num_class is the number of distinct values in y.
xgb = Xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=len(set(y)),
    n_estimators=50,
    max_depth=6,
    learning_rate=0.05,
)
xgb.fit(X_train, y_train)

# Accuracy on the held-out split.
y_pred_xgb = xgb.predict(X_test)
xgb_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_xgb)
print(f"XGBoost Accuracy: {xgb_accuracy:.4f}")

# ===================== CNN-LSTM Model for Classification ===================== #
# Append a trailing channel axis: (samples, features) -> (samples, features, 1).
X_train_lstm = np.expand_dims(X_train, axis=-1)
X_test_lstm = np.expand_dims(X_test, axis=-1)

# Conv1D front-end followed by two stacked LSTMs and a small dense head.
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=2, activation='relu',
                 input_shape=(X_train_lstm.shape[1], 1)))
model.add(BatchNormalization())
model.add(LSTM(64, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(32))
model.add(Dense(16, activation='relu'))
model.add(Dense(len(set(y)), activation='softmax'))  # one output per distinct label

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
history = model.fit(
    X_train_lstm,
    y_train,
    validation_data=(X_test_lstm, y_test),
    epochs=50,
    batch_size=32,
    verbose=1,
)

# Evaluate Model: most probable class per test row, then plain accuracy.
lstm_preds = model.predict(X_test_lstm).argmax(axis=1)
lstm_accuracy = accuracy_score(y_test, lstm_preds)
print(f" CNN-LSTM Accuracy: {lstm_accuracy:.4f}")

# ===================== Confusion Matrix ===================== #
# Heatmap of true vs. predicted labels for the CNN-LSTM predictions.
plt.figure(figsize=(8, 6))
sns.heatmap(
    confusion_matrix(y_test, lstm_preds),
    cmap="Blues",
    annot=True,   # write the count inside every cell
    fmt="d",      # ... formatted as an integer
)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix - CNN-LSTM")
plt.show()

# ===================== Model Selection ===================== #
# Keep whichever model scored higher; XGBoost wins a tie.
if lstm_accuracy > xgb_accuracy:
    best_model, best_accuracy = 'CNN-LSTM', lstm_accuracy
else:
    best_model, best_accuracy = 'XGBoost', xgb_accuracy
print(f"Best Model: {best_model} with Accuracy: {best_accuracy:.4f}")


In [None]:
# Import necessary libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE

# ===================== DATA PREPROCESSING ===================== #

# Input feature columns and the fault-indicator columns.
features = ['Va', 'Vb', 'Vc', 'Ia', 'Ib', 'Ic']
class_target = ['G', 'C', 'B', 'A']

# `detection_train` and `class_train` are expected to be loaded by an earlier cell.
detection_data_X = detection_train[features]
class_data_X = class_train[features]
# NOTE(review): the first split cell of this notebook reads the detection
# label from column 'l' -- confirm which column name the CSV really has.
detection_data_Y = detection_train['Output(S)']
class_data_Y = class_train[class_target]

# Give every distinct G/C/B/A combination its own class id.
# Taking np.argmax over the indicator columns maps each row to the position of
# its first largest flag, so rows with several flags set (or none at all)
# collapse onto the same label and the classifier can no longer tell those
# fault types apart.
# NOTE(review): assumes the indicator columns hold 0/1 values -- confirm.
fault_codes = [''.join(str(int(flag)) for flag in row) for row in class_data_Y.to_numpy()]
encoder = LabelEncoder()
class_data_Y = encoder.fit_transform(fault_codes)  # encoder.classes_ keeps the readable codes
num_fault_classes = len(encoder.classes_)

# Split data into train and test sets (80/20, stratified on the label)
class_train_X, class_test_X, class_train_Y, class_test_Y = train_test_split(
    class_data_X, class_data_Y, test_size=0.2, random_state=1, stratify=class_data_Y)

detection_train_X, detection_test_X, detection_train_Y, detection_test_Y = train_test_split(
    detection_data_X, detection_data_Y, test_size=0.2, random_state=1, stratify=detection_data_Y)

# Standardizing Data. The same scaler object is re-fitted for the detection
# data, so after this cell `scaler` holds the detection statistics only.
scaler = StandardScaler()
class_train_X = scaler.fit_transform(class_train_X)
class_test_X = scaler.transform(class_test_X)
detection_train_X = scaler.fit_transform(detection_train_X)
detection_test_X = scaler.transform(detection_test_X)

# ===================== DATA BALANCING WITH SMOTE ===================== #
# Only the training split is resampled; the test split is left untouched.
smote = SMOTE(sampling_strategy='auto', random_state=1)
class_train_X_smote, class_train_Y_smote = smote.fit_resample(class_train_X, class_train_Y)

# ===================== BUILDING ANN MODEL ===================== #
def build_ann_model(n_features=None, n_classes=None):
    """Build and compile the dense fault-type classifier.

    Layer stack: Dense(512) -> BatchNorm -> Dropout(0.3) -> Dense(256)
    -> BatchNorm -> Dropout(0.3) -> Dense(128) -> Dense(n_classes, softmax),
    compiled with Adam(1e-3) and sparse categorical cross-entropy.

    Args:
        n_features: Width of the input layer. Defaults to the column count of
            the module-level ``class_train_X``.
        n_classes: Width of the softmax output. Defaults to the module-level
            ``num_fault_classes`` so that the output covers every encoded label
            (sparse categorical cross-entropy needs labels < output width).

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    if n_features is None:
        n_features = class_train_X.shape[1]
    if n_classes is None:
        n_classes = num_fault_classes

    model = Sequential([
        Dense(512, activation='relu', input_shape=(n_features,)),
        BatchNormalization(),
        Dropout(0.3),

        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),

        Dense(128, activation='relu'),
        Dense(n_classes, activation='softmax')  # Multi-Class Output
    ])

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Build the ANN and train it on the SMOTE arrays; the test split is passed as
# validation data.
model_ann = build_ann_model()
history = model_ann.fit(
    class_train_X_smote,
    class_train_Y_smote,
    validation_data=(class_test_X, class_test_Y),
    batch_size=32,
    epochs=100,
    verbose=1,
)

# ===================== MODEL EVALUATION ===================== #
# Predicted class = index of the largest softmax output per row.
ann_preds = model_ann.predict(class_test_X).argmax(axis=1)
ann_accuracy = accuracy_score(class_test_Y, ann_preds)

# Final test accuracy.
print(f" ANN Classification Accuracy: {ann_accuracy:.4f}")



In [None]:
# Linear baselines: ordinary least squares for the classification target and
# an L1-penalised (Lasso, alpha=2.0) regression for the detection target.
class_model = LinearRegression()
detection_model = linear_model.Lasso(alpha=2.0)

In [None]:
# Fit the detection model on the detection training split.
detection_model.fit(detection_train_X, detection_train_Y)

# Collapse the G/A/B/C indicator columns into one numeric target
# (weights G=1, A=2, B=3, C=5) and flatten it to a 1-D array.
class_Y = np.array(
    class_train_Y['G'] * 1
    + class_train_Y['A'] * 2
    + class_train_Y['B'] * 3
    + class_train_Y['C'] * 5
).ravel()
class_model.fit(class_train_X, class_Y)

In [None]:
# Predict on the test splits and report the mean squared error of each model.
detection_preds = detection_model.predict(detection_test_X)
print('The Error of our Detection Model is: ',mean_squared_error(detection_test_Y,detection_preds))

# The test target must use exactly the same weights as the training target
# built in the fitting cell (G=1, A=2, B=3, C=5). Weighting C by 4 here would
# compare the predictions against a different label scale than the model was
# trained on and distort both the MSE and the score computed from class_Y.
class_Y = np.array([class_test_Y['G']*1+class_test_Y['A']*2+class_test_Y['B']*3+class_test_Y['C']*5])
class_Y= class_Y.transpose().ravel()
class_preds = class_model.predict(class_test_X)
print('The Error of our Classification Model is: ',mean_squared_error(class_Y,class_preds))

# Keep the errors for the cross-model comparison lists.
detect_error.append(mean_squared_error(detection_test_Y,detection_preds))
class_error.append(mean_squared_error(class_Y,class_preds))

In [None]:
# Record each model's `.score(...)` on its test split, then print what was stored.
# NOTE(review): if these are still the Lasso / LinearRegression models from the
# cells above, `.score` is the R^2 coefficient rather than a classification
# accuracy -- confirm before comparing with the classifiers' entries.
detect_accuracy.append(detection_model.score(detection_test_X, detection_test_Y))
class_accuracy.append(class_model.score(class_test_X, class_Y))

print('The accuracy score of our Detection Model is: ', detect_accuracy[-1])
print('The accuracy score of our Classification Model is: ', class_accuracy[-1])

**Multi layer Perceptron**

In [None]:
# Two L-BFGS-trained perceptrons: hidden layers (5, 2) for detection and
# (10, 6) for classification.
detection_model = MLPClassifier(
    hidden_layer_sizes=(5, 2),
    solver='lbfgs',
    alpha=1e-5,
    max_iter=1000,
    random_state=1,
)
class_model = MLPClassifier(
    hidden_layer_sizes=(10, 6),
    solver='lbfgs',
    alpha=1e-5,
    max_iter=5000,
    random_state=1,
)

# Train each model on its own training split.
detection_model.fit(detection_train_X, detection_train_Y)
class_model.fit(class_train_X, class_train_Y)

# Test-split predictions.
detection_preds = detection_model.predict(detection_test_X)
class_preds = class_model.predict(class_test_X)

# Store the mean squared errors, then print the stored values.
detect_error.append(mean_squared_error(detection_test_Y, detection_preds))
class_error.append(mean_squared_error(class_test_Y, class_preds))
print('The Error of our Detection Model is: ', detect_error[-1])
print('The Error of our Classification Model is: ', class_error[-1])

# Same for the `.score(...)` value of each model on its test split.
detect_accuracy.append(detection_model.score(detection_test_X, detection_test_Y))
class_accuracy.append(class_model.score(class_test_X, class_test_Y))
print('\nThe accuracy score of our Detection Model is: ', detect_accuracy[-1])
print('The accuracy score of our Classification Model is: ', class_accuracy[-1])

**Multi-layer Perceptron - Hyperparameter Tuning**

In [None]:
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error

# Candidate settings shared by both randomized searches below.
mlp_param_grid = dict(
    hidden_layer_sizes=[(5, 2), (10, 5), (50, 25), (100, 50)],
    activation=['relu', 'tanh', 'logistic'],
    solver=['adam', 'lbfgs'],
    alpha=[1e-5, 1e-4, 1e-3, 1e-2],
    learning_rate=['constant', 'adaptive'],
)

# Detection model: 10 sampled configurations, 3-fold CV, all cores.
random_search_detection = RandomizedSearchCV(
    estimator=MLPClassifier(max_iter=5000, random_state=42),
    param_distributions=mlp_param_grid,
    n_iter=10,
    cv=3,
    random_state=42,
    n_jobs=-1,
    verbose=1,
)
best_mlp_detection_model = random_search_detection.fit(
    detection_train_X, detection_train_Y
).best_estimator_

# Classification model: same search settings on the classification split.
random_search_classification = RandomizedSearchCV(
    estimator=MLPClassifier(max_iter=5000, random_state=42),
    param_distributions=mlp_param_grid,
    n_iter=10,
    cv=3,
    random_state=42,
    n_jobs=-1,
    verbose=1,
)
best_mlp_class_model = random_search_classification.fit(
    class_train_X, class_train_Y
).best_estimator_

# Detection metrics on the test split.
detection_preds = best_mlp_detection_model.predict(detection_test_X)
detection_mse = mean_squared_error(detection_test_Y, detection_preds)
detection_accuracy = accuracy_score(detection_test_Y, detection_preds)

# Classification metrics on the test split.
class_preds = best_mlp_class_model.predict(class_test_X)
classification_mse = mean_squared_error(class_test_Y, class_preds)
classification_accuracy = accuracy_score(class_test_Y, class_preds)

# Keep the numbers for the cross-model comparison lists.
detect_error.append(detection_mse)
class_error.append(classification_mse)
detect_accuracy.append(detection_accuracy)
class_accuracy.append(classification_accuracy)

# Summary block.
print("\n Optimized MLP Classifier Model Performance ")
print("---------------------------------------------------")
print(f"Detection Model Mean Squared Error: {detection_mse:.4f}")
print(f"Classification Model Mean Squared Error: {classification_mse:.4f}")
print(f"MLP Detection Model Accuracy: {detection_accuracy:.4f}")
print(f"MLP Classification Model Accuracy: {classification_accuracy:.4f}")
print("---------------------------------------------------")

# Per-class precision / recall / F1 / support.
print("\n📊 Classification Report for Detection Model 📊")
print(classification_report(detection_test_Y, detection_preds))

print("\n📊 Classification Report for Classification Model 📊")
print(classification_report(class_test_Y, class_preds))


**KNN (K-Nearest Neighbors)**

In [None]:
# k-nearest-neighbour baselines: k=2 for detection, k=6 for classification.
detection_model = KNeighborsClassifier(n_neighbors=2)
class_model = KNeighborsClassifier(n_neighbors=6)

# Fit detection on its training split.
detection_model.fit(detection_train_X, detection_train_Y)

# Collapse the G/A/B/C indicator columns into one label per row
# (weights G=1, A=2, B=3, C=5) and fit the classification model on it.
class_Y = np.array(
    class_train_Y['G'] * 1
    + class_train_Y['A'] * 2
    + class_train_Y['B'] * 3
    + class_train_Y['C'] * 5
).ravel()
class_model.fit(class_train_X, class_Y)

# From here on class_Y holds the *test* labels, built with the same weights.
class_Y = np.array(
    class_test_Y['G'] * 1
    + class_test_Y['A'] * 2
    + class_test_Y['B'] * 3
    + class_test_Y['C'] * 5
).ravel()

# Test-split predictions.
detection_preds = detection_model.predict(detection_test_X)
class_preds = class_model.predict(class_test_X)

# Store the mean squared errors, then print the stored values.
detect_error.append(mean_squared_error(detection_test_Y, detection_preds))
class_error.append(mean_squared_error(class_Y, class_preds))
print('The Error of our Detection Model is: ', detect_error[-1])
print('The Error of our Classification Model is: ', class_error[-1])

# Same for the accuracy returned by `.score(...)`.
detect_accuracy.append(detection_model.score(detection_test_X, detection_test_Y))
class_accuracy.append(class_model.score(class_test_X, class_Y))
print('\nThe accuracy score of our Detection Model is: ', detect_accuracy[-1])
print('The accuracy score of our Classification Model is: ', class_accuracy[-1])

**KNN - Hyperparameter Tuning**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error, confusion_matrix, ConfusionMatrixDisplay

# ===================== DETECTION MODEL (KNN) ===================== #

# Scaling and KNN are chained so the scaler is re-fitted inside every CV fold.
knn_detection_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])

# Grid over neighbour count, vote weighting and distance metric
# (the `knn__` prefix routes each entry to the pipeline's 'knn' step).
knn_detection_param_grid = dict(
    knn__n_neighbors=[3, 5, 7, 9, 11],
    knn__weights=['uniform', 'distance'],
    knn__metric=['euclidean', 'manhattan', 'minkowski'],
)

# Exhaustive 5-fold search scored by accuracy, run on all cores.
grid_search_detection = GridSearchCV(
    estimator=knn_detection_pipeline,
    param_grid=knn_detection_param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search_detection.fit(detection_train_X, detection_train_Y)

# Best pipeline found by the search.
best_knn_detection_model = grid_search_detection.best_estimator_

# Test-split predictions and metrics.
detection_preds = best_knn_detection_model.predict(detection_test_X)
detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
detection_mse = mean_squared_error(detection_test_Y, detection_preds)

# Keep the numbers for the cross-model comparison lists.
detect_error.append(detection_mse)
detect_accuracy.append(detection_accuracy)

# Summary block.
print("\n🔹 Optimized KNN Detection Model Performance")
print("---------------------------------------------------")
print(f"Detection Model Mean Squared Error: {detection_mse:.4f}")
print(f"Optimized KNN Detection Model Accuracy: {detection_accuracy:.4f}")
print("---------------------------------------------------")

# Per-class precision / recall / F1 / support.
print("\nClassification Report")
print(classification_report(detection_test_Y, detection_preds))

# ===================== DETECTION MODEL KNN CONFUSION MATRIX ===================== #
# Counts of true vs. predicted detection labels, drawn as an annotated heatmap.
cm = confusion_matrix(detection_test_Y, detection_preds)
plt.figure(figsize=(6, 5))
sns.heatmap(
    cm,
    cmap='Blues',
    annot=True,
    fmt='d',
    xticklabels=["No Fault (0)", "Fault (1)"],
    yticklabels=["No Fault (0)", "Fault (1)"],
)
plt.title("Detection Model KNN Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# ===================== CLASSIFICATION MODEL (KNN) ===================== #

# Scaling and KNN are chained so the scaler is re-fitted inside every CV fold.
knn_class_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])

# Grid over neighbour count, vote weighting and distance metric
# (the `knn__` prefix routes each entry to the pipeline's 'knn' step).
knn_class_param_grid = dict(
    knn__n_neighbors=[3, 5, 7, 9, 11],
    knn__weights=['uniform', 'distance'],
    knn__metric=['euclidean', 'manhattan', 'minkowski'],
)

# Exhaustive 5-fold search scored by accuracy, run on all cores.
grid_search_class = GridSearchCV(
    estimator=knn_class_pipeline,
    param_grid=knn_class_param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search_class.fit(class_train_X, class_train_Y)

# Best pipeline found by the search.
best_knn_class_model = grid_search_class.best_estimator_

# Test-split predictions and metrics.
class_preds = best_knn_class_model.predict(class_test_X)
classification_accuracy = accuracy_score(class_test_Y, class_preds)
classification_mse = mean_squared_error(class_test_Y, class_preds)

# Keep the numbers for the cross-model comparison lists.
class_error.append(classification_mse)
class_accuracy.append(classification_accuracy)

# Summary block.
print("\n🔹 Optimized KNN Classification Model Performance")
print("---------------------------------------------------")
print(f"Classification Model Mean Squared Error: {classification_mse:.4f}")
print(f"Optimized KNN Classification Model Accuracy: {classification_accuracy:.4f}")
print("---------------------------------------------------")

# Per-class precision / recall / F1 / support.
print("\nClassification Report")
print(classification_report(class_test_Y, class_preds))

# ===================== CLASSIFICATION MODEL KNN CONFUSION MATRIX ===================== #
# Counts of true vs. predicted class labels, drawn as an annotated heatmap.
# NOTE(review): confusion_matrix needs 1-D labels here; confirm class_test_Y is
# not the multi-column G/C/B/A frame when this cell runs.
cm = confusion_matrix(class_test_Y, class_preds)
plt.figure(figsize=(6, 5))
sns.heatmap(cm, cmap='Oranges', annot=True, fmt='d')
plt.title("Confusion Matrix ---- KNN Classification Model")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()


**Decision tree classifier**

In [None]:
# Default-parameter decision trees for both tasks.
detection_model = DecisionTreeClassifier()
class_model = DecisionTreeClassifier()

# Fit detection first, then classification (order kept so both trees draw
# from the global random state in the same sequence).
detection_model.fit(detection_train_X, detection_train_Y)

# Collapse the G/A/B/C indicator columns into one label per row
# (weights G=1, A=2, B=3, C=5).
class_Y = np.array(
    class_train_Y['G'] * 1
    + class_train_Y['A'] * 2
    + class_train_Y['B'] * 3
    + class_train_Y['C'] * 5
).ravel()
class_model.fit(class_train_X, class_Y)

# From here on class_Y holds the *test* labels, built with the same weights.
class_Y = np.array(
    class_test_Y['G'] * 1
    + class_test_Y['A'] * 2
    + class_test_Y['B'] * 3
    + class_test_Y['C'] * 5
).ravel()

# Test-split predictions.
detection_preds = detection_model.predict(detection_test_X)
class_preds = class_model.predict(class_test_X)

# Store the mean squared errors, then print the stored values.
detect_error.append(mean_squared_error(detection_test_Y, detection_preds))
class_error.append(mean_squared_error(class_Y, class_preds))
print('The Error of our Detection Model is: ', detect_error[-1])
print('The Error of our Classification Model is: ', class_error[-1])

# Same for the accuracy returned by `.score(...)`.
detect_accuracy.append(detection_model.score(detection_test_X, detection_test_Y))
class_accuracy.append(class_model.score(class_test_X, class_Y))
print('\nThe accuracy score of our Detection Model is: ', detect_accuracy[-1])
print('The accuracy score of our Classification Model is: ', class_accuracy[-1])

**Decision tree classifier - Hyperparameter Tuning**

In [None]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error

#  Step 1: Define Hyperparameter Grid
dt_param_grid = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': list(range(3, 30)),
    'min_samples_split': list(range(2, 10)),
    'min_samples_leaf': list(range(1, 10))
}

#  Step 2: Hyperparameter Tuning for Detection Model
# The tree itself is seeded as well as the search: with 'splitter': 'random'
# in the grid (and random tie-breaking between equally good splits) an
# unseeded estimator makes the selected model change from run to run even
# though the search's own sampling is fixed by random_state=42.
random_search_detection = RandomizedSearchCV(DecisionTreeClassifier(random_state=42),
                                             param_distributions=dt_param_grid,
                                             n_iter=10, cv=3, verbose=1, n_jobs=-1, random_state=42)

random_search_detection.fit(detection_train_X, detection_train_Y)
best_dt_detection_model = random_search_detection.best_estimator_

#  Step 3: Hyperparameter Tuning for Classification Model (same seeding rationale)
random_search_classification = RandomizedSearchCV(DecisionTreeClassifier(random_state=42),
                                                  param_distributions=dt_param_grid,
                                                  n_iter=10, cv=3, verbose=1, n_jobs=-1, random_state=42)

random_search_classification.fit(class_train_X, class_train_Y)
best_dt_class_model = random_search_classification.best_estimator_

# Step 4: Model Predictions on the test splits
detection_preds = best_dt_detection_model.predict(detection_test_X)
class_preds = best_dt_class_model.predict(class_test_X)

#  Step 5: Mean Squared Error
detection_mse = mean_squared_error(detection_test_Y, detection_preds)
classification_mse = mean_squared_error(class_test_Y, class_preds)

#  Step 6: Accuracy Scores
detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
classification_accuracy = accuracy_score(class_test_Y, class_preds)

#  Step 7: Print Accuracy & MSE Results
print("\n🟢 Optimized Decision Tree Classifier Model Performance 🟢")
print("---------------------------------------------------")
print(f"Detection Model Mean Squared Error: {detection_mse:.4f}")
print(f"Classification Model Mean Squared Error: {classification_mse:.4f}")
print(f"Decision Tree Detection Model Accuracy: {detection_accuracy:.4f}")
print(f"Decision Tree Classification Model Accuracy: {classification_accuracy:.4f}")
print("---------------------------------------------------")

#  Step 8: Precision, Recall, F1-Score, and Support Count
print("\n📊 Classification Report for Detection Model 📊")
print(classification_report(detection_test_Y, detection_preds))

print("\n📊 Classification Report for Classification Model 📊")
print(classification_report(class_test_Y, class_preds))

#  Step 9: Storing Accuracy & Error Values for the cross-model comparison lists
detect_error.append(detection_mse)
class_error.append(classification_mse)
detect_accuracy.append(detection_accuracy)
class_accuracy.append(classification_accuracy)


**SVM (Support Vector Machine)**

In [None]:
# Kernel SVM (default settings) for detection, linear SVM for classification.
detection_model = SVC()
class_model = LinearSVC()

# Fit detection first, then classification.
detection_model.fit(detection_train_X, detection_train_Y)

# Collapse the G/A/B/C indicator columns into one label per row
# (weights G=1, A=2, B=3, C=5).
class_Y = np.array(
    class_train_Y['G'] * 1
    + class_train_Y['A'] * 2
    + class_train_Y['B'] * 3
    + class_train_Y['C'] * 5
).ravel()
class_model.fit(class_train_X, class_Y)

# From here on class_Y holds the *test* labels, built with the same weights.
class_Y = np.array(
    class_test_Y['G'] * 1
    + class_test_Y['A'] * 2
    + class_test_Y['B'] * 3
    + class_test_Y['C'] * 5
).ravel()

# Test-split predictions.
detection_preds = detection_model.predict(detection_test_X)
class_preds = class_model.predict(class_test_X)

# Store the mean squared errors, then print the stored values.
detect_error.append(mean_squared_error(detection_test_Y, detection_preds))
class_error.append(mean_squared_error(class_Y, class_preds))
print('The Error of our Detection Model is: ', detect_error[-1])
print('The Error of our Classification Model is: ', class_error[-1])

# Same for the accuracy returned by `.score(...)`.
detect_accuracy.append(detection_model.score(detection_test_X, detection_test_Y))
class_accuracy.append(class_model.score(class_test_X, class_Y))
print('\nThe accuracy score of our Detection Model is: ', detect_accuracy[-1])
print('The accuracy score of our Classification Model is: ', class_accuracy[-1])

**Support Vector Machine - Hyperparameter Tuning**

In [None]:
import numpy as np
from sklearn.svm import SVC, LinearSVC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error

#  Step 1: Define Hyperparameter Grids
svc_param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'gamma': ['scale', 'auto']
}

linear_svc_param_grid = {
    'C': [0.1, 1, 10, 100]
}

#  Step 2: Hyperparameter Tuning for Detection Model (SVC)
# 10 of the 32 grid combinations are sampled and scored with 3-fold CV.
random_search_detection = RandomizedSearchCV(SVC(), param_distributions=svc_param_grid,
                                             n_iter=10, cv=3, verbose=1, n_jobs=-1, random_state=42)

random_search_detection.fit(detection_train_X, detection_train_Y)
best_svc_detection_model = random_search_detection.best_estimator_

#  Step 3: Encode Multi-Class Labels for Classification
# Weighted sum of the indicator columns (G=1, A=2, B=3, C=5) gives one label
# per row. `.values` converts to a NumPy array first: calling `.ravel()`
# directly on a pandas Series is deprecated, and the ensemble cells of this
# notebook already use `.values.ravel()`.
class_Y_train = (class_train_Y['G']*1 + class_train_Y['A']*2 +
                 class_train_Y['B']*3 + class_train_Y['C']*5).values.ravel()

class_Y_test = (class_test_Y['G']*1 + class_test_Y['A']*2 +
                class_test_Y['B']*3 + class_test_Y['C']*5).values.ravel()

#  Step 4: Hyperparameter Tuning for Classification Model (LinearSVC)
# The grid only has len(C) candidates, so n_iter is capped at that number;
# asking for more makes scikit-learn warn and fall back to the full grid anyway.
random_search_classification = RandomizedSearchCV(LinearSVC(max_iter=5000), param_distributions=linear_svc_param_grid,
                                                  n_iter=len(linear_svc_param_grid['C']), cv=3, verbose=1,
                                                  n_jobs=-1, random_state=42)

random_search_classification.fit(class_train_X, class_Y_train)
best_svc_class_model = random_search_classification.best_estimator_

#  Step 5: Model Predictions on the test splits
detection_preds = best_svc_detection_model.predict(detection_test_X)
class_preds = best_svc_class_model.predict(class_test_X)

#  Step 6: Mean Squared Error
detection_mse = mean_squared_error(detection_test_Y, detection_preds)
classification_mse = mean_squared_error(class_Y_test, class_preds)

#  Step 7: Accuracy Scores
detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
classification_accuracy = accuracy_score(class_Y_test, class_preds)

#  Step 8: Print Accuracy & MSE Results
print("\n🟢 Optimized SVM Model Performance 🟢")
print("---------------------------------------------------")
print(f"Detection Model Mean Squared Error: {detection_mse:.4f}")
print(f"Classification Model Mean Squared Error: {classification_mse:.4f}")
print(f"SVM Detection Model Accuracy: {detection_accuracy:.4f}")
print(f"SVM Classification Model Accuracy: {classification_accuracy:.4f}")
print("---------------------------------------------------")

#  Step 9: Precision, Recall, F1-Score, and Support Count
print("\n📊 Classification Report for Detection Model 📊")
print(classification_report(detection_test_Y, detection_preds))

print("\n📊 Classification Report for Classification Model 📊")
print(classification_report(class_Y_test, class_preds))

#  Step 10: Storing Accuracy & Error Values for the cross-model comparison lists
detect_error.append(detection_mse)
class_error.append(classification_mse)
detect_accuracy.append(detection_accuracy)
class_accuracy.append(classification_accuracy)


**Updated Code with Ensemble Learning**

**Bagging**

In [None]:
import numpy as np
from sklearn.ensemble import BaggingClassifier, StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, mean_squared_error

# Defining Bagging KNN Models
# The wrapped estimator is passed positionally: its keyword was
# `base_estimator` in older scikit-learn releases and is `estimator` in newer
# ones, while the first positional slot is the same in both.
bagging_detection_model = BaggingClassifier(KNeighborsClassifier(n_neighbors=2),
                                            n_estimators=10, random_state=42)
bagging_class_model = BaggingClassifier(KNeighborsClassifier(n_neighbors=6),
                                        n_estimators=10, random_state=42)

# Fit detection model
bagging_detection_model.fit(detection_train_X, detection_train_Y)

# Encode the multi-column labels as one value per row (G=1, A=2, B=3, C=5)
class_Y_train = (class_train_Y['G']*1 + class_train_Y['A']*2 +
                 class_train_Y['B']*3 + class_train_Y['C']*5).values.ravel()

# Fit classification model
bagging_class_model.fit(class_train_X, class_Y_train)

# Test labels, built with the same weights as the training labels
class_Y_test = (class_test_Y['G']*1 + class_test_Y['A']*2 +
                class_test_Y['B']*3 + class_test_Y['C']*5).values.ravel()

# Shape sanity checks: the label arrays should have one entry per test row.
print("Detection Test X Shape:", detection_test_X.shape)
print("Detection Test Y Shape:", detection_test_Y.shape)
print("Class Test X Shape:", class_test_X.shape)
print("Class Y Test Shape:", class_Y_test.shape)  # Should match class_test_X.shape[0]

# Predictions
detection_preds = bagging_detection_model.predict(detection_test_X)
class_preds = bagging_class_model.predict(class_test_X)

# Printing Accuracy. The classification model is scored against class_Y_test
# computed in this cell, not against a `class_Y` left over from whichever
# earlier cell happened to run last.
print('Bagging Detection Model Accuracy:', bagging_detection_model.score(detection_test_X, detection_test_Y))
print('Bagging Classification Model Accuracy:', bagging_class_model.score(class_test_X, class_Y_test))


**Bagging - Hyperparameter Tuning**

**Gradient Boosting**

In [None]:
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Two identically configured gradient-boosting classifiers, one per task.
gb_detection_model = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
    random_state=42,
)
gb_class_model = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
    random_state=42,
)

# Detection model is trained first.
gb_detection_model.fit(detection_train_X, detection_train_Y)

# One label per row from the indicator columns (G=1, A=2, B=3, C=5),
# for both the training and the test split.
class_Y_train = (
    class_train_Y['G'] * 1 + class_train_Y['A'] * 2
    + class_train_Y['B'] * 3 + class_train_Y['C'] * 5
).values.ravel()
class_Y_test = (
    class_test_Y['G'] * 1 + class_test_Y['A'] * 2
    + class_test_Y['B'] * 3 + class_test_Y['C'] * 5
).values.ravel()

# Classification model.
gb_class_model.fit(class_train_X, class_Y_train)

# Test-split predictions.
detection_preds = gb_detection_model.predict(detection_test_X)
class_preds = gb_class_model.predict(class_test_X)

# Shape check (label count should equal class_test_X.shape[0]), then accuracy.
print("Class Test X Shape:", class_test_X.shape)
print("Class Y Test Shape:", class_Y_test.shape)
print('Gradient Boosting Classification Model Accuracy:', gb_class_model.score(class_test_X, class_Y_test))

print("Detection Test X Shape:", detection_test_X.shape)
print("Detection Test Y Shape:", detection_test_Y.shape)

# Final accuracy of both models.
print('Gradient Boosting Detection Model Accuracy:', gb_detection_model.score(detection_test_X, detection_test_Y))
print('Gradient Boosting Classification Model Accuracy:', gb_class_model.score(class_test_X, class_Y_test))


**Gradient Boosting - Hyperparameter Tuning**

In [None]:
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score

# Randomized hyperparameter search for the gradient boosting detection and
# classification models, followed by an accuracy check on the held-out split.

# 🟢 Step 1: Define Hyperparameter Grid
# Candidate values; each search below samples 10 combinations from this grid.
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# 🟢 Step 2: Define Gradient Boosting Classifier
# Template estimator passed to both searches.
gb_model = GradientBoostingClassifier(random_state=42)

# 🟢 Step 3: Hyperparameter Tuning for Detection Model
random_search_detection = RandomizedSearchCV(gb_model, param_distributions=param_grid,
                                             n_iter=10, cv=3, verbose=1, n_jobs=-1, random_state=42)

random_search_detection.fit(detection_train_X, detection_train_Y)
best_gb_detection_model = random_search_detection.best_estimator_

# 🟢 Step 4: Encode Multi-Class Labels (for Classification)
# Collapse the G/A/B/C columns into one integer label per row. `.values` is
# taken before `.ravel()` because Series.ravel() is deprecated in pandas; this
# also matches the encoding used by the baseline cells.
# NOTE(review): the weights 1/2/3/5 are not collision-free (G+A == B == 3) --
# confirm the dataset never contains such combinations.
class_Y_train = (class_train_Y['G']*1 + class_train_Y['A']*2 +
                 class_train_Y['B']*3 + class_train_Y['C']*5).values.ravel()

class_Y_test = (class_test_Y['G']*1 + class_test_Y['A']*2 +
                class_test_Y['B']*3 + class_test_Y['C']*5).values.ravel()

# 🟢 Step 5: Hyperparameter Tuning for Classification Model
random_search_classification = RandomizedSearchCV(gb_model, param_distributions=param_grid,
                                                  n_iter=10, cv=3, verbose=1, n_jobs=-1, random_state=42)

random_search_classification.fit(class_train_X, class_Y_train)
best_gb_class_model = random_search_classification.best_estimator_

# 🟢 Step 6: Model Predictions
detection_preds = best_gb_detection_model.predict(detection_test_X)
class_preds = best_gb_class_model.predict(class_test_X)

# 🟢 Step 7: Accuracy Scores
detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
classification_accuracy = accuracy_score(class_Y_test, class_preds)

# 🟢 Step 8: Print Results
print("Class Test X Shape:", class_test_X.shape)
print("Class Y Test Shape:", class_Y_test.shape)
print("Gradient Boosting Classification Model Accuracy:", classification_accuracy)
print("Detection Test X Shape:", detection_test_X.shape)
print("Detection Test Y Shape:", detection_test_Y.shape)
print("Gradient Boosting Detection Model Accuracy:", detection_accuracy)


In [None]:
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report

# Randomized hyperparameter search for gradient boosting, reporting accuracy
# plus per-class precision/recall/F1 for both the detection and the
# classification model.

# 🟢 Step 1: Define Hyperparameter Grid
# Candidate values; each search below samples 10 combinations from this grid.
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# 🟢 Step 2: Define Gradient Boosting Classifier
# Template estimator passed to both searches.
gb_model = GradientBoostingClassifier(random_state=42)

# 🟢 Step 3: Hyperparameter Tuning for Detection Model
random_search_detection = RandomizedSearchCV(gb_model, param_distributions=param_grid,
                                             n_iter=10, cv=3, verbose=1, n_jobs=-1, random_state=42)

random_search_detection.fit(detection_train_X, detection_train_Y)
best_gb_detection_model = random_search_detection.best_estimator_

# 🟢 Step 4: Encode Multi-Class Labels (for Classification)
# `.values` is taken before `.ravel()` because Series.ravel() is deprecated in
# pandas; this also matches the encoding used by the baseline cells.
# NOTE(review): the weights 1/2/3/5 are not collision-free (G+A == B == 3) --
# confirm the dataset never contains such combinations.
class_Y_train = (class_train_Y['G']*1 + class_train_Y['A']*2 +
                 class_train_Y['B']*3 + class_train_Y['C']*5).values.ravel()

class_Y_test = (class_test_Y['G']*1 + class_test_Y['A']*2 +
                class_test_Y['B']*3 + class_test_Y['C']*5).values.ravel()

# 🟢 Step 5: Hyperparameter Tuning for Classification Model
random_search_classification = RandomizedSearchCV(gb_model, param_distributions=param_grid,
                                                  n_iter=10, cv=3, verbose=1, n_jobs=-1, random_state=42)

random_search_classification.fit(class_train_X, class_Y_train)
best_gb_class_model = random_search_classification.best_estimator_

# 🟢 Step 6: Model Predictions
detection_preds = best_gb_detection_model.predict(detection_test_X)
class_preds = best_gb_class_model.predict(class_test_X)

# 🟢 Step 7: Accuracy Scores
detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
classification_accuracy = accuracy_score(class_Y_test, class_preds)

# 🟢 Step 8: Print Accuracy Results
print("\n🟢 Gradient Boosting Model Performance 🟢")
print("---------------------------------------------------")
print(f"Gradient Boosting Detection Model Accuracy: {detection_accuracy:.4f}")
print(f"Gradient Boosting Classification Model Accuracy: {classification_accuracy:.4f}")
print("---------------------------------------------------")

# 🟢 Step 9: Precision, Recall, F1-Score, and Support Count
print("\n📊 Classification Report for Detection Model 📊")
print(classification_report(detection_test_Y, detection_preds))

print("\n📊 Classification Report for Classification Model 📊")
print(classification_report(class_Y_test, class_preds))


**Random Forest**

In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Baseline random forests: one for fault detection and one for fault-type
# classification, both with 100 trees capped at depth 5.

# Collapse the G/A/B/C columns into a single integer label per row.
# NOTE(review): the weights 1/2/3/5 are not collision-free (G+A == B == 3) --
# confirm the dataset never contains such combinations.
class_Y_train = (
    class_train_Y['G'] * 1
    + class_train_Y['A'] * 2
    + class_train_Y['B'] * 3
    + class_train_Y['C'] * 5
).values.ravel()
class_Y_test = (
    class_test_Y['G'] * 1
    + class_test_Y['A'] * 2
    + class_test_Y['B'] * 3
    + class_test_Y['C'] * 5
).values.ravel()

# Build and train the detector first, then the fault-type classifier.
rf_detection_model = RandomForestClassifier(
    n_estimators=100, max_depth=5, random_state=42
).fit(detection_train_X, detection_train_Y)
rf_class_model = RandomForestClassifier(
    n_estimators=100, max_depth=5, random_state=42
).fit(class_train_X, class_Y_train)

# Hard predictions stay in the namespace; the accuracies below use .score().
detection_preds = rf_detection_model.predict(detection_test_X)
class_preds = rf_class_model.predict(class_test_X)

# The label vector must have one entry per test row.
print("Class Test X Shape:", class_test_X.shape)
print("Class Y Test Shape:", class_Y_test.shape)
print('Random Forest Classification Model Accuracy:', rf_class_model.score(class_test_X, class_Y_test))

print("Detection Test X Shape:", detection_test_X.shape)
print("Detection Test Y Shape:", detection_test_Y.shape)

# Final accuracy summary for both models.
print('Random Forest Detection Model Accuracy:', rf_detection_model.score(detection_test_X, detection_test_Y))
print('Random Forest Classification Model Accuracy:', rf_class_model.score(class_test_X, class_Y_test))


In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report

# Randomized hyperparameter search for the random forest detection and
# classification models, reporting accuracy and per-class metrics on the
# held-out split.

# 🟢 Step 1: Define Hyperparameter Grid
# Candidate values; each search below samples 10 combinations from this grid.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# 🟢 Step 2: Define Random Forest Model
# Template estimator passed to both searches.
rf_model = RandomForestClassifier(random_state=42)

# 🟢 Step 3: Hyperparameter Tuning for Detection Model
random_search_detection = RandomizedSearchCV(rf_model, param_distributions=param_grid,
                                             n_iter=10, cv=3, verbose=1, n_jobs=-1, random_state=42)

random_search_detection.fit(detection_train_X, detection_train_Y)
best_rf_detection_model = random_search_detection.best_estimator_

# 🟢 Step 4: Encode Multi-Class Labels (for Classification)
# `.values` is taken before `.ravel()` because Series.ravel() is deprecated in
# pandas; this also matches the encoding used by the baseline cells.
# NOTE(review): the weights 1/2/3/5 are not collision-free (G+A == B == 3) --
# confirm the dataset never contains such combinations.
class_Y_train = (class_train_Y['G']*1 + class_train_Y['A']*2 +
                 class_train_Y['B']*3 + class_train_Y['C']*5).values.ravel()

class_Y_test = (class_test_Y['G']*1 + class_test_Y['A']*2 +
                class_test_Y['B']*3 + class_test_Y['C']*5).values.ravel()

# 🟢 Step 5: Hyperparameter Tuning for Classification Model
random_search_classification = RandomizedSearchCV(rf_model, param_distributions=param_grid,
                                                  n_iter=10, cv=3, verbose=1, n_jobs=-1, random_state=42)

random_search_classification.fit(class_train_X, class_Y_train)
best_rf_class_model = random_search_classification.best_estimator_

# 🟢 Step 6: Model Predictions
detection_preds = best_rf_detection_model.predict(detection_test_X)
class_preds = best_rf_class_model.predict(class_test_X)

# 🟢 Step 7: Accuracy Scores
detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
classification_accuracy = accuracy_score(class_Y_test, class_preds)

# 🟢 Step 8: Print Accuracy Results
print("\n🟢 Random Forest Model Performance 🟢")
print("---------------------------------------------------")
print(f"Random Forest Detection Model Accuracy: {detection_accuracy:.4f}")
print(f"Random Forest Classification Model Accuracy: {classification_accuracy:.4f}")
print("---------------------------------------------------")

# 🟢 Step 9: Precision, Recall, F1-Score, and Support Count
print("\n📊 Classification Report for Detection Model 📊")
print(classification_report(detection_test_Y, detection_preds))

print("\n📊 Classification Report for Classification Model 📊")
print(classification_report(class_Y_test, class_preds))


**Voting Ensemble Implementation**

In [None]:
import numpy as np
from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# Soft-voting ensemble of a random forest, a scaled KNN and a scaled logistic
# regression, trained once for fault detection and once for fault typing.

# Standalone scaler instance; the pipelines below create their own scalers,
# so this object is not used in this cell.
scaler = StandardScaler()

# Collapse the G/A/B/C columns into a single integer label per row.
# NOTE(review): the weights 1/2/3/5 are not collision-free (G+A == B == 3) --
# confirm the dataset never contains such combinations.
class_Y_train = (
    class_train_Y['G'] * 1
    + class_train_Y['A'] * 2
    + class_train_Y['B'] * 3
    + class_train_Y['C'] * 5
).values.ravel()
class_Y_test = (
    class_test_Y['G'] * 1
    + class_test_Y['A'] * 2
    + class_test_Y['B'] * 3
    + class_test_Y['C'] * 5
).values.ravel()

# --- Ensemble members -------------------------------------------------------
rf_model = RandomForestClassifier(
    n_estimators=400,
    max_depth=20,
    min_samples_split=3,
    min_samples_leaf=2,
    class_weight='balanced',
    random_state=42,
)

# KNN is distance based, so features are standardised inside the pipeline.
knn_model = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=3, weights='distance', metric='euclidean')),
])

# Logistic regression also gets standardised inputs.
lr_model = Pipeline([
    ('scaler', StandardScaler()),
    ('lr', LogisticRegression(C=1.2, max_iter=1000, solver='lbfgs', penalty='l2', random_state=42)),
])

# --- Ensembles --------------------------------------------------------------
# Soft voting averages predicted probabilities; weights favour RF (5), then
# KNN (3), then LR (1).
voting_detection_model = VotingClassifier(
    estimators=[('rf', rf_model), ('knn', knn_model), ('lr', lr_model)],
    voting='soft',
    weights=[5, 3, 1],
)
voting_class_model = VotingClassifier(
    estimators=[('rf', rf_model), ('knn', knn_model), ('lr', lr_model)],
    voting='soft',
    weights=[5, 3, 1],
)

# Train the detector on the binary target, the classifier on encoded labels.
voting_detection_model.fit(detection_train_X, detection_train_Y)
voting_class_model.fit(class_train_X, class_Y_train)

# Hard predictions on the held-out split.
detection_preds = voting_detection_model.predict(detection_test_X)
class_preds = voting_class_model.predict(class_test_X)

# Accuracy of each ensemble on its test split.
detection_accuracy = voting_detection_model.score(detection_test_X, detection_test_Y)
classification_accuracy = voting_class_model.score(class_test_X, class_Y_test)

print('Optimized Voting Detection Model Accuracy:', detection_accuracy)
print('Optimized Voting Classification Model Accuracy:', classification_accuracy)


**Voting - Hyperparameter Tuning**

In [None]:
import numpy as np
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report

# Randomized hyperparameter search over a soft-voting ensemble (RF + scaled
# KNN + scaled LR) for both the detection and the classification task.

# 🟢 Step 1: Define Hyperparameter Grids for Each Model
rf_param_grid = {
    'n_estimators': [100, 200, 400],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 3, 5],
    'min_samples_leaf': [1, 2, 4],
    'class_weight': ['balanced', None]
}

# Keys carry the pipeline step name ('knn') as a prefix.
knn_param_grid = {
    'knn__n_neighbors': [3, 5, 7],
    'knn__weights': ['uniform', 'distance'],
    'knn__metric': ['euclidean', 'manhattan']
}

# Keys carry the pipeline step name ('lr') as a prefix.
lr_param_grid = {
    'lr__C': [0.5, 1, 1.5],
    'lr__solver': ['lbfgs', 'saga'],
    'lr__penalty': ['l2']
}

# 🟢 Step 2: Define Optimized Base Models
rf_model = RandomForestClassifier(random_state=42)

knn_model = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier())
])

lr_model = Pipeline([
    ('scaler', StandardScaler()),
    ('lr', LogisticRegression(max_iter=1000, random_state=42))
])

# 🟢 Step 3: Define Voting Classifier (with Default Models)
voting_model = VotingClassifier(estimators=[
    ('rf', rf_model), ('knn', knn_model), ('lr', lr_model)],
    voting='soft')

# 🟢 Step 4: Hyperparameter Tuning for Detection Model
# Prefix every key with the ensemble member name. For the two pipelines this
# yields a doubled prefix (e.g. 'knn__knn__n_neighbors'): the first part
# selects the ensemble member, the second the step inside its pipeline.
param_grid = {**{f'rf__{key}': value for key, value in rf_param_grid.items()},
              **{f'knn__{key}': value for key, value in knn_param_grid.items()},
              **{f'lr__{key}': value for key, value in lr_param_grid.items()}}

random_search_detection = RandomizedSearchCV(voting_model, param_distributions=param_grid,
                                             n_iter=10, cv=3, verbose=1, n_jobs=-1, random_state=42)

random_search_detection.fit(detection_train_X, detection_train_Y)
best_voting_detection_model = random_search_detection.best_estimator_

# 🟢 Step 5: Encode Multi-Class Labels for Classification
# `.values` is taken before `.ravel()` because Series.ravel() is deprecated in
# pandas; this also matches the encoding used by the baseline cells.
# NOTE(review): the weights 1/2/3/5 are not collision-free (G+A == B == 3) --
# confirm the dataset never contains such combinations.
class_Y_train = (class_train_Y['G']*1 + class_train_Y['A']*2 +
                 class_train_Y['B']*3 + class_train_Y['C']*5).values.ravel()

class_Y_test = (class_test_Y['G']*1 + class_test_Y['A']*2 +
                class_test_Y['B']*3 + class_test_Y['C']*5).values.ravel()

# 🟢 Step 6: Hyperparameter Tuning for Classification Model
random_search_classification = RandomizedSearchCV(voting_model, param_distributions=param_grid,
                                                  n_iter=10, cv=3, verbose=1, n_jobs=-1, random_state=42)

random_search_classification.fit(class_train_X, class_Y_train)
best_voting_class_model = random_search_classification.best_estimator_

# 🟢 Step 7: Model Predictions
detection_preds = best_voting_detection_model.predict(detection_test_X)
class_preds = best_voting_class_model.predict(class_test_X)

# 🟢 Step 8: Accuracy Scores
detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
classification_accuracy = accuracy_score(class_Y_test, class_preds)

# 🟢 Step 9: Print Accuracy Results
print("\n🟢 Optimized Voting Model Performance 🟢")
print("---------------------------------------------------")
print(f"Voting Detection Model Accuracy: {detection_accuracy:.4f}")
print(f"Voting Classification Model Accuracy: {classification_accuracy:.4f}")
print("---------------------------------------------------")

# 🟢 Step 10: Precision, Recall, F1-Score, and Support Count
print("\n📊 Classification Report for Detection Model 📊")
print(classification_report(detection_test_Y, detection_preds))

print("\n📊 Classification Report for Classification Model 📊")
print(classification_report(class_Y_test, class_preds))


**Stacking Ensemble Implementation**

In [None]:
from sklearn.ensemble import StackingClassifier

# Stacked ensemble: RF, KNN and logistic regression as level-0 learners with
# a logistic regression as the final estimator, trained once per task.

# Collapse the G/A/B/C columns into a single integer label per row.
# NOTE(review): the weights 1/2/3/5 are not collision-free (G+A == B == 3) --
# confirm the dataset never contains such combinations.
class_Y_train = (
    class_train_Y['G'] * 1
    + class_train_Y['A'] * 2
    + class_train_Y['B'] * 3
    + class_train_Y['C'] * 5
).values.ravel()
class_Y_test = (
    class_test_Y['G'] * 1
    + class_test_Y['A'] * 2
    + class_test_Y['B'] * 3
    + class_test_Y['C'] * 5
).values.ravel()

# Level-0 learners, addressed later by their names 'rf', 'knn' and 'lr'.
base_models = [
    ('rf', RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)),
    ('knn', KNeighborsClassifier(n_neighbors=5)),
    ('lr', LogisticRegression(max_iter=1000, random_state=42)),
]

# Final estimator that combines the base learners' outputs.
meta_model = LogisticRegression(max_iter=1000, random_state=42)

# One stack per task, built from the same base/meta definitions.
stacking_detection_model = StackingClassifier(estimators=base_models, final_estimator=meta_model)
stacking_class_model = StackingClassifier(estimators=base_models, final_estimator=meta_model)

# Train the detector on the binary target, the classifier on encoded labels.
stacking_detection_model.fit(detection_train_X, detection_train_Y)
stacking_class_model.fit(class_train_X, class_Y_train)

# Hard predictions on the held-out split.
detection_preds = stacking_detection_model.predict(detection_test_X)
class_preds = stacking_class_model.predict(class_test_X)

# Accuracy of each stack on its test split.
print('Stacking Detection Model Accuracy:', stacking_detection_model.score(detection_test_X, detection_test_Y))
print('Stacking Classification Model Accuracy:', stacking_class_model.score(class_test_X, class_Y_test))


**Stacking - Hyperparameter Tuning**

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report

# Randomized hyperparameter search over the stacking ensembles defined in the
# previous cell (`stacking_detection_model`, `stacking_class_model`).
#
# The split targets are deliberately NOT overwritten here: `class_train_Y` /
# `class_test_Y` are four-column frames (G, C, B, A), so flattening them would
# produce four labels per sample and would break every later cell that still
# indexes them by column name.

# ===================== Encode Multi-Class Labels ===================== #
# Same one-label-per-row encoding as the other classification cells.
# NOTE(review): the weights 1/2/3/5 are not collision-free (G+A == B == 3) --
# confirm the dataset never contains such combinations.
class_Y_train = (class_train_Y['G']*1 + class_train_Y['A']*2 +
                 class_train_Y['B']*3 + class_train_Y['C']*5).values.ravel()

class_Y_test = (class_test_Y['G']*1 + class_test_Y['A']*2 +
                class_test_Y['B']*3 + class_test_Y['C']*5).values.ravel()

# ===================== Hyperparameter Grid ===================== #
# Keys are '<estimator name>__<parameter>' for the base learners named
# 'rf', 'knn' and 'lr', and 'final_estimator__<parameter>' for the meta-model.
# A dedicated grid is required: the grid left over from the voting cell
# targets pipeline steps that do not exist in this ensemble.
stacking_param_grid = {
    'rf__n_estimators': [100, 200, 400],
    'rf__max_depth': [5, 10, 20, None],
    'knn__n_neighbors': [3, 5, 7],
    'lr__C': [0.5, 1, 1.5],
    'final_estimator__C': [0.5, 1, 1.5],
}

# ===================== Hyperparameter Tuning: Detection Model ===================== #
random_search_detection = RandomizedSearchCV(
    stacking_detection_model, param_distributions=stacking_param_grid,
    n_iter=5, cv=3, verbose=1, n_jobs=-1, random_state=42
)

random_search_detection.fit(detection_train_X, detection_train_Y)
best_detection_model = random_search_detection.best_estimator_

# ===================== Hyperparameter Tuning: Classification Model ===================== #
random_search_classification = RandomizedSearchCV(
    stacking_class_model, param_distributions=stacking_param_grid,
    n_iter=5, cv=3, verbose=1, n_jobs=-1, random_state=42
)

random_search_classification.fit(class_train_X, class_Y_train)
best_class_model = random_search_classification.best_estimator_

# ===================== Model Predictions ===================== #
detection_preds = best_detection_model.predict(detection_test_X)
class_preds = best_class_model.predict(class_test_X)

# ===================== Accuracy Scores ===================== #
detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
classification_accuracy = accuracy_score(class_Y_test, class_preds)

# ===================== Classification Reports ===================== #
print("========= Stacking Detection Model =========")
print("🔥 Detection Accuracy:", detection_accuracy)
print("\nClassification Report for Detection Model:")
print(classification_report(detection_test_Y, detection_preds))

print("\n========= Stacking Classification Model =========")
print("🔥 Classification Accuracy:", classification_accuracy)
print("\nClassification Report for Classification Model:")
print(classification_report(class_Y_test, class_preds))


**Graph of ensemble models**

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Grouped bar chart comparing detection vs. classification accuracy for the
# five ensemble approaches.

# Accuracy values are typed in by hand; replace them with the figures printed
# by the model cells when those are re-run.
models = ["Bagging", "Gradient Boosting", "Random Forest", "Voting", "Stacking"]
detection_accuracies = [0.988, 0.985, 0.974, 0.974, 0.986]  # Example detection accuracies
classification_accuracies = [0.805, 0.798, 0.777, 0.729, 0.867]  # Example classification accuracies

# One slot per model; the two bars of a group sit side by side.
bar_width = 0.35
index = np.arange(len(models))

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(index, detection_accuracies, bar_width, label="Detection Accuracy", color="royalblue")
ax.bar(index + bar_width, classification_accuracies, bar_width,
       label="Classification Accuracy", color="darkorange")

# Labels, title, and ticks centred between the two bars of each group.
ax.set_xlabel("Models")
ax.set_ylabel("Accuracy")
ax.set_title("Comparison of Ensemble Models Based on Accuracy")
ax.set_xticks(index + bar_width / 2)
ax.set_xticklabels(models)
ax.set_ylim(0.7, 1.0)  # Zoomed y-range; adjust if any accuracy falls below 0.7
ax.legend()
ax.grid(axis='y', linestyle="--", alpha=0.7)

plt.show()


**CatBoost**

In [None]:
import numpy as np
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score

# Baseline CatBoost: one model for fault detection and one for fault-type
# classification, both with the same fixed hyperparameters.

# Collapse the G/A/B/C columns into a single integer label per row.
# NOTE(review): the weights 1/2/3/5 are not collision-free (G+A == B == 3) --
# confirm the dataset never contains such combinations.
class_Y_train = (
    class_train_Y['G'] * 1
    + class_train_Y['A'] * 2
    + class_train_Y['B'] * 3
    + class_train_Y['C'] * 5
).values.ravel()
class_Y_test = (
    class_test_Y['G'] * 1
    + class_test_Y['A'] * 2
    + class_test_Y['B'] * 3
    + class_test_Y['C'] * 5
).values.ravel()

# Two identically configured models; verbose=500 is passed through to CatBoost.
cat_detection_model = CatBoostClassifier(
    iterations=3000, learning_rate=0.03, depth=8, random_state=42, verbose=500
)
cat_class_model = CatBoostClassifier(
    iterations=3000, learning_rate=0.03, depth=8, random_state=42, verbose=500
)

# Train the detector first, then the fault-type classifier.
cat_detection_model.fit(detection_train_X, detection_train_Y)
cat_class_model.fit(class_train_X, class_Y_train)

# Hard predictions stay in the namespace; the accuracies below use .score().
detection_preds = cat_detection_model.predict(detection_test_X)
class_preds = cat_class_model.predict(class_test_X)

# Shapes plus accuracy rounded to four decimals, classification first.
print("Class Test X Shape:", class_test_X.shape)
print("Class Y Test Shape:", class_Y_test.shape)
print("CatBoost Classification Model Accuracy:", round(cat_class_model.score(class_test_X, class_Y_test), 4))

print("Detection Test X Shape:", detection_test_X.shape)
print("Detection Test Y Shape:", detection_test_Y.shape)
print("CatBoost Detection Model Accuracy:", round(cat_detection_model.score(detection_test_X, detection_test_Y), 4))


**CatBoost - Hyperparameter Tuning**

In [None]:
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Exhaustive grid search for CatBoost on standardised features, for both the
# detection and the classification task. Relies on `class_Y_train` and
# `class_Y_test` left in the namespace by an earlier cell.

# --- Labels: map the integer fault codes onto consecutive ids ---------------
label_encoder = LabelEncoder()
Y_train_encoded = label_encoder.fit_transform(class_Y_train)
Y_test_encoded = label_encoder.transform(class_Y_test)

# --- Features: standardise each dataset with statistics from its own train split
# The single scaler is refitted for the detection data, so it ends up holding
# the detection statistics only.
scaler = StandardScaler()
class_train_X_scaled = scaler.fit_transform(class_train_X)
class_test_X_scaled = scaler.transform(class_test_X)
detection_train_X_scaled = scaler.fit_transform(detection_train_X)
detection_test_X_scaled = scaler.transform(detection_test_X)

# --- Search space (2 values per parameter, 32 combinations) -----------------
param_grid_catboost = {
    'iterations': [200, 500],       # boosting rounds
    'depth': [6, 10],               # tree depth
    'learning_rate': [0.05, 0.1],   # shrinkage
    'l2_leaf_reg': [3, 5],          # L2 regularisation
    'border_count': [32, 64],       # split candidates for numeric features
}

# --- Detection model --------------------------------------------------------
catboost_detection = CatBoostClassifier(task_type="CPU", verbose=0, random_state=42)
grid_search_detection = GridSearchCV(
    catboost_detection, param_grid_catboost, cv=3, scoring='accuracy', n_jobs=-1
)
grid_search_detection.fit(detection_train_X_scaled, detection_train_Y)
best_catboost_detection = grid_search_detection.best_estimator_

# --- Classification model ---------------------------------------------------
catboost_classification = CatBoostClassifier(task_type="CPU", verbose=0, random_state=42)
grid_search_classification = GridSearchCV(
    catboost_classification, param_grid_catboost, cv=3, scoring='accuracy', n_jobs=-1
)
grid_search_classification.fit(class_train_X_scaled, Y_train_encoded)
best_catboost_classification = grid_search_classification.best_estimator_

# --- Evaluation on the held-out split ---------------------------------------
detection_preds = best_catboost_detection.predict(detection_test_X_scaled)
class_preds = best_catboost_classification.predict(class_test_X_scaled)

detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
classification_accuracy = accuracy_score(Y_test_encoded, class_preds)

print(f'CatBoost Detection Model Accuracy: {detection_accuracy:.4f}')
print(f'CatBoost Classification Model Accuracy: {classification_accuracy:.4f}')


**LightGBM**

In [None]:
import numpy as np
import lightgbm as lgb
from sklearn.metrics import accuracy_score

# Baseline LightGBM: one model for fault detection and one for fault-type
# classification, both with the same fixed hyperparameters.

# Collapse the G/A/B/C columns into a single integer label per row.
# NOTE(review): the weights 1/2/3/5 are not collision-free (G+A == B == 3) --
# confirm the dataset never contains such combinations.
class_Y_train = (
    class_train_Y['G'] * 1
    + class_train_Y['A'] * 2
    + class_train_Y['B'] * 3
    + class_train_Y['C'] * 5
).values.ravel()
class_Y_test = (
    class_test_Y['G'] * 1
    + class_test_Y['A'] * 2
    + class_test_Y['B'] * 3
    + class_test_Y['C'] * 5
).values.ravel()

# Two identically configured models.
lgb_detection_model = lgb.LGBMClassifier(
    n_estimators=3000, learning_rate=0.03, max_depth=10, random_state=42, verbose=-1
)
lgb_class_model = lgb.LGBMClassifier(
    n_estimators=3000, learning_rate=0.03, max_depth=10, random_state=42, verbose=-1
)

# Train the detector first, then the fault-type classifier.
lgb_detection_model.fit(detection_train_X, detection_train_Y)
lgb_class_model.fit(class_train_X, class_Y_train)

# Hard predictions stay in the namespace; the accuracies below use .score().
detection_preds = lgb_detection_model.predict(detection_test_X)
class_preds = lgb_class_model.predict(class_test_X)

# Shapes plus accuracy rounded to four decimals, classification first.
print("Class Test X Shape:", class_test_X.shape)
print("Class Y Test Shape:", class_Y_test.shape)
print("LightGBM Classification Model Accuracy:", round(lgb_class_model.score(class_test_X, class_Y_test), 4))

print("Detection Test X Shape:", detection_test_X.shape)
print("Detection Test Y Shape:", detection_test_Y.shape)
print("LightGBM Detection Model Accuracy:", round(lgb_detection_model.score(detection_test_X, detection_test_Y), 4))


**LightGBM - Hyperparameter Tuning**

In [None]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Exhaustive grid search for LightGBM on standardised features, for both the
# detection and the classification task. Relies on `class_Y_train` and
# `class_Y_test` left in the namespace by an earlier cell.

# --- Labels: map the integer fault codes onto consecutive ids ---------------
label_encoder = LabelEncoder()
Y_train_encoded = label_encoder.fit_transform(class_Y_train)
Y_test_encoded = label_encoder.transform(class_Y_test)

# --- Features: standardise each dataset with statistics from its own train split
# The single scaler is refitted for the detection data, so it ends up holding
# the detection statistics only.
scaler = StandardScaler()
class_train_X_scaled = scaler.fit_transform(class_train_X)
class_test_X_scaled = scaler.transform(class_test_X)
detection_train_X_scaled = scaler.fit_transform(detection_train_X)
detection_test_X_scaled = scaler.transform(detection_test_X)

# --- Search space (2 values per parameter, 64 combinations) -----------------
param_grid_lgbm = {
    'n_estimators': [200, 500],     # boosting rounds
    'learning_rate': [0.05, 0.1],   # shrinkage
    'max_depth': [6, 10],           # maximum tree depth
    'num_leaves': [31, 50],         # leaves per tree
    'reg_alpha': [0.1, 0.5],        # L1 regularisation
    'reg_lambda': [0.1, 0.5],       # L2 regularisation
}

# --- Detection model (binary objective) -------------------------------------
lgbm_detection = lgb.LGBMClassifier(objective='binary', random_state=42)
grid_search_detection = GridSearchCV(
    lgbm_detection, param_grid_lgbm, cv=3, scoring='accuracy', n_jobs=-1
)
grid_search_detection.fit(detection_train_X_scaled, detection_train_Y)
best_lgbm_detection = grid_search_detection.best_estimator_

# --- Classification model (multiclass objective) ----------------------------
# The class count is the number of distinct encoded training labels.
lgbm_classification = lgb.LGBMClassifier(
    objective='multiclass',
    num_class=len(np.unique(Y_train_encoded)),
    random_state=42,
)
grid_search_classification = GridSearchCV(
    lgbm_classification, param_grid_lgbm, cv=3, scoring='accuracy', n_jobs=-1
)
grid_search_classification.fit(class_train_X_scaled, Y_train_encoded)
best_lgbm_classification = grid_search_classification.best_estimator_

# --- Evaluation on the held-out split ---------------------------------------
detection_preds = best_lgbm_detection.predict(detection_test_X_scaled)
class_preds = best_lgbm_classification.predict(class_test_X_scaled)

detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
classification_accuracy = accuracy_score(Y_test_encoded, class_preds)

print(f'LightGBM Detection Model Accuracy: {detection_accuracy:.4f}')
print(f'LightGBM Classification Model Accuracy: {classification_accuracy:.4f}')


**XGBoost**

In [None]:
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Baseline XGBoost: one model for fault detection and one for fault-type
# classification, both with the same fixed hyperparameters.

# --- Classification labels --------------------------------------------------
# Collapse the G/A/B/C columns into one integer code per row, then map the
# codes onto consecutive ids with a LabelEncoder before training.
# NOTE(review): the weights 1/2/3/5 are not collision-free (G+A == B == 3) --
# confirm the dataset never contains such combinations.
label_encoder = LabelEncoder()
class_Y_train_encoded = label_encoder.fit_transform(
    (
        class_train_Y['G'] * 1
        + class_train_Y['A'] * 2
        + class_train_Y['B'] * 3
        + class_train_Y['C'] * 5
    ).values.ravel()
)
class_Y_test_encoded = label_encoder.transform(
    (
        class_test_Y['G'] * 1
        + class_test_Y['A'] * 2
        + class_test_Y['B'] * 3
        + class_test_Y['C'] * 5
    ).values.ravel()
)

# --- Detection model (trained on the detection target as-is) ----------------
xgb_detection_model = xgb.XGBClassifier(
    n_estimators=3000, learning_rate=0.03, max_depth=10,
    subsample=0.8, colsample_bytree=0.8, random_state=42, verbosity=0,
)
xgb_detection_model.fit(detection_train_X, detection_train_Y)

# --- Classification model (trained on the encoded ids) ----------------------
xgb_class_model = xgb.XGBClassifier(
    n_estimators=3000, learning_rate=0.03, max_depth=10,
    subsample=0.8, colsample_bytree=0.8, random_state=42, verbosity=0,
)
xgb_class_model.fit(class_train_X, class_Y_train_encoded)

# --- Predictions ------------------------------------------------------------
detection_preds = xgb_detection_model.predict(detection_test_X)
class_preds_encoded = xgb_class_model.predict(class_test_X)

# Translate predicted ids back to the original integer fault codes.
class_preds = label_encoder.inverse_transform(class_preds_encoded)

# --- Report: shapes plus accuracy rounded to four decimals ------------------
print("Class Test X Shape:", class_test_X.shape)
print("Class Y Test Shape:", class_Y_test_encoded.shape)
print("XGBoost Classification Model Accuracy:", round(xgb_class_model.score(class_test_X, class_Y_test_encoded), 4))

print("Detection Test X Shape:", detection_test_X.shape)
print("Detection Test Y Shape:", detection_test_Y.shape)
print("XGBoost Detection Model Accuracy:", round(xgb_detection_model.score(detection_test_X, detection_test_Y), 4))



**XGBoost - Hyperparameter Tuning**

In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score

# Grid search for the XGBoost fault-type classifier on standardised features,
# plus an untuned XGBoost detector for comparison. Relies on `class_Y_train`
# and `class_Y_test` left in the namespace by an earlier cell.

# Step 1: Encode class labels as consecutive ids
label_encoder = LabelEncoder()
Y_train_encoded = label_encoder.fit_transform(class_Y_train)
Y_test_encoded = label_encoder.transform(class_Y_test)

# Step 2: Standardise the features of each dataset with statistics from its
# own training split. The scaler is refitted for the detection data, so it
# ends up holding the detection statistics only.
scaler = StandardScaler()
class_train_X_scaled = scaler.fit_transform(class_train_X)
class_test_X_scaled = scaler.transform(class_test_X)

detection_train_X_scaled = scaler.fit_transform(detection_train_X)
detection_test_X_scaled = scaler.transform(detection_test_X)

# Step 3: Search space (3 values per parameter -> 243 combinations; with
# cv=5 this is a long-running cell)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 6, 9],
    'learning_rate': [0.01, 0.1, 0.3],
    'subsample': [0.7, 0.8, 1.0],
    'colsample_bytree': [0.7, 0.8, 1.0]
}

# The class count is taken from the fitted encoder rather than hard-coded,
# so the cell keeps working if the set of fault codes in the data changes.
xgb_classifier = xgb.XGBClassifier(objective='multi:softmax',
                                   num_class=len(label_encoder.classes_),
                                   random_state=42)

# Step 4: Hyperparameter tuning using GridSearchCV
grid_search = GridSearchCV(estimator=xgb_classifier, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(class_train_X_scaled, Y_train_encoded)

# Best model from GridSearchCV
best_xgb_model = grid_search.best_estimator_

# Step 5: Evaluate the tuned classifier on the test set
class_preds = best_xgb_model.predict(class_test_X_scaled)

classification_accuracy = accuracy_score(Y_test_encoded, class_preds)
print(f'XGBoost Classification Model Accuracy: {classification_accuracy}')

# Step 6: Train the detection model (binary XGBoost with default
# hyperparameters -- no search is run for this model)
detection_model = xgb.XGBClassifier(objective='binary:logistic', random_state=42)
detection_model.fit(detection_train_X_scaled, detection_train_Y)

# Step 7: Evaluate the detection model
detection_preds = detection_model.predict(detection_test_X_scaled)
detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
print(f'XGBoost Detection Model Accuracy: {detection_accuracy}')


In [None]:
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Purpose: grid-search CatBoost models for both tasks (fault detection and
# fault classification) and print the test accuracy of the best estimators.
#
# Inputs taken from earlier cells: detection_train_X, detection_test_X,
# detection_train_Y, detection_test_Y, class_train_X, class_test_X,
# class_Y_train, class_Y_test.

# ======= Step 1: Encode Labels =======
# Fit the encoder on the training labels only, then reuse it for the test labels.
label_encoder = LabelEncoder()
Y_train_encoded = label_encoder.fit_transform(class_Y_train)
Y_test_encoded = label_encoder.transform(class_Y_test)

# ======= Step 2: Scale Features =======
# Each dataset gets its own scaler. Refitting a single scaler on the detection
# data would discard the statistics learned from the class data.
class_scaler = StandardScaler()
class_train_X_scaled = class_scaler.fit_transform(class_train_X)
class_test_X_scaled = class_scaler.transform(class_test_X)

detection_scaler = StandardScaler()
detection_train_X_scaled = detection_scaler.fit_transform(detection_train_X)
detection_test_X_scaled = detection_scaler.transform(detection_test_X)

# Backward compatibility: `scaler` used to end this cell fitted on the
# detection data, so keep that name bound to the same kind of object.
scaler = detection_scaler

# ======= Step 3: Define Hyperparameter Grids =======
# The same grid (2^5 = 32 combinations) is shared by both models.
param_grid_catboost = {
    'iterations': [200, 500],          # Number of boosting iterations
    'depth': [6, 10],                  # Tree depth
    'learning_rate': [0.05, 0.1],       # Learning rate
    'l2_leaf_reg': [3, 5],              # L2 regularization
    'border_count': [32, 64],           # Splitting points in numerical features
}

# ======= Step 4: Train CatBoost Detection Model =======
catboost_detection = CatBoostClassifier(task_type="CPU", verbose=0, random_state=42)

grid_search_detection = GridSearchCV(catboost_detection, param_grid_catboost, cv=3, scoring='accuracy', n_jobs=-1)
grid_search_detection.fit(detection_train_X_scaled, detection_train_Y)

best_catboost_detection = grid_search_detection.best_estimator_

# ======= Step 5: Train CatBoost Classification Model =======
catboost_classification = CatBoostClassifier(task_type="CPU", verbose=0, random_state=42)

grid_search_classification = GridSearchCV(catboost_classification, param_grid_catboost, cv=3, scoring='accuracy', n_jobs=-1)
grid_search_classification.fit(class_train_X_scaled, Y_train_encoded)

best_catboost_classification = grid_search_classification.best_estimator_

# ======= Step 6: Evaluate Models =======
# CatBoost's predict may return a column vector of shape (n_samples, 1) for
# multi-class models; flatten so the predictions line up with the 1-D label
# arrays here and in any later per-sample comparison.
detection_preds = np.ravel(best_catboost_detection.predict(detection_test_X_scaled))
class_preds = np.ravel(best_catboost_classification.predict(class_test_X_scaled))

detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
classification_accuracy = accuracy_score(Y_test_encoded, class_preds)

# ======= Step 7: Print Results =======
print(f'CatBoost Detection Model Accuracy: {detection_accuracy:.4f}')
print(f'CatBoost Classification Model Accuracy: {classification_accuracy:.4f}')


In [None]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Purpose: grid-search LightGBM models for both tasks (fault detection and
# fault classification) and print the test accuracy of the best estimators.
#
# Inputs taken from earlier cells: detection_train_X, detection_test_X,
# detection_train_Y, detection_test_Y, class_train_X, class_test_X,
# class_Y_train, class_Y_test.

# ======= Step 1: Encode Labels =======
# Fit the encoder on the training labels only, then reuse it for the test labels.
label_encoder = LabelEncoder()
Y_train_encoded = label_encoder.fit_transform(class_Y_train)
Y_test_encoded = label_encoder.transform(class_Y_test)

# ======= Step 2: Scale Features =======
# Each dataset gets its own scaler. Refitting a single scaler on the detection
# data would discard the statistics learned from the class data.
class_scaler = StandardScaler()
class_train_X_scaled = class_scaler.fit_transform(class_train_X)
class_test_X_scaled = class_scaler.transform(class_test_X)

detection_scaler = StandardScaler()
detection_train_X_scaled = detection_scaler.fit_transform(detection_train_X)
detection_test_X_scaled = detection_scaler.transform(detection_test_X)

# Backward compatibility: `scaler` used to end this cell fitted on the
# detection data, so keep that name bound to the same kind of object.
scaler = detection_scaler

# ======= Step 3: Define Hyperparameter Grids =======
# The same grid (2^6 = 64 combinations) is shared by both models.
param_grid_lgbm = {
    'n_estimators': [200, 500],          # Number of boosting iterations
    'learning_rate': [0.05, 0.1],        # Learning rate
    'max_depth': [6, 10],                # Max depth of trees
    'num_leaves': [31, 50],              # Number of leaves in trees
    'reg_alpha': [0.1, 0.5],             # L1 regularization
    'reg_lambda': [0.1, 0.5],            # L2 regularization
}

# ======= Step 4: Train LightGBM Detection Model =======
lgbm_detection = lgb.LGBMClassifier(objective='binary', random_state=42)

grid_search_detection = GridSearchCV(lgbm_detection, param_grid_lgbm, cv=3, scoring='accuracy', n_jobs=-1)
grid_search_detection.fit(detection_train_X_scaled, detection_train_Y)

best_lgbm_detection = grid_search_detection.best_estimator_

# ======= Step 5: Train LightGBM Classification Model =======
# The class count is derived from the encoded training labels rather than hard-coded.
lgbm_classification = lgb.LGBMClassifier(objective='multiclass', num_class=len(np.unique(Y_train_encoded)), random_state=42)

grid_search_classification = GridSearchCV(lgbm_classification, param_grid_lgbm, cv=3, scoring='accuracy', n_jobs=-1)
grid_search_classification.fit(class_train_X_scaled, Y_train_encoded)

best_lgbm_classification = grid_search_classification.best_estimator_

# ======= Step 6: Evaluate Models =======
detection_preds = best_lgbm_detection.predict(detection_test_X_scaled)
class_preds = best_lgbm_classification.predict(class_test_X_scaled)

detection_accuracy = accuracy_score(detection_test_Y, detection_preds)
classification_accuracy = accuracy_score(Y_test_encoded, class_preds)

# ======= Step 7: Print Results =======
print(f'LightGBM Detection Model Accuracy: {detection_accuracy:.4f}')
print(f'LightGBM Classification Model Accuracy: {classification_accuracy:.4f}')
