In [1]:
# Import the required libraries
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import loguniform

# Preprocessing and feature selection
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, LabelBinarizer, MinMaxScaler
from sklearn.feature_selection import RFE, VarianceThreshold
from sklearn.inspection import permutation_importance
from sklearn.manifold import TSNE

# Model selection and evaluation
from sklearn.model_selection import cross_val_predict, train_test_split, StratifiedKFold, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import classification_report, f1_score, confusion_matrix, precision_recall_curve, make_scorer, ConfusionMatrixDisplay

# Resampling techniques
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline

# Classifiers
from sklearn.linear_model import RidgeClassifier, LogisticRegression, SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, ExtraTreesClassifier, VotingClassifier, GradientBoostingClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.multiclass import OneVsRestClassifier
from sklearn.base import BaseEstimator, ClassifierMixin
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

# SHapley Additive exPlanations
import shap

from sklearn.neural_network import MLPClassifier
from catboost import CatBoostClassifier

# Step1: Read Dataset

In [2]:
# Read the Data Set
# The workbook location can be overridden with the FDD_DATA_PATH environment variable so the
# notebook is not tied to a single machine; the original absolute path remains the default.
path = os.environ.get(
    'FDD_DATA_PATH',
    '/Users/lks/Desktop/UM/Project Report/01_LBNL_FDD_Even/df_faults_even.xlsx',
)
DF_faulty = pd.read_excel(path, index_col=0) # With New Features

In [3]:
# Data Preparation: work on a copy and separate the target column from the predictors
df_trial = DF_faulty.copy()
y_full = df_trial["Fault"]
X_full = df_trial.drop(columns=['Datetime', 'Fault'])

# Hold out 20% of the rows for testing, stratified on the fault label, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_full,
    y_full,
    test_size=0.2,
    random_state=42,
    stratify=y_full,
)

# Re-attach the labels so each split is available as one self-contained frame
train_df = X_train.assign(Fault=y_train)
test_df = X_test.assign(Fault=y_test)

# Report the size of both splits
print(f"Training DataFrame shape: {train_df.shape}")
print(f"Testing DataFrame shape: {test_df.shape}")

# Per-class row counts in the training split
class_distribution = train_df['Fault'].value_counts()
print("Class distribution in the training set:")
print(class_distribution)

# Per-class row counts in the testing split
class_distribution_test = test_df['Fault'].value_counts()
print("Class distribution in the testing set:")
print(class_distribution_test)

Training DataFrame shape: (56062, 114)
Testing DataFrame shape: (14016, 114)
Class distribution in the training set:
Fault
BPS      7008
CHS      7008
CTF      7008
BPL      7008
CTS      7008
CPP      7008
N        7007
CTPID    7007
Name: count, dtype: int64
Class distribution in the testing set:
Fault
CTF      1752
CTS      1752
CTPID    1752
N        1752
CHS      1752
BPL      1752
CPP      1752
BPS      1752
Name: count, dtype: int64


# Step2: Training Phase

### Building the Individual Stage Models

### "BPX" vs "Other"

In [4]:
# Stage "BPX": BPS/BPL (grouped as one positive class) against every other label.

# Feature subset used by this stage (30 columns; order is kept because it fixes the column order
# seen by the scaler and the model)
BPX_features = [
    'CDWL_Error', 'CDWL_DeltaFLOW', 'TWV_CTRL',
    'CDWL_PM_POW_1', 'CWL_SEC_SW_TEMP', 'CWL_PRI_SW_TEMP',
    'CHL_RW_TEMP_1', 'CDWL_DeltaTEMP', 'CT_FLOW_1',
    'CHL_SWCD_TEMP_1', 'CHL_RWCD_TEMP_1', 'CT_TotalFLOW',
    'CDWL_SW_TEMP', 'CHL_SW_TEMP_1', 'CWL_SEC_PM_POW_2',
    'CWL_SEC_RW_TEMP', 'CT_SW_TEMP_1', 'CWL_SEC_PM_SPD_1',
    'CDWL_RW_TEMP', 'CWL_SEC_CW_FLOW', 'CHL_COMP_SPD_CTRL_1',
    'CWL_SEC_PM_SPD_2', 'OA_TEMP_WB', 'CWL_SEC_DP',
    'CWL_SEC_POW', 'PLANT_POW_1', 'CHL_POW_1',
    'CWL_SEC_PM_POW_1', 'OA_TEMP', 'CT_RW_TEMP_1',
]

# Rebuild predictors and labels from the training split; this stage uses every row (no filtering)
df_train = train_df.copy()
y_train = df_train['Fault']
X_train = df_train.drop(columns=['Fault'])

# Binary target: 1 for BPS or BPL ("BPX"), 0 for all remaining labels
target_faults = ["BPS", "BPL"]
y_train_BPX = np.isin(y_train, target_faults).astype(int)

# Standardize the selected columns; the fitted scaler is kept for use at prediction time
X_train_BPX = X_train[BPX_features]
BPX_scaler = StandardScaler().fit(X_train_BPX)
X_train_BPX_scaled = BPX_scaler.transform(X_train_BPX)

# Oversample the minority class with SMOTE (training data only)
smote = SMOTE(random_state=42)
X_train_BPX_resampled, y_train_BPX_resampled = smote.fit_resample(
    X_train_BPX_scaled, y_train_BPX
)

# XGBoost with the chosen hyperparameters for this stage
BPX_paras = dict(learning_rate=0.4, gamma=0.2, reg_alpha=0.2, reg_lambda=0.8)
model_BPX = XGBClassifier(random_state=42, n_jobs=-1, **BPX_paras)
model_BPX.fit(X_train_BPX_resampled, y_train_BPX_resampled)

### "BPL" vs "BPS"

In [5]:
# Stage "BPSL": BPL (positive) against BPS, trained only on rows carrying one of those two labels.

# Feature subset used by this stage (19 columns, order preserved)
BPSL_features = [
    'CT_FLOW_1', 'CT_TotalFLOW', 'Relative_Humidity',
    'CDWL_DeltaFLOW', 'CT_ApproachTEMP_1', 'OA_TEMP_WB',
    'OA_TEMP', 'TWV_CTRL', 'CDWL_POW',
    'CDWL_PM_POW_1', 'CDWL_Error', 'CDWL_SW_TEMP',
    'CHL_RWCD_TEMP_1', 'CDWL_RW_TEMP', 'CHL_SWCD_TEMP_1',
    'CWL_SEC_RW_TEMP', 'CT_RW_TEMP_1', 'CHL_POW_1',
    'CWL_SEC_SW_TEMP',
]

# Rebuild predictors and labels from the training split
df_train = train_df.copy()
y_train = df_train['Fault']
X_train = df_train.drop(columns=['Fault'])

# Keep only the rows labelled BPL or BPS
BPSL_faults = ['BPL', 'BPS']
train_indices = np.isin(y_train, BPSL_faults)
X_BPSL_train = X_train.loc[train_indices]
y_BPSL_train = y_train.loc[train_indices]

# Binary target: 1 for BPL, 0 for BPS
target_faults = ["BPL"]
y_train_BPSL = np.isin(y_BPSL_train, target_faults).astype(int)

# Standardize the selected columns; the fitted scaler is kept for use at prediction time
X_train_BPSL = X_BPSL_train[BPSL_features]
BPSL_scaler = StandardScaler().fit(X_train_BPSL)
X_train_BPSL_scaled = BPSL_scaler.transform(X_train_BPSL)

# SMOTE on the training data only
smote = SMOTE(random_state=42)
X_train_BPSL_resampled, y_train_BPSL_resampled = smote.fit_resample(
    X_train_BPSL_scaled, y_train_BPSL
)

# Stacking ensemble: four boosted/bagged tree learners feeding a gradient-boosting meta learner.
# passthrough=True hands the scaled input features to the meta learner alongside the base outputs.
meta_model = GradientBoostingClassifier(n_estimators=200, random_state=42)
base_models = [
    ("Gradient Boosting", GradientBoostingClassifier(n_estimators=300, max_depth=5, random_state=42)),
    ("Random Forest", RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1)),
    ("XGBoost", XGBClassifier(subsample=0.85, n_estimators=300, learning_rate=0.2, random_state=42, n_jobs=-1)),
    ("CatBoost", CatBoostClassifier(random_state=42, verbose=0, thread_count=-1)),
]
model_BPSL = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    n_jobs=-1,
    passthrough=True,
)
model_BPSL.fit(X_train_BPSL_resampled, y_train_BPSL_resampled)

### Model_CHS: CHS-vs-Rest

In [6]:
# Stage "CHS": CHS (positive) against CPP, CTS, CTF, CTPID and N; BPS/BPL rows are excluded.

# Feature subset used by this stage (7 columns, order preserved)
CHS_features = [
    'CWL_PRI_SW_TEMP', 'CHL_SW_TEMP_1', 'CWL_SEC_RW_TEMP',
    'CHL_COMP_SPD_CTRL_2', 'CHL_Cooling_Load_1',
    'CHL_COP_1', 'CHL_COP_2',
]

# Rebuild predictors and labels from the training split
df_train = train_df.copy()
y_train = df_train['Fault']
X_train = df_train.drop(columns=['Fault'])

# Keep only the rows whose label takes part in this stage
CHS_faults = ['CHS', 'CPP', 'CTS', 'CTF', 'CTPID', 'N']
train_indices = np.isin(y_train, CHS_faults)
X_CHS_train = X_train.loc[train_indices]
y_CHS_train = y_train.loc[train_indices]

# Binary target: 1 for CHS, 0 for the other retained labels
target_faults = ["CHS"]
y_train_CHS = np.isin(y_CHS_train, target_faults).astype(int)

# Standardize the selected columns; the fitted scaler is kept for use at prediction time
X_train_CHS = X_CHS_train[CHS_features]
CHS_scaler = StandardScaler().fit(X_train_CHS)
X_train_CHS_scaled = CHS_scaler.transform(X_train_CHS)

# Oversample the minority class with SMOTE (training data only)
smote = SMOTE(random_state=42)
X_train_CHS_resampled, y_train_CHS_resampled = smote.fit_resample(
    X_train_CHS_scaled, y_train_CHS
)

# XGBoost with the chosen hyperparameters for this stage
CHS_paras = dict(
    gamma=0.2,
    max_depth=4,
    n_estimators=200,
    reg_alpha=0.2,
    reg_lambda=1.2,
    subsample=0.85,
)
model_CHS = XGBClassifier(random_state=42, n_jobs=-1, **CHS_paras)
model_CHS.fit(X_train_CHS_resampled, y_train_CHS_resampled)

### Model_CPP: CPP-vs-Rest

In [7]:
# Stage "CPP": CPP (positive) against CTS, CTF, CTPID and N; BPS/BPL/CHS rows are excluded.

# Feature subset used by this stage (17 columns, order preserved)
CPP_features = [
    'CWL_SEC_PM_SPD_1', 'CWL_SEC_DP', 'CWL_SEC_CW_FLOW',
    'CWL_SEC_PM_SPD_2', 'CHL_SWCD_TEMP_1', 'CWL_SEC_PM_POW_1',
    'CDWL_DeltaTEMP', 'CDWL_Load', 'CWL_SEC_POW',
    'CDWL_Error', 'CT_SW_TEMP_1', 'CWL_SEC_PM_POW_2',
    'OA_TEMP_WB', 'CHL_RW_TEMP_1', 'CDWL_RW_TEMP',
    'CWL_SEC_LOAD', 'CT_Error_1',
]

# Rebuild predictors and labels from the training split
df_train = train_df.copy()
y_train = df_train['Fault']
X_train = df_train.drop(columns=['Fault'])

# Keep only the rows whose label takes part in this stage
CPP_faults = ['CPP', 'CTS', 'CTF', 'CTPID', 'N']
train_indices = np.isin(y_train, CPP_faults)
X_CPP_train = X_train.loc[train_indices]
y_CPP_train = y_train.loc[train_indices]

# Binary target: 1 for CPP, 0 for the other retained labels
target_faults = ["CPP"]
y_train_CPP = np.isin(y_CPP_train, target_faults).astype(int)

# Standardize the selected columns; the fitted scaler is kept for use at prediction time
X_train_CPP = X_CPP_train[CPP_features]
CPP_scaler = StandardScaler().fit(X_train_CPP)
X_train_CPP_scaled = CPP_scaler.transform(X_train_CPP)

# Oversample the minority class with SMOTE (training data only)
smote = SMOTE(random_state=42)
X_train_CPP_resampled, y_train_CPP_resampled = smote.fit_resample(
    X_train_CPP_scaled, y_train_CPP
)

# CatBoost with the chosen hyperparameters for this stage
CPP_paras = dict(l2_leaf_reg=1, learning_rate=0.05)
model_CPP = CatBoostClassifier(random_state=42, verbose=0, thread_count=-1, **CPP_paras)
model_CPP.fit(X_train_CPP_resampled, y_train_CPP_resampled)

<catboost.core.CatBoostClassifier at 0x301355c60>

### Model_CTS: CTS-vs-Rest

In [8]:
# Stage "CTS": CTS (positive) against CTF, CTPID and N; all other labels are excluded.

# Feature subset used by this stage (25 columns, order preserved)
CTS_features = [
    'CT_SW_TEMP_1', 'CDWL_Error', 'CT_Error_1',
    'CT_FLOW_1', 'CHL_SWCD_TEMP_1', 'CT_TotalFLOW',
    'CDWL_DeltaFLOW', 'TWV_CTRL', 'CDWL_PM_POW_1',
    'CDWL_POW', 'CT_Eff_1', 'CDWL_DeltaTEMP',
    'OA_TEMP_WB', 'CDWL_SW_TEMP', 'CDWL_Load',
    'CT_Load_1', 'CT_FAN_SPD_1', 'CT_ApproachTEMP_1',
    'PLANT_POW_1', 'CT_DeltaTEMP_1', 'CT_FAN_SPD_CTRL_1',
    'OA_TEMP', 'CT_Dist_1', 'CT_POW_1',
    'CT_RW_TEMP_1',
]

# Rebuild predictors and labels from the training split
df_train = train_df.copy()
y_train = df_train['Fault']
X_train = df_train.drop(columns=['Fault'])

# Keep only the rows whose label takes part in this stage
CTS_faults = ['CTS', 'CTF', 'CTPID', 'N']
train_indices = np.isin(y_train, CTS_faults)
X_CTS_train = X_train.loc[train_indices]
y_CTS_train = y_train.loc[train_indices]

# Binary target: 1 for CTS, 0 for the other retained labels
target_faults = ["CTS"]
y_train_CTS = np.isin(y_CTS_train, target_faults).astype(int)

# Standardize the selected columns; the fitted scaler is kept for use at prediction time
X_train_CTS = X_CTS_train[CTS_features]
CTS_scaler = StandardScaler().fit(X_train_CTS)
X_train_CTS_scaled = CTS_scaler.transform(X_train_CTS)

# Oversample the minority class with SMOTE (training data only)
smote = SMOTE(random_state=42)
X_train_CTS_resampled, y_train_CTS_resampled = smote.fit_resample(
    X_train_CTS_scaled, y_train_CTS
)

# CatBoost with the chosen hyperparameters for this stage
CTS_paras = dict(iterations=1250, l2_leaf_reg=1, learning_rate=0.05)
model_CTS = CatBoostClassifier(random_state=42, verbose=0, thread_count=-1, **CTS_paras)
model_CTS.fit(X_train_CTS_resampled, y_train_CTS_resampled)

<catboost.core.CatBoostClassifier at 0x3014378b0>

### Model_CTF: CTF-vs-Rest

In [9]:
# Stage "CTF": CTF (positive) against CTPID and N; all other labels are excluded.

# Feature subset used by this stage (12 columns, order preserved)
CTF_features = [
    'TWV_CTRL', 'CDWL_POW', 'CT_Error_1',
    'CT_POW_1', 'OA_TEMP_WB', 'CT_SW_TEMP_1',
    'CDWL_PM_POW_1', 'CDWL_Error', 'CHL_SWCD_TEMP_1',
    'CDWL_DeltaFLOW', 'CT_Eff_1', 'CT_Eff_2',
]

# Rebuild predictors and labels from the training split
df_train = train_df.copy()
y_train = df_train['Fault']
X_train = df_train.drop(columns=['Fault'])

# Keep only the rows whose label takes part in this stage
CTF_faults = ['CTF', 'CTPID', 'N']
train_indices = np.isin(y_train, CTF_faults)
X_CTF_train = X_train.loc[train_indices]
y_CTF_train = y_train.loc[train_indices]

# Binary target: 1 for CTF, 0 for the other retained labels
target_faults = ["CTF"]
y_train_CTF = np.isin(y_CTF_train, target_faults).astype(int)

# Standardize the selected columns; the fitted scaler is kept for use at prediction time
X_train_CTF = X_CTF_train[CTF_features]
CTF_scaler = StandardScaler().fit(X_train_CTF)
X_train_CTF_scaled = CTF_scaler.transform(X_train_CTF)

# Oversample the minority class with SMOTE (training data only)
smote = SMOTE(random_state=42)
X_train_CTF_resampled, y_train_CTF_resampled = smote.fit_resample(
    X_train_CTF_scaled, y_train_CTF
)

# XGBoost with the chosen hyperparameters for this stage
CTF_paras = dict(
    learning_rate=0.4,
    n_estimators=200,
    max_depth=8,
    reg_alpha=0.1,
    reg_lambda=1.2,
    gamma=0.1,
)
model_CTF = XGBClassifier(random_state=42, n_jobs=-1, **CTF_paras)
model_CTF.fit(X_train_CTF_resampled, y_train_CTF_resampled)

### Model_CTPID: CTPID-vs-Rest

In [10]:
# Stage "CTPID": CTPID (positive) against N; all other labels are excluded.

# Feature subset used by this stage (17 columns, order preserved)
CTPID_features = [
    'CT_Error_1', 'CDWL_Error', 'CT_FAN_SPD_CTRL_1',
    'CT_POW_1', 'CT_FAN_SPD_1', 'CWL_SEC_SW_TEMP',
    'CWL_SEC_RW_TEMP', 'CT_Eff_1', 'CWL_SEC_CW_FLOW',
    'CT_Dist_1', 'Relative_Humidity', 'CT_ApproachTEMP_1',
    'CWL_SEC_PM_SPD_1', 'CT_Load_1', 'CWL_SEC_PM_POW_1',
    'OA_TEMP', 'CT_SW_TEMP_1',
]

# Rebuild predictors and labels from the training split
df_train = train_df.copy()
y_train = df_train['Fault']
X_train = df_train.drop(columns=['Fault'])

# Keep only the rows labelled CTPID or N
CTPID_faults = ['CTPID', 'N']
train_indices = np.isin(y_train, CTPID_faults)
X_CTPID_train = X_train.loc[train_indices]
y_CTPID_train = y_train.loc[train_indices]

# Binary target: 1 for CTPID, 0 for N
target_faults = ["CTPID"]
y_train_CTPID = np.isin(y_CTPID_train, target_faults).astype(int)

# Standardize the selected columns; the fitted scaler is kept for use at prediction time
X_train_CTPID = X_CTPID_train[CTPID_features]
CTPID_scaler = StandardScaler().fit(X_train_CTPID)
X_train_CTPID_scaled = CTPID_scaler.transform(X_train_CTPID)

# SMOTE on the training data only
smote = SMOTE(random_state=42)
X_train_CTPID_resampled, y_train_CTPID_resampled = smote.fit_resample(
    X_train_CTPID_scaled, y_train_CTPID
)

# CatBoost with the chosen hyperparameters for this stage
CTPID_paras = dict(iterations=750, subsample=0.85, learning_rate=0.05)
model_CTPID = CatBoostClassifier(random_state=42, verbose=0, thread_count=-1, **CTPID_paras)
model_CTPID.fit(X_train_CTPID_resampled, y_train_CTPID_resampled)

<catboost.core.CatBoostClassifier at 0x301355cc0>

# Step3: Custom Hierarchical Classifier

In [11]:
class HierarchicalClassifier:
    """Cascade of binary stage models that resolves every sample to a single label.

    Parameters
    ----------
    hierarchical_dict : dict
        Maps a stage name to a dict with the keys ``'model'`` (must expose
        ``predict_proba``), ``'features'`` (column names selected from the input
        frame), ``'scaler'`` (must expose ``transform``) and ``'threshold'``
        (minimum positive-class probability for the stage to claim a sample).
        The stages ``'BPX'`` and ``'BPSL'`` are required, plus one entry for
        every name listed in ``class_order``.
    class_order : sequence of str
        Stage names applied, in this order, to the samples that the ``'BPX'``
        stage did not claim. The stage name is also the label it assigns.
    """

    def __init__(self, hierarchical_dict, class_order):
        self.hierarchical_dict = hierarchical_dict
        self.class_order = class_order

    def _positive_proba(self, stage, X_subset):
        """Return the positive-class (column 1) probability of ``stage`` for ``X_subset``."""
        entry = self.hierarchical_dict[stage]
        X_scaled = entry['scaler'].transform(X_subset[entry['features']])
        return np.asarray(entry['model'].predict_proba(X_scaled))[:, 1]

    def predict(self, X_test):
        """Predict one label per row of ``X_test``.

        Parameters
        ----------
        X_test : pandas.DataFrame
            Must contain every column listed in the ``'features'`` of the stages used.

        Returns
        -------
        list of str
            One label per row, in row order. Rows claimed by the ``'BPX'`` stage
            become ``'BPL'`` or ``'BPS'``; the remaining rows get the name of the
            first stage in ``class_order`` whose probability reaches its
            threshold, or ``'N'`` when no stage claims them.
        """
        # Every sample starts as "N"; object dtype so labels of any length fit.
        num_samples = X_test.shape[0]
        final_pred = np.full(num_samples, 'N', dtype=object)
        if num_samples == 0:
            # Nothing to classify; scalers/models would reject a zero-row input.
            return final_pred.tolist()

        # Stage 1: BPX vs. everything else.
        is_bpx = self._positive_proba('BPX', X_test) >= self.hierarchical_dict['BPX']['threshold']

        # Stage 2: split the BPX samples into "BPL" (positive) and "BPS".
        bpx_indices = np.flatnonzero(is_bpx)
        if bpx_indices.size:
            proba_bpl = self._positive_proba('BPSL', X_test.iloc[bpx_indices])
            is_bpl = proba_bpl >= self.hierarchical_dict['BPSL']['threshold']
            final_pred[bpx_indices] = np.where(is_bpl, 'BPL', 'BPS')

        # Stage 3: offer the still-"N" samples to each remaining stage in turn.
        n_indices = np.flatnonzero(~is_bpx)
        for cls in self.class_order:
            if n_indices.size == 0:
                # All samples are already labelled; transforming an empty
                # selection would raise in the scaler/model.
                break
            claimed = (
                self._positive_proba(cls, X_test.iloc[n_indices])
                >= self.hierarchical_dict[cls]['threshold']
            )
            final_pred[n_indices[claimed]] = cls
            n_indices = n_indices[~claimed]

        return final_pred.tolist()


In [12]:
# Prepare Data (Testing)
df_test = test_df.copy()
X_test = df_test.drop(columns=['Fault'])
y_test = df_test['Fault']

# Fit the encoder on the complete training labels taken straight from train_df, so the class
# list does not depend on whatever `y_train` the last-executed training cell left in scope.
encoder = LabelEncoder()
encoder.fit(train_df['Fault'])
y_test_encoded = encoder.transform(y_test)

# Classes order for Hierarchical Classification
# NOTE(review): the training cells filter labels as a cascade that removes CHS, then CPP, then
# CTS, then CTF (see CHS_faults / CPP_faults / CTS_faults / CTF_faults), so the CTS model was
# trained without CPP rows, yet CTS is applied before CPP here — confirm this order is intended.
class_order = ['CHS', 'CTS', 'CPP','CTF', 'CTPID']

# Dictionary for models and their sub-features dataset.
# Each stage carries its own fitted scaler; every threshold is the default 0.5 cut-off.
hierarchical_classifier_dict = {
    'BPX': {'model': model_BPX, 'features': BPX_features, 'scaler': BPX_scaler, 'threshold': 0.5},
    'BPSL': {'model': model_BPSL, 'features': BPSL_features, 'scaler': BPSL_scaler, 'threshold': 0.5},
    'CHS': {'model': model_CHS, 'features': CHS_features, 'scaler': CHS_scaler, 'threshold': 0.5},
    'CTS': {'model': model_CTS, 'features': CTS_features, 'scaler': CTS_scaler, 'threshold': 0.5},
    'CPP': {'model': model_CPP, 'features': CPP_features, 'scaler': CPP_scaler, 'threshold': 0.5},
    'CTF': {'model': model_CTF, 'features': CTF_features, 'scaler': CTF_scaler, 'threshold': 0.5},
    'CTPID': {'model': model_CTPID, 'features': CTPID_features, 'scaler': CTPID_scaler, 'threshold': 0.5},
}

# Apply Hierarchical Classifier
hierarchical_classifier = HierarchicalClassifier(hierarchical_classifier_dict, class_order)
final_pred_labels = hierarchical_classifier.predict(X_test)

# Decode labels (round-trip through the encoder yields the test labels as an array of strings)
y_test_decoded = encoder.inverse_transform(y_test_encoded)

# Confusion Matrix & Classification Report
# `labels=` is passed to both metrics so rows/columns always follow encoder.classes_.
conf_matrix = confusion_matrix(y_test_decoded, final_pred_labels, labels=encoder.classes_)
conf_matrix_df = pd.DataFrame(conf_matrix, index=encoder.classes_, columns=encoder.classes_)
print("Confusion Matrix (with class labels):")
print(conf_matrix_df)

# Generate the classification report
print("Classification Report:")
print(classification_report(y_test_decoded, final_pred_labels,
                            labels=encoder.classes_, target_names=encoder.classes_))

Confusion Matrix (with class labels):
        BPL   BPS   CHS   CPP   CTF  CTPID   CTS     N
BPL    1112   519     0     1     0     69     0    51
BPS     209  1542     0     0     0      0     1     0
CHS       0     0  1746     0     2      0     1     3
CPP       0     0     0  1737     2      1     3     9
CTF       0     0     0     1  1534     10     7   200
CTPID    12     1     1     2    28   1224     0   484
CTS       0     0     0     0     5      1  1742     4
N         4     1     0     5   280    276     4  1182
Classification Report:
              precision    recall  f1-score   support

         BPL       0.83      0.63      0.72      1752
         BPS       0.75      0.88      0.81      1752
         CHS       1.00      1.00      1.00      1752
         CPP       0.99      0.99      0.99      1752
         CTF       0.83      0.88      0.85      1752
       CTPID       0.77      0.70      0.73      1752
         CTS       0.99      0.99      0.99      1752
           

### Reports

In [13]:
# Single definition of the output workbook name, used for both writing and the status message
report_path = "classification_report_Hierarchical (Hyper).xlsx"

# Generate the classification report.
# `labels=` pins the class order to encoder.classes_ instead of relying on the labels that
# happen to occur in the true/predicted values.
report = classification_report(y_test_decoded, final_pred_labels,
                               labels=encoder.classes_, output_dict=True)
report_df = pd.DataFrame(report).transpose()

# Generate the confusion matrix with the same explicit label order, so its shape always matches
# the index/columns assigned below
conf_matrix = confusion_matrix(y_test_decoded, final_pred_labels, labels=encoder.classes_)

# Convert confusion matrix to DataFrame for saving to Excel
conf_matrix_df = pd.DataFrame(conf_matrix,
                              index=encoder.classes_,
                              columns=encoder.classes_)

# Save the classification report and confusion matrix to an Excel file without using append mode
with pd.ExcelWriter(report_path) as writer:
    report_df.to_excel(writer, sheet_name="Classification Report")
    conf_matrix_df.to_excel(writer, sheet_name="Confusion Matrix")

print(f"Classification report and confusion matrix have been saved to {report_path}")

Classification report and confusion matrix have been saved to classification_report_Hierarchical (Hyper).xlsx
