In [1]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input
from scikeras.wrappers import KerasClassifier  # <-- modern wrapper

# -------------------------------
# 1. Load dataset
# -------------------------------
# The CSV location can be overridden through the ACOUSTIC_DATA_PATH
# environment variable; when it is unset the original local path is used.
DATA_PATH = os.environ.get(
    "ACOUSTIC_DATA_PATH", r"E:\Proj\FireExtinguisher\acoustic.csv"
)
data = pd.read_csv(DATA_PATH)

# Encode target if not numeric. Checking "is numeric" directly (instead of
# comparing against the "object" dtype) also covers category and dedicated
# string dtypes, which are non-numeric but are not reported as "object".
if not pd.api.types.is_numeric_dtype(data["STATUS"]):
    le = LabelEncoder()
    data["STATUS"] = le.fit_transform(data["STATUS"])

# Features & target
X = data.drop("STATUS", axis=1)
y = data["STATUS"]

# Train/test split: 20% held out, stratified on the target, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# -------------------------------
# 2. Preprocessing
# -------------------------------
numerical_features = ["SIZE", "DISTANCE", "DECIBEL", "AIRFLOW", "FREQUENCY"]
categorical_features = ["FUEL"]

preprocessor = ColumnTransformer(
    [
        ("num", StandardScaler(), numerical_features),
        # handle_unknown="ignore": a category that was not seen during fit is
        # encoded as all zeros instead of raising at transform time.
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ],
    # Always return a dense array: the Keras models built later in the
    # notebook are fed this matrix directly.
    sparse_threshold=0,
)

# Fit the transformers on the training split only, then reuse them on the
# held-out split.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# -------------------------------
# 3. ANN/DNN creators
# -------------------------------
def create_ann(meta=None):
    """Build and compile the shallow network: 64 -> 32 -> 1 (sigmoid).

    Parameters
    ----------
    meta : dict, optional
        Metadata mapping that scikeras hands to model-building functions
        declaring a ``meta`` parameter. When it contains ``n_features_in_``
        the input width is taken from it, so the function no longer depends
        on notebook state. When omitted (direct call ``create_ann()``), the
        width falls back to the notebook-level ``X_train_processed``.

    Returns
    -------
    Sequential
        Model compiled with the Adam optimizer, binary cross-entropy loss and
        the accuracy metric.
    """
    if meta is not None and "n_features_in_" in meta:
        n_features = int(meta["n_features_in_"])
    else:
        # Backward-compatible path: read the width from the processed
        # training matrix defined earlier in the notebook.
        n_features = X_train_processed.shape[1]

    model = Sequential()
    model.add(Input(shape=(n_features,)))  # modern input layer
    model.add(Dense(64, activation="relu"))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

def create_dnn(meta=None):
    """Build and compile the deeper network: 128 -> 64 -> 32 -> 1 (sigmoid).

    Parameters
    ----------
    meta : dict, optional
        Metadata mapping that scikeras hands to model-building functions
        declaring a ``meta`` parameter. When it contains ``n_features_in_``
        the input width is taken from it, so the function no longer depends
        on notebook state. When omitted (direct call ``create_dnn()``), the
        width falls back to the notebook-level ``X_train_processed``.

    Returns
    -------
    Sequential
        Model compiled with the Adam optimizer, binary cross-entropy loss and
        the accuracy metric.
    """
    if meta is not None and "n_features_in_" in meta:
        n_features = int(meta["n_features_in_"])
    else:
        # Backward-compatible path: read the width from the processed
        # training matrix defined earlier in the notebook.
        n_features = X_train_processed.shape[1]

    model = Sequential()
    model.add(Input(shape=(n_features,)))  # modern input layer
    model.add(Dense(128, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model
# -------------------------------
# 4. Classical Models with tuning
# -------------------------------
# k-NN: search over neighbourhood size and vote weighting; the distance
# metric is fixed to Euclidean.
param_grid_knn = dict(
    n_neighbors=[5, 7, 9],
    weights=["uniform", "distance"],
    metric=["euclidean"],
)
grid_knn = GridSearchCV(
    estimator=KNeighborsClassifier(), param_grid=param_grid_knn, cv=5
)
grid_knn.fit(X_train_processed, y_train)
best_knn = grid_knn.best_estimator_

# Random Forest: every grid key holds a single value, so the search
# evaluates exactly one candidate with 5-fold CV.
param_grid_rf = dict(
    n_estimators=[200],
    max_depth=[20],
    min_samples_split=[5],
)
grid_rf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid_rf,
    cv=5,
)
grid_rf.fit(X_train_processed, y_train)
best_rf = grid_rf.best_estimator_

# -------------------------------
# 5. ANN & DNN Wrappers
# -------------------------------
# Both wrappers share the same training settings and differ only in the
# model-building function they call.
keras_fit_settings = dict(epochs=50, batch_size=32, verbose=0, random_state=42)
ann = KerasClassifier(model=create_ann, **keras_fit_settings)
dnn = KerasClassifier(model=create_dnn, **keras_fit_settings)

# -------------------------------
# 6. Stacking (includes ANN & DNN)
# -------------------------------
# Base learners: the two tuned classical models plus the two Keras wrappers.
# A logistic regression combines their outputs; passthrough=False means the
# original features are not forwarded to it.
base_learners = [
    ("knn", best_knn),
    ("rf", best_rf),
    ("ann", ann),
    ("dnn", dnn),
]
stacking = StackingClassifier(
    estimators=base_learners,
    final_estimator=LogisticRegression(),
    passthrough=False,
)

# -------------------------------
# 7. Evaluation
# -------------------------------
# Display name -> estimator, in the order they are evaluated.
models = dict(
    [
        ("k-NN", best_knn),
        ("Random Forest", best_rf),
        ("ANN", ann),
        ("DNN", dnn),
        ("Stacking", stacking),
    ]
)


In [2]:
# Mean 3-fold cross-validated accuracy on the training split, keyed by
# model display name.
results = {}
for name, model in models.items():
    # reduce CV for speed
    scores = cross_val_score(
        estimator=model,
        X=X_train_processed,
        y=y_train,
        scoring="accuracy",
        cv=3,
    )
    mean_accuracy = scores.mean()
    results[name] = mean_accuracy
    print(f"{name} CV Accuracy: {mean_accuracy:.4f}")


k-NN CV Accuracy: 0.9600
Random Forest CV Accuracy: 0.9644
ANN CV Accuracy: 0.9553
DNN CV Accuracy: 0.9637
Stacking CV Accuracy: 0.9716


In [3]:
# Refit every model on the full training split and report held-out
# accuracy plus the per-class precision/recall/F1 report.
for name, model in models.items():
    model.fit(X_train_processed, y_train)

    # No post-processing of predictions is needed for the Keras wrappers:
    # scikeras' KerasClassifier.predict returns class labels (like any
    # scikit-learn classifier), not sigmoid probabilities. Thresholding those
    # labels at 0.5 was redundant for 0/1 targets and would corrupt any other
    # label set.
    y_pred = model.predict(X_test_processed)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"\n{name} Test Accuracy: {accuracy:.4f}")
    print(classification_report(y_test, y_pred))


k-NN Test Accuracy: 0.9616
              precision    recall  f1-score   support

           0       0.96      0.96      0.96      1752
           1       0.96      0.96      0.96      1737

    accuracy                           0.96      3489
   macro avg       0.96      0.96      0.96      3489
weighted avg       0.96      0.96      0.96      3489


Random Forest Test Accuracy: 0.9693
              precision    recall  f1-score   support

           0       0.97      0.97      0.97      1752
           1       0.97      0.97      0.97      1737

    accuracy                           0.97      3489
   macro avg       0.97      0.97      0.97      3489
weighted avg       0.97      0.97      0.97      3489


ANN Test Accuracy: 0.9602
              precision    recall  f1-score   support

           0       0.96      0.96      0.96      1752
           1       0.96      0.96      0.96      1737

    accuracy                           0.96      3489
   macro avg       0.96      0.96   