## ANN Observations

- ANN captures non-linear relationships
- Performance comparable to ensemble methods on tabular data
- Requires careful preprocessing and tuning
- Increased complexity without guaranteed superiority


In [7]:
import os

import numpy as np
import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

import tensorflow as tf
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam


In [8]:
# Location of the ai4i2020.csv file. Set the AI4I_DATA_PATH environment
# variable to point at your own copy; when it is unset, the original
# author's local path is used so existing setups keep working.
DATA_PATH = os.environ.get(
    "AI4I_DATA_PATH",
    r"D:\FailureSense_MLProj\failuresense\data\raw\ai4i2020.csv",
)

df = pd.read_csv(DATA_PATH)

Prepare X and y

In [9]:
TARGET = "Machine failure"
DROP_COLUMNS = ["UDI", "TWF", "HDF", "PWF", "OSF", "RNF"]

# Label vector: the single target column.
y = df[TARGET]

# Feature frame: everything except the dropped columns and the target itself.
# NOTE(review): TWF/HDF/PWF/OSF/RNF look like per-failure-mode flags that would
# leak the target — confirm against the dataset documentation.
X = df.drop(columns=[*DROP_COLUMNS, TARGET])


train - test split

In [10]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed random_state makes the
# split repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split


preprocessing (same as ANN)

In [11]:
categorical_features = ["Type"]

numeric_features = [
    "Air temperature [K]",
    "Process temperature [K]",
    "Rotational speed [rpm]",
    "Torque [Nm]",
    "Tool wear [min]",
]

# One transformer per column group: standardise the numeric columns and
# one-hot encode the categorical one (dense output, unseen categories ignored).
numeric_step = ("num", StandardScaler(), numeric_features)
categorical_step = (
    "cat",
    OneHotEncoder(handle_unknown="ignore", sparse_output=False),
    categorical_features,
)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# Fit on the training split only, then apply the fitted transform to the
# test split.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)


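Reshape for the CNN

`Conv1D` expects 3D input of shape
`(samples, timesteps, channels)`.

We treat each sample's processed feature vector as a 1D sequence with a single channel (one feature per timestep):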

In [12]:
# Append a trailing singleton axis so the 2D (samples, features) matrices
# become 3D (samples, features, 1) arrays, as the Conv1D layer requires.
X_train_cnn = np.expand_dims(X_train_processed, axis=-1)
X_test_cnn = np.expand_dims(X_test_processed, axis=-1)


build 1d cnn model

In [13]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and training are repeatable (as far as the backend allows)
# after a kernel restart.
tf.keras.utils.set_random_seed(42)

# Small 1D CNN for binary classification: one convolution + pooling stage,
# then a dense head ending in a single sigmoid unit.
cnn_model = Sequential([
    # Declare the input with an explicit Input object instead of passing
    # input_shape to the first layer, which Keras warns against.
    tf.keras.Input(shape=(X_train_cnn.shape[1], 1)),
    Conv1D(filters=32, kernel_size=3, activation="relu"),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(16, activation="relu"),
    Dense(1, activation="sigmoid"),
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


compile cnn

In [14]:
# Adam optimiser with a 1e-3 learning rate; binary cross-entropy matches the
# single sigmoid output unit. Accuracy is reported during training.
adam = Adam(learning_rate=1e-3)
cnn_model.compile(optimizer=adam, loss="binary_crossentropy", metrics=["accuracy"])


train cnn

In [15]:
# Training configuration: 15 epochs, mini-batches of 32, with 20% of the
# training data held out for validation and per-epoch progress printed.
fit_options = dict(
    epochs=15,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
)

history = cnn_model.fit(X_train_cnn, y_train, **fit_options)


Epoch 1/15
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.9578 - loss: 0.2699 - val_accuracy: 0.9656 - val_loss: 0.1553
Epoch 2/15
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.9663 - loss: 0.1311 - val_accuracy: 0.9656 - val_loss: 0.1158
Epoch 3/15
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9683 - loss: 0.1053 - val_accuracy: 0.9675 - val_loss: 0.1056
Epoch 4/15
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.9702 - loss: 0.0973 - val_accuracy: 0.9700 - val_loss: 0.1013
Epoch 5/15
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.9720 - loss: 0.0922 - val_accuracy: 0.9675 - val_loss: 0.0960
Epoch 6/15
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.9731 - loss: 0.0894 - val_accuracy: 0.9681 - val_loss: 0.0944
Epoch 7/15
[1m200/200[

evaluate cnn

In [16]:
# Predicted failure probabilities for the test split, flattened to 1D.
test_scores = cnn_model.predict(X_test_cnn)
y_prob_cnn = test_scores.reshape(-1)

# Hard labels: a probability of 0.5 or more is classified as a failure (1).
y_pred_cnn = np.greater_equal(y_prob_cnn, 0.5).astype(int)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step


In [17]:
# Rows are the true classes, columns the predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred_cnn)


array([[1929,    3],
       [  58,   10]])

In [20]:
# Per-class precision, recall and F1 on the test split at the 0.5 threshold.
report = classification_report(y_true=y_test, y_pred=y_pred_cnn)
print(report)


              precision    recall  f1-score   support

           0       0.97      1.00      0.98      1932
           1       0.77      0.15      0.25        68

    accuracy                           0.97      2000
   macro avg       0.87      0.57      0.62      2000
weighted avg       0.96      0.97      0.96      2000



In [19]:
# ROC AUC is computed from the predicted probabilities, not the thresholded labels.
roc_auc_score(y_true=y_test, y_score=y_prob_cnn)


0.9478062964316161

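### CNN Observations

- The CNN was applied using a 1D convolution over the feature vector, treated as a sequence
- CNNs are designed primarily for spatial/temporal data, so they offer limited advantage on this tabular data
- Performance is comparable to the ANN but not superior to the ensemble models
- At the 0.5 threshold the model detects only 10 of the 68 failures in the test set (recall 0.15) despite 0.97 accuracy and a ROC AUC of about 0.95, so accuracy alone overstates its usefulness on this imbalanced target
- This demonstrates an experimental adaptation rather than a preferred production approach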
