In [6]:
import pandas as pd
import numpy as np
from Gaussian import Gaussian
from KNN import KNN
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report
import pickle

In [7]:
# Read the raw predictive-maintenance CSV into a DataFrame and preview it.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which looks
# like a saved index read back as data. Confirm what the pickled preprocessor
# expects before switching to index_col=0.
data = pd.read_csv(filepath_or_buffer="predictive_maintenance_raw_data.csv")
data.head(n=5)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [8]:
# Encode the multi-class 'Failure Type' label as integer codes.
target = data['Failure Type']
label_encoder = LabelEncoder()
target_encoded = label_encoder.fit_transform(target)

# A class's position in `classes_` is its encoded value, so the mapping can be
# read off with enumerate. This avoids a redundant transform() call and keeps
# the values as plain Python ints (clean repr, JSON-serialisable) instead of
# numpy scalars.
label_mapping = {label: code for code, label in enumerate(label_encoder.classes_)}
print("Label Mapping:", label_mapping)

Label Mapping: {'Heat Dissipation Failure': np.int64(0), 'No Failure': np.int64(1), 'Overstrain Failure': np.int64(2), 'Power Failure': np.int64(3), 'Random Failures': np.int64(4), 'Tool Wear Failure': np.int64(5)}


In [9]:
# Restore the preprocessing pipeline object from its pickle file.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load an artifact you produced yourself or otherwise trust.
# NOTE(review): transform() is called without fit(), so the pipeline is
# presumably already fitted. If it was fitted on the full dataset, the
# cross-validation folds below share its statistics; confirm.
with open('preprocessing_pipeline.pkl', 'rb') as pipeline_file:
    preprocessor = pickle.load(pipeline_file)

# Every column except the label is handed to the pipeline.
# NOTE(review): this still includes 'Target' (plus 'Unnamed: 0', 'UDI' and
# 'Product ID'). Confirm the pipeline drops them, otherwise 'Target' would
# leak the label into the features.
X_raw = data.drop(columns='Failure Type')
X_processed = preprocessor.transform(X_raw)

In [10]:
def _score_fold(model_name, y_true, y_pred, leading_blank=False):
    """Print and return accuracy plus macro-averaged precision/recall/F1.

    Parameters
    ----------
    model_name : str
        Prefix written at the start of every printed line (e.g. "KNN").
    y_true, y_pred : array-like
        True and predicted encoded labels for one fold's test split.
    leading_blank : bool, default False
        When True, an empty line is printed before the metrics.

    Returns
    -------
    dict
        Maps "Accuracy", "Precision", "Recall" and "F1-Score" to floats.
        The last three are the 'macro avg' row of classification_report.
    """
    macro_avg = classification_report(
        y_true, y_pred, output_dict=True, zero_division=0
    )['macro avg']
    scores = {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": macro_avg['precision'],
        "Recall": macro_avg['recall'],
        "F1-Score": macro_avg['f1-score'],
    }

    if leading_blank:
        print()
    for metric, value in scores.items():
        # Pad "<metric>:" to 11 columns so the values line up.
        label = f"{metric}:"
        print(f"{model_name} {label:<11}{value:.4f}")
    return scores


# 5-fold stratified cross-validation with a fixed seed for reproducible splits.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=31)

# Per-fold accuracies, kept under their original names.
knn_accuracies = []
gaussian_accuracies = []
# Full per-fold metric dicts, so the macro metrics can be averaged as well.
knn_fold_scores = []
gaussian_fold_scores = []

print("========== Stratified K-Fold Evaluation ==========")

for fold, (train_index, test_index) in enumerate(skf.split(X_processed, target_encoded), 1):
    X_train, X_test = X_processed[train_index], X_processed[test_index]
    y_train, y_test = target_encoded[train_index], target_encoded[test_index]

    print(f"\n--- Fold {fold} ---")

    # KNN: receives the training features with the label appended as the
    # last column, and predicts one test row at a time.
    # NOTE(review): n=5 is presumably the number of neighbours; confirm in KNN.py.
    knn = KNN(data=np.column_stack((X_train, y_train)), n=5)
    knn_preds = [knn.predict(x) for x in X_test]
    knn_scores = _score_fold("KNN", y_test, knn_preds)
    knn_acc = knn_scores["Accuracy"]
    knn_accuracies.append(knn_acc)
    knn_fold_scores.append(knn_scores)

    # Gaussian: fitted on the fold's training split, scored row by row.
    # NOTE(review): argmax assumes predict_proba returns one score per class,
    # ordered by encoded label (0..n_classes-1); confirm in Gaussian.py.
    gnb = Gaussian()
    gnb.fit(X_train, y_train)
    gnb_probs = np.array([gnb.predict_proba(x) for x in X_test])
    gnb_preds = np.argmax(gnb_probs, axis=1)
    gnb_scores = _score_fold("Gaussian", y_test, gnb_preds, leading_blank=True)
    gnb_acc = gnb_scores["Accuracy"]
    gaussian_accuracies.append(gnb_acc)
    gaussian_fold_scores.append(gnb_scores)

# Overall averages
print("\n========== Overall Averages ==========")
print(f"KNN Mean Accuracy:      {np.mean(knn_accuracies):.4f}")
print(f"Gaussian Mean Accuracy: {np.mean(gaussian_accuracies):.4f}")

# Accuracy alone hides minority-class performance on this imbalanced target
# (the per-fold macro scores sit far below the accuracies), so the macro
# metrics are averaged across folds too.
for model_name, fold_scores in (("KNN", knn_fold_scores), ("Gaussian", gaussian_fold_scores)):
    for metric in ("Precision", "Recall", "F1-Score"):
        mean_value = np.mean([scores[metric] for scores in fold_scores])
        label = f"{model_name} Mean Macro {metric}:"
        print(f"{label:<31}{mean_value:.4f}")



--- Fold 1 ---
KNN Accuracy:  0.9725
KNN Precision: 0.5511
KNN Recall:    0.3378
KNN F1-Score:  0.3979

Gaussian Accuracy:  0.9430
Gaussian Precision: 0.2862
Gaussian Recall:    0.3972
Gaussian F1-Score:  0.3224

--- Fold 2 ---
KNN Accuracy:  0.9720
KNN Precision: 0.4236
KNN Recall:    0.2873
KNN F1-Score:  0.3296

Gaussian Accuracy:  0.9570
Gaussian Precision: 0.3470
Gaussian Recall:    0.4821
Gaussian F1-Score:  0.3955

--- Fold 3 ---
KNN Accuracy:  0.9700
KNN Precision: 0.4733
KNN Recall:    0.2998
KNN F1-Score:  0.3505

Gaussian Accuracy:  0.9495
Gaussian Precision: 0.3295
Gaussian Recall:    0.4571
Gaussian F1-Score:  0.3745

--- Fold 4 ---
KNN Accuracy:  0.9710
KNN Precision: 0.4647
KNN Recall:    0.3176
KNN F1-Score:  0.3633

Gaussian Accuracy:  0.9575
Gaussian Precision: 0.3543
Gaussian Recall:    0.4767
Gaussian F1-Score:  0.3961

--- Fold 5 ---
KNN Accuracy:  0.9740
KNN Precision: 0.5103
KNN Recall:    0.3662
KNN F1-Score:  0.4119

Gaussian Accuracy:  0.9500
Gaussian Precisi