In [6]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [7]:
X_resampled = pd.read_csv('X_oversampled.csv').drop(columns=['Unnamed: 0'], errors='ignore')
y_resampled = pd.read_csv('y_resampled.csv')
X_resampled.head()


Unnamed: 0,temperature,pressure,vibration,humidity,equipment_Compressor,equipment_Pump,equipment_Turbine,location_Atlanta,location_Chicago,location_Houston,location_New York,location_San Francisco
0,58.18018,25.029278,0.606516,45.694907,False,False,True,True,False,False,False,False
1,75.740712,22.954018,2.338095,41.867407,True,False,False,False,True,False,False,False
2,71.358594,27.27683,1.389198,58.954409,False,False,True,False,False,False,False,True
3,71.616985,32.242921,1.77069,40.565138,False,True,False,True,False,False,False,False
4,66.506832,45.197471,0.345398,43.253795,False,True,False,False,False,False,True,False


In [15]:
X_train, X_temp, y_train, y_temp = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [16]:
bagging_model_knn = BaggingClassifier(estimator=KNeighborsClassifier(n_neighbors=5), n_estimators=50, random_state=42)


In [17]:
bagging_model_knn.fit(X_train, y_train.values.ravel())

In [21]:
train_predictions = bagging_model_knn.predict(X_train)
train_conf_matrix = confusion_matrix(y_train, train_predictions)
train_class_report = classification_report(y_train, train_predictions)
train_accuracy = accuracy_score(y_train, train_predictions)

print('Training Confusion Matrix:\n', train_conf_matrix)
print('\nTraining Classification Report:\n', train_class_report)
print(f'Training Accuracy: {train_accuracy * 100:.2f}%')

Training Confusion Matrix:
 [[4955  451]
 [   9 5412]]

Training Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.92      0.96      5406
           1       0.92      1.00      0.96      5421

    accuracy                           0.96     10827
   macro avg       0.96      0.96      0.96     10827
weighted avg       0.96      0.96      0.96     10827

Training Accuracy: 95.75%


In [19]:
val_predictions = bagging_model_knn.predict(X_val)
val_conf_matrix = confusion_matrix(y_val, val_predictions)
val_class_report = classification_report(y_val, val_predictions)
val_accuracy = accuracy_score(y_val, val_predictions)

print('Validation Confusion Matrix:\n', val_conf_matrix)
print('\nValidation Classification Report:\n', val_class_report)
print(f'Validation Accuracy: {val_accuracy * 100:.2f}%')

Validation Confusion Matrix:
 [[633  72]
 [  4 644]]

Validation Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.90      0.94       705
           1       0.90      0.99      0.94       648

    accuracy                           0.94      1353
   macro avg       0.95      0.95      0.94      1353
weighted avg       0.95      0.94      0.94      1353

Validation Accuracy: 94.38%


In [20]:
# Evaluate on test set
test_predictions = bagging_model_knn.predict(X_test)
test_conf_matrix = confusion_matrix(y_test, test_predictions)
test_class_report = classification_report(y_test, test_predictions)
test_accuracy = accuracy_score(y_test, test_predictions)

print('Test Confusion Matrix:\n', test_conf_matrix)
print('\nTest Classification Report:\n', test_class_report)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Test Confusion Matrix:
 [[580  76]
 [  5 693]]

Test Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.88      0.93       656
           1       0.90      0.99      0.94       698

    accuracy                           0.94      1354
   macro avg       0.95      0.94      0.94      1354
weighted avg       0.94      0.94      0.94      1354

Test Accuracy: 94.02%
