In [1]:
import os
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [2]:
# Folder that holds the resampled feature/label CSVs. Set the
# EQUIPMENT_MONITORING_DIR environment variable to run the notebook on another
# machine; when it is unset the original author's local folder is used.
DATA_DIR = Path(
    os.environ.get(
        "EQUIPMENT_MONITORING_DIR",
        r"C:\Users\ayank_hyn0aug\OneDrive\Desktop\equipment_monitoring",
    )
)

# Feature matrix: one row per observation.
X = pd.read_csv(DATA_DIR / "X_resampled.csv")

# Labels: squeeze only the column axis so the result is always a Series, even
# for a one-row file (a bare .squeeze() would collapse that case to a scalar).
# Assumes y_resampled.csv has exactly one column -- TODO confirm.
y = pd.read_csv(DATA_DIR / "y_resampled.csv").squeeze("columns")

In [3]:
# Stratified 80/10/10 split: first hold out 20% of the rows, then cut that
# hold-out in half to obtain the validation and test sets. The same seed is
# used for both cuts so the partition is reproducible.
# NOTE(review): the input file names suggest the data was resampled before
# this split -- if that was oversampling, validation/test rows may include
# synthetic samples; confirm.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=42,
)

In [4]:
# Gaussian Naive Bayes has a single hyper-parameter worth tuning here:
# var_smoothing. Candidate values are searched with 5-fold cross-validation,
# ranked by accuracy; n_jobs=-1 runs the folds in parallel.
model = GaussianNB()
param_grid = {
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5],
}
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)

In [5]:
# Run the cross-validated search on the training split only; the validation
# and test splits stay untouched for the evaluations below.
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
print(f'Best Parameters: {best_params}')

Best Parameters: {'var_smoothing': 1e-07}


In [12]:
# Score the tuned model on the rows it was fitted on. This is a baseline to
# compare against the validation numbers, not an estimate of generalization.
train_predictions = grid_search.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)

print('Training Set Results:')
print(confusion_matrix(y_train, train_predictions))
print(classification_report(y_train, train_predictions))
print(f'Training Accuracy: {train_accuracy}')

Training Set Results:
[[4986  427]
 [ 406 5008]]
              precision    recall  f1-score   support

           0       0.92      0.92      0.92      5413
           1       0.92      0.93      0.92      5414

    accuracy                           0.92     10827
   macro avg       0.92      0.92      0.92     10827
weighted avg       0.92      0.92      0.92     10827

Training Accuracy: 0.9230627135864043


In [13]:
# Score the tuned model on the validation split, which was not used for
# fitting or for the cross-validated search.
val_predictions = grid_search.predict(X_val)
val_accuracy = accuracy_score(y_val, val_predictions)

print('Validation Set Results:')
print(confusion_matrix(y_val, val_predictions))
print(classification_report(y_val, val_predictions))
print(f'Validation Accuracy: {val_accuracy}')

Validation Set Results:
[[616  61]
 [ 51 625]]
              precision    recall  f1-score   support

           0       0.92      0.91      0.92       677
           1       0.91      0.92      0.92       676

    accuracy                           0.92      1353
   macro avg       0.92      0.92      0.92      1353
weighted avg       0.92      0.92      0.92      1353

Validation Accuracy: 0.9172209903917221


In [14]:
# Preview the first five rows of the held-out test features.
X_test.head(n=5)

Unnamed: 0,temperature,pressure,vibration,humidity,equipment_Compressor,equipment_Pump,equipment_Turbine,location_Atlanta,location_Chicago,location_Houston,location_New York,location_San Francisco
8961,80.237018,25.929956,0.819636,58.901135,False,True,True,False,False,True,False,True
3890,67.382417,29.275898,2.351635,57.656303,False,False,True,True,False,False,False,False
4350,54.193961,37.252667,1.959402,44.377008,False,False,True,False,False,True,False,False
4772,75.055579,39.947699,2.280511,42.004983,True,False,False,False,True,False,False,False
11476,54.831952,34.183347,2.061994,70.947928,False,True,True,False,False,False,False,True


In [15]:
import joblib

# Persist the refit best estimator. The target folder can be overridden with
# the EQUIPMENT_MONITORING_DIR environment variable; when it is unset the
# original author's local folder is used. model_path is kept as a plain string.
MODEL_DIR = Path(
    os.environ.get(
        "EQUIPMENT_MONITORING_DIR",
        r"C:\Users\ayank_hyn0aug\OneDrive\Desktop\equipment_monitoring",
    )
)
model_path = str(MODEL_DIR / "naive_bayes_model.pkl")
joblib.dump(grid_search.best_estimator_, model_path)

# Round-trip check: reload the model from disk and predict on ten test rows
# drawn with a fixed seed, printing the true labels next to the predictions.
loaded_model = joblib.load(model_path)
sample_data = X_test.sample(10, random_state=101)
sample_predictions = loaded_model.predict(sample_data)
print('Actual Data :')
# Look the labels up by index so they line up with the sampled feature rows.
print(y_test.loc[sample_data.index].to_numpy())
print('Sample Data Predictions:')
print(sample_predictions)

Actual Data :
[0 1 0 0 1 0 0 1 1 0]
Sample Data Predictions:
[0 1 0 0 1 0 0 1 1 0]


In [16]:
# Final evaluation of the selected estimator on the held-out test split.
best_model = grid_search.best_estimator_
y_test_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)

print('Test Set Evaluation:')
print(classification_report(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print('Test Accuracy:', test_accuracy)

Test Set Evaluation:
              precision    recall  f1-score   support

           0       0.92      0.89      0.91       677
           1       0.90      0.92      0.91       677

    accuracy                           0.91      1354
   macro avg       0.91      0.91      0.91      1354
weighted avg       0.91      0.91      0.91      1354

[[605  72]
 [ 52 625]]
Test Accuracy: 0.9084194977843427
