In [1]:
import numpy as np
import pandas as pd

# Seed for reproducibility
np.random.seed(42)

# Number of samples
n_samples = 1000

# Generate synthetic sensor data
thermal_data = np.random.normal(loc=75, scale=10, size=n_samples)  # Mean temperature of 75°C, standard deviation of 10
vibrational_data = np.random.normal(loc=1.0, scale=0.5, size=n_samples)  # Mean vibration of 1.0, standard deviation of 0.5
acoustic_data = np.random.normal(loc=60, scale=5, size=n_samples)  # Mean acoustic level of 60 dB, standard deviation of 5

# Generate binary target variable: 0 for no failure, 1 for failure
# Failures are randomly assigned with a 20% probability
failure = np.random.binomial(1, 0.2, n_samples)

# Create a DataFrame
data = pd.DataFrame({
    'thermal': thermal_data,
    'vibrational': vibrational_data,
    'acoustic': acoustic_data,
    'failure': failure
})

# Display the first few rows of the dataset
print(data.head())


     thermal  vibrational   acoustic  failure
0  79.967142     1.699678  56.624109        1
1  73.617357     1.462317  59.277407        1
2  81.476885     1.029815  56.037900        0
3  90.230299     0.676532  58.460192        1
4  72.658466     1.349112  50.531927        0


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the data into features and target variable
X = data[['thermal', 'vibrational', 'acoustic']]
y = data['failure']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Initialize and train the Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Confusion Matrix:
[[237   6]
 [ 56   1]]

Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.98      0.88       243
           1       0.14      0.02      0.03        57

    accuracy                           0.79       300
   macro avg       0.48      0.50      0.46       300
weighted avg       0.68      0.79      0.72       300



In [4]:
import joblib

# Save the model
joblib.dump(model, 'predictive_maintenance_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

# Load the model
loaded_model = joblib.load('predictive_maintenance_model.pkl')
loaded_scaler = joblib.load('scaler.pkl')

# Test the loaded model
y_pred_loaded = loaded_model.predict(X_test_scaled)
print("Loaded Model Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_loaded))
print("\nLoaded Model Classification Report:")
print(classification_report(y_test, y_pred_loaded))


Loaded Model Confusion Matrix:
[[237   6]
 [ 56   1]]

Loaded Model Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.98      0.88       243
           1       0.14      0.02      0.03        57

    accuracy                           0.79       300
   macro avg       0.48      0.50      0.46       300
weighted avg       0.68      0.79      0.72       300



In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import joblib


1. Logistic Regression

In [6]:
# Define the model and parameters
log_reg = LogisticRegression(max_iter=1000)
log_reg_params = {
    'C': [0.01, 0.1, 1, 10],
    'penalty': ['l2']
}

# Grid search
log_reg_grid = GridSearchCV(log_reg, log_reg_params, cv=5, scoring='accuracy')
log_reg_grid.fit(X_train_scaled, y_train)

# Best parameters and performance
print("Logistic Regression Best Parameters:", log_reg_grid.best_params_)
print("Logistic Regression Best Score:", log_reg_grid.best_score_)


Logistic Regression Best Parameters: {'C': 0.01, 'penalty': 'l2'}
Logistic Regression Best Score: 0.8028571428571428


In [7]:
# Define the model and parameters
decision_tree = DecisionTreeClassifier()
dt_params = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Grid search
dt_grid = GridSearchCV(decision_tree, dt_params, cv=5, scoring='accuracy')
dt_grid.fit(X_train_scaled, y_train)

# Best parameters and performance
print("Decision Tree Best Parameters:", dt_grid.best_params_)
print("Decision Tree Best Score:", dt_grid.best_score_)


Decision Tree Best Parameters: {'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2}
Decision Tree Best Score: 0.7428571428571429


In [8]:
# Define the model and parameters
random_forest = RandomForestClassifier()
rf_params = {
    'n_estimators': [50, 100, 200],
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Grid search
rf_grid = GridSearchCV(random_forest, rf_params, cv=5, scoring='accuracy')
rf_grid.fit(X_train_scaled, y_train)

# Best parameters and performance
print("Random Forest Best Parameters:", rf_grid.best_params_)
print("Random Forest Best Score:", rf_grid.best_score_)


Random Forest Best Parameters: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 100}
Random Forest Best Score: 0.7985714285714286


In [9]:
# Define the model and parameters
gbm = GradientBoostingClassifier()
gbm_params = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}

# Grid search
gbm_grid = GridSearchCV(gbm, gbm_params, cv=5, scoring='accuracy')
gbm_grid.fit(X_train_scaled, y_train)

# Best parameters and performance
print("GBM Best Parameters:", gbm_grid.best_params_)
print("GBM Best Score:", gbm_grid.best_score_)


GBM Best Parameters: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50}
GBM Best Score: 0.8028571428571428


In [10]:
# Define the model and parameters
svm = SVC()
svm_params = {
    'C': [0.01, 0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

# Grid search
svm_grid = GridSearchCV(svm, svm_params, cv=5, scoring='accuracy')
svm_grid.fit(X_train_scaled, y_train)

# Best parameters and performance
print("SVM Best Parameters:", svm_grid.best_params_)
print("SVM Best Score:", svm_grid.best_score_)


SVM Best Parameters: {'C': 0.01, 'gamma': 'scale', 'kernel': 'linear'}
SVM Best Score: 0.8028571428571428


In [11]:
# Define the model and parameters
mlp = MLPClassifier(max_iter=1000)
mlp_params = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50)],
    'activation': ['tanh', 'relu'],
    'solver': ['adam', 'sgd']
}

# Grid search
mlp_grid = GridSearchCV(mlp, mlp_params, cv=5, scoring='accuracy')
mlp_grid.fit(X_train_scaled, y_train)

# Best parameters and performance
print("MLP Best Parameters:", mlp_grid.best_params_)
print("MLP Best Score:", mlp_grid.best_score_)




MLP Best Parameters: {'activation': 'tanh', 'hidden_layer_sizes': (50,), 'solver': 'adam'}
MLP Best Score: 0.8028571428571428


In [12]:
# Evaluate Logistic Regression
y_pred_log_reg = log_reg_grid.best_estimator_.predict(X_test_scaled)
print("Logistic Regression Test Performance:")
print(confusion_matrix(y_test, y_pred_log_reg))
print(classification_report(y_test, y_pred_log_reg))

# Evaluate Decision Tree
y_pred_dt = dt_grid.best_estimator_.predict(X_test_scaled)
print("Decision Tree Test Performance:")
print(confusion_matrix(y_test, y_pred_dt))
print(classification_report(y_test, y_pred_dt))

# Evaluate Random Forest
y_pred_rf = rf_grid.best_estimator_.predict(X_test_scaled)
print("Random Forest Test Performance:")
print(confusion_matrix(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

# Evaluate GBM
y_pred_gbm = gbm_grid.best_estimator_.predict(X_test_scaled)
print("GBM Test Performance:")
print(confusion_matrix(y_test, y_pred_gbm))
print(classification_report(y_test, y_pred_gbm))

# Evaluate SVM
y_pred_svm = svm_grid.best_estimator_.predict(X_test_scaled)
print("SVM Test Performance:")
print(confusion_matrix(y_test, y_pred_svm))
print(classification_report(y_test, y_pred_svm))

# Evaluate MLP
y_pred_mlp = mlp_grid.best_estimator_.predict(X_test_scaled)
print("MLP Test Performance:")
print(confusion_matrix(y_test, y_pred_mlp))
print(classification_report(y_test, y_pred_mlp))


Logistic Regression Test Performance:
[[243   0]
 [ 57   0]]
              precision    recall  f1-score   support

           0       0.81      1.00      0.90       243
           1       0.00      0.00      0.00        57

    accuracy                           0.81       300
   macro avg       0.41      0.50      0.45       300
weighted avg       0.66      0.81      0.72       300

Decision Tree Test Performance:
[[223  20]
 [ 51   6]]
              precision    recall  f1-score   support

           0       0.81      0.92      0.86       243
           1       0.23      0.11      0.14        57

    accuracy                           0.76       300
   macro avg       0.52      0.51      0.50       300
weighted avg       0.70      0.76      0.73       300

Random Forest Test Performance:
[[242   1]
 [ 57   0]]
              precision    recall  f1-score   support

           0       0.81      1.00      0.89       243
           1       0.00      0.00      0.00        57

    accurac

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
from sklearn.metrics import classification_report, confusion_matrix

# Define a dictionary to store performance metrics
performance = {}

# Logistic Regression
y_pred_log_reg = log_reg_grid.best_estimator_.predict(X_test_scaled)
conf_matrix_log_reg = confusion_matrix(y_test, y_pred_log_reg)
report_log_reg = classification_report(y_test, y_pred_log_reg, output_dict=True)
performance['Logistic Regression'] = {
    'accuracy': report_log_reg['accuracy'],
    'precision': report_log_reg['1']['precision'],
    'recall': report_log_reg['1']['recall'],
    'f1_score': report_log_reg['1']['f1-score']
}

# Decision Tree
y_pred_dt = dt_grid.best_estimator_.predict(X_test_scaled)
conf_matrix_dt = confusion_matrix(y_test, y_pred_dt)
report_dt = classification_report(y_test, y_pred_dt, output_dict=True)
performance['Decision Tree'] = {
    'accuracy': report_dt['accuracy'],
    'precision': report_dt['1']['precision'],
    'recall': report_dt['1']['recall'],
    'f1_score': report_dt['1']['f1-score']
}

# Random Forest
y_pred_rf = rf_grid.best_estimator_.predict(X_test_scaled)
conf_matrix_rf = confusion_matrix(y_test, y_pred_rf)
report_rf = classification_report(y_test, y_pred_rf, output_dict=True)
performance['Random Forest'] = {
    'accuracy': report_rf['accuracy'],
    'precision': report_rf['1']['precision'],
    'recall': report_rf['1']['recall'],
    'f1_score': report_rf['1']['f1-score']
}

# GBM
y_pred_gbm = gbm_grid.best_estimator_.predict(X_test_scaled)
conf_matrix_gbm = confusion_matrix(y_test, y_pred_gbm)
report_gbm = classification_report(y_test, y_pred_gbm, output_dict=True)
performance['Gradient Boosting Machine'] = {
    'accuracy': report_gbm['accuracy'],
    'precision': report_gbm['1']['precision'],
    'recall': report_gbm['1']['recall'],
    'f1_score': report_gbm['1']['f1-score']
}

# SVM
y_pred_svm = svm_grid.best_estimator_.predict(X_test_scaled)
conf_matrix_svm = confusion_matrix(y_test, y_pred_svm)
report_svm = classification_report(y_test, y_pred_svm, output_dict=True)
performance['SVM'] = {
    'accuracy': report_svm['accuracy'],
    'precision': report_svm['1']['precision'],
    'recall': report_svm['1']['recall'],
    'f1_score': report_svm['1']['f1-score']
}

# MLP
y_pred_mlp = mlp_grid.best_estimator_.predict(X_test_scaled)
conf_matrix_mlp = confusion_matrix(y_test, y_pred_mlp)
report_mlp = classification_report(y_test, y_pred_mlp, output_dict=True)
performance['MLP'] = {
    'accuracy': report_mlp['accuracy'],
    'precision': report_mlp['1']['precision'],
    'recall': report_mlp['1']['recall'],
    'f1_score': report_mlp['1']['f1-score']
}

# Print performance metrics
for model, metrics in performance.items():
    print(f"\n{model} Performance:")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1 Score: {metrics['f1_score']:.4f}")

# Determine the best-performing model based on F1 Score
best_model = max(performance, key=lambda m: performance[m]['f1_score'])
print(f"\nBest Performing Model: {best_model}")



Logistic Regression Performance:
Accuracy: 0.8100
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000

Decision Tree Performance:
Accuracy: 0.7633
Precision: 0.2308
Recall: 0.1053
F1 Score: 0.1446

Random Forest Performance:
Accuracy: 0.8067
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000

Gradient Boosting Machine Performance:
Accuracy: 0.8100
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000

SVM Performance:
Accuracy: 0.8100
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000

MLP Performance:
Accuracy: 0.8100
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000

Best Performing Model: Decision Tree


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [14]:
from sklearn.metrics import classification_report, confusion_matrix

# Define a dictionary to store performance metrics
performance = {}

def evaluate_model(model_name, model, X_test_scaled, y_test):
    y_pred = model.predict(X_test_scaled)
    conf_matrix = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)
    
    print(f"\n{model_name} Performance:")
    print("Confusion Matrix:")
    print(conf_matrix)
    
    accuracy = report['accuracy']
    precision = report['1']['precision']
    recall = report['1']['recall']
    f1_score = report['1']['f1-score']
    
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1_score:.4f}")

    return f1_score

# Evaluate each model
f1_scores = {}

f1_scores['Logistic Regression'] = evaluate_model('Logistic Regression', log_reg_grid.best_estimator_, X_test_scaled, y_test)
f1_scores['Decision Tree'] = evaluate_model('Decision Tree', dt_grid.best_estimator_, X_test_scaled, y_test)
f1_scores['Random Forest'] = evaluate_model('Random Forest', rf_grid.best_estimator_, X_test_scaled, y_test)
f1_scores['Gradient Boosting Machine'] = evaluate_model('GBM', gbm_grid.best_estimator_, X_test_scaled, y_test)
f1_scores['SVM'] = evaluate_model('SVM', svm_grid.best_estimator_, X_test_scaled, y_test)
f1_scores['MLP'] = evaluate_model('MLP', mlp_grid.best_estimator_, X_test_scaled, y_test)

# Determine the best-performing model based on F1 Score
best_model = max(f1_scores, key=lambda m: f1_scores[m])
print(f"\nBest Performing Model: {best_model}")



Logistic Regression Performance:
Confusion Matrix:
[[243   0]
 [ 57   0]]
Accuracy: 0.8100
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000

Decision Tree Performance:
Confusion Matrix:
[[223  20]
 [ 51   6]]
Accuracy: 0.7633
Precision: 0.2308
Recall: 0.1053
F1 Score: 0.1446

Random Forest Performance:
Confusion Matrix:
[[242   1]
 [ 57   0]]
Accuracy: 0.8067
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000

GBM Performance:
Confusion Matrix:
[[243   0]
 [ 57   0]]
Accuracy: 0.8100
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000

SVM Performance:
Confusion Matrix:
[[243   0]
 [ 57   0]]
Accuracy: 0.8100
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000

MLP Performance:
Confusion Matrix:
[[243   0]
 [ 57   0]]
Accuracy: 0.8100
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000

Best Performing Model: Decision Tree
