# Predicting Machine Failure

In [1]:
# Import Libraries
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, classification_report, precision_recall_curve, auc, accuracy_score, precision_score, recall_score, f1_score
import ipywidgets as widgets
from ipywidgets import interact
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV
from sklearn.preprocessing import LabelEncoder
import warnings
import shap

warnings.filterwarnings('ignore')

In [2]:
# Read the preprocessed CSV and, in the same expression, swap the bracketed
# unit suffixes in the sensor column names for underscore-only names.
data = pd.read_csv('Preprocessed_Data.csv').rename(
    columns={
        'Air temperature [K]': 'Air_temperature_K',
        'Process temperature [K]': 'Process_temperature_K',
        'Rotational speed [rpm]': 'Rotational_speed_rpm',
        'Torque [Nm]': 'Torque_Nm',
        'Tool wear [min]': 'Tool_wear_min',
    }
)

# Target column first, then the six model inputs
y = data['Machine failure']
X = data[[
    'Type',
    'Air_temperature_K',
    'Process_temperature_K',
    'Rotational_speed_rpm',
    'Torque_Nm',
    'Tool_wear_min',
]]

In [3]:
# Single stratified 70/30 shuffle split (seeded for reproducibility)
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=42)

# n_splits=1, so the splitter yields exactly one (train, test) index pair
train_index, test_index = next(sss.split(X, y))
X_train, y_train = X.iloc[train_index], y.iloc[train_index]
X_test, y_test = X.iloc[test_index], y.iloc[test_index]

# Oversample with SMOTE on the training portion only; the test set is left untouched
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

In [4]:
# Baseline estimators: library defaults apart from the fixed seed
# (and a raised iteration cap for the MLP)
default_models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'XGBoost': XGBClassifier(random_state=42),
    'Neural Network': MLPClassifier(random_state=42, max_iter=350),
}

# Per-model results, keyed by the display names used in default_models
default_trained_models, default_y_preds, default_y_probas = {}, {}, {}

for name, model in default_models.items():
    # Fit on the SMOTE-resampled training data, score on the held-out test set
    model.fit(X_train_res, y_train_res)
    default_trained_models[name] = model
    default_y_preds[name] = model.predict(X_test)
    if not hasattr(model, "predict_proba"):
        continue  # no probability output to record for this estimator
    default_y_probas[name] = model.predict_proba(X_test)

In [5]:
# Search grids per model, keyed by the same display names as default_models
refined_param_grids = {
    'Random Forest': {
        'max_depth': [15, 20, 25],
        'min_samples_split': [2, 3, 4],
        'n_estimators': [100, 150, 200],
    },
    'XGBoost': {
        'learning_rate': [0.15, 0.2, 0.25],
        'max_depth': [5, 6, 7],
        'n_estimators': [150, 200, 250],
        'subsample': [0.9, 1.0],
    },
    'Neural Network': {
        'activation': ['tanh', 'relu'],
        'hidden_layer_sizes': [(50, 50), (100,)],
        'solver': ['adam', 'sgd'],
    },
}

# Exhaustive 5-fold grid search per model, keeping the estimator with the best
# mean accuracy.
# NOTE(review): the CV folds are drawn from X_train_res, i.e. after SMOTE, so
# validation folds contain synthetic samples. The CV scores are probably
# optimistic; consider resampling inside each fold (imblearn Pipeline). TODO confirm.
best_models = {}
for name, model in default_models.items():
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=refined_param_grids[name],
        scoring='accuracy',
        cv=5,
        n_jobs=-1,
    ).fit(X_train_res, y_train_res)
    best_models[name] = grid_search.best_estimator_
    print(f"Best parameters for {name}: {grid_search.best_params_}")

Best parameters for Random Forest: {'max_depth': 25, 'min_samples_split': 2, 'n_estimators': 100}
Best parameters for XGBoost: {'learning_rate': 0.2, 'max_depth': 7, 'n_estimators': 250, 'subsample': 0.9}




Best parameters for Neural Network: {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'solver': 'adam'}


In [6]:
# Evaluate the tuned models on the held-out test set.
# GridSearchCV was run with its default refit=True, so each best_estimator_ stored
# in best_models has already been re-trained on the full (X_train_res, y_train_res)
# set. Fitting it again here would only repeat that training, so the estimators
# are used as they are.
best_trained_models = {}
best_y_preds = {}
best_y_probas = {}

for name, model in best_models.items():
    best_y_preds[name] = model.predict(X_test)
    # Probabilities are stored only for estimators that expose predict_proba
    if hasattr(model, "predict_proba"):
        best_y_probas[name] = model.predict_proba(X_test)
    best_trained_models[name] = model

In [7]:
# Plot functions
def plot_confusion_matrix(y_test, y_pred, model_name):
    """Show the confusion matrix of predictions vs. true labels as a heatmap.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels.
    model_name : str
        Text appended to the figure title.
    """
    # Tick labels assume the first class is "no failure" and the second "failure"
    class_labels = ['No failure', 'Failure']

    fig, ax = plt.subplots(figsize=(10, 7))
    sns.heatmap(
        confusion_matrix(y_test, y_pred),
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_labels,
        yticklabels=class_labels,
        ax=ax,
    )
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title(f'Confusion Matrix - {model_name}')
    plt.show()

def plot_roc_curve(y_test, y_proba, model_name):
    """Plot the ROC curve and report its area in the legend.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_proba : 2-D array
        Per-class scores; column 1 is used as the score for the ROC curve.
    model_name : str
        Text appended to the figure title.
    """
    positive_scores = y_proba[:, 1]
    fpr, tpr, _ = roc_curve(y_test, positive_scores)
    roc_auc = auc(fpr, tpr)

    fig, ax = plt.subplots(figsize=(10, 7))
    ax.plot(fpr, tpr, color='blue', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
    # Diagonal reference line from (0, 0) to (1, 1)
    ax.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'Receiver Operating Characteristic - {model_name}')
    ax.legend(loc="lower right")
    plt.show()

def plot_precision_recall_curve(y_test, y_proba, model_name):
    """Plot the precision-recall curve and report the area under it in the legend.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_proba : 2-D array
        Per-class scores; column 1 is used as the score for the curve.
    model_name : str
        Text appended to the figure title.
    """
    positive_scores = y_proba[:, 1]
    precision, recall, _ = precision_recall_curve(y_test, positive_scores)
    # Area is integrated with recall on the x-axis and precision on the y-axis
    pr_auc = auc(recall, precision)

    fig, ax = plt.subplots(figsize=(10, 7))
    ax.plot(recall, precision, color='blue', lw=2,
            label='Precision-Recall curve (area = %0.2f)' % pr_auc)
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title(f'Precision-Recall Curve - {model_name}')
    ax.legend(loc="lower left")
    plt.show()

def plot_metrics(y_test, y_pred, model_name, average='weighted'):
    """Draw a bar chart of accuracy, precision, recall and F1 for one model.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels.
    model_name : str
        Text appended to the figure title.
    average : str, default 'weighted'
        Averaging mode forwarded to ``precision_score``, ``recall_score`` and
        ``f1_score``. The default keeps the previous behaviour; pass
        ``'binary'`` to report the scores of the positive class only.
    """
    metrics = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, average=average),
        'Recall': recall_score(y_test, y_pred, average=average),
        'F1 Score': f1_score(y_test, y_pred, average=average),
    }

    fig, ax = plt.subplots(figsize=(10, 7))
    bars = ax.bar(list(metrics.keys()), list(metrics.values()), color='blue')
    ax.set_xlabel('Metrics')
    ax.set_ylabel('Scores')
    # Leave headroom above 1.0 so labels on bars close to 1 are not clipped
    # (same upper limit as the ROC and precision-recall plots).
    ax.set_ylim([0, 1.05])
    ax.set_title(f'Metrics - {model_name}')

    # Annotate each bar with its value, centred horizontally on the bar
    for bar in bars:
        yval = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            yval,
            f'{yval:.2f}',
            ha='center',
            va='bottom',
        )

    plt.show()

# Dropdown Menu Function
def display_plots(model_name, plot_type, model_type):
    """Render one evaluation plot for the selected model and model variant.

    Parameters
    ----------
    model_name : str
        Key into the stored prediction dictionaries (e.g. 'Random Forest').
    plot_type : str
        One of 'Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve' or
        'Metrics'. Any other value draws nothing.
    model_type : str
        'Default' for the baseline models or 'Best' for the tuned ones.

    Raises
    ------
    ValueError
        If ``model_type`` is neither 'Default' nor 'Best'.
    """
    if model_type == 'Default':
        preds, probas = default_y_preds, default_y_probas
    elif model_type == 'Best':
        preds, probas = best_y_preds, best_y_probas
    else:
        # Previously this fell through and failed later on an unbound y_pred
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected 'Default' or 'Best'"
        )

    y_pred = preds[model_name]
    # Probabilities are only stored for estimators exposing predict_proba,
    # so a missing entry must not break the label-based plots.
    y_proba = probas.get(model_name)
    title = f"{model_name} ({model_type})"

    if plot_type == 'Confusion Matrix':
        plot_confusion_matrix(y_test, y_pred, title)
    elif plot_type == 'Metrics':
        plot_metrics(y_test, y_pred, title)
    elif plot_type in ('ROC Curve', 'Precision-Recall Curve'):
        if y_proba is None:
            print(f"No predicted probabilities stored for {title}; "
                  f"cannot draw the {plot_type}.")
            return
        if plot_type == 'ROC Curve':
            plot_roc_curve(y_test, y_proba, title)
        else:
            plot_precision_recall_curve(y_test, y_proba, title)

# Dropdowns for the three choices display_plots expects
model_dropdown = widgets.Dropdown(
    description='Model:',
    options=list(default_models),
)
plot_dropdown = widgets.Dropdown(
    description='Plot Type:',
    options=['Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve', 'Metrics'],
)
model_type_dropdown = widgets.Dropdown(
    description='Model Type:',
    options=['Default', 'Best'],
)

# Wire the dropdowns to display_plots and render the interactive view
interact(
    display_plots,
    model_name=model_dropdown,
    plot_type=plot_dropdown,
    model_type=model_type_dropdown,
)

interactive(children=(Dropdown(description='Model:', options=('Random Forest', 'XGBoost', 'Neural Network'), v…

<function __main__.display_plots(model_name, plot_type, model_type)>

# Model Explanation with SHAP

In [8]:
# One SHAP TreeExplainer per tuned tree ensemble (Random Forest first, then
# XGBoost); the resampled training frame is passed as the second argument.
rf_explainer, xgb_explainer = (
    shap.TreeExplainer(best_models[model_key], X_train_res)
    for model_key in ('Random Forest', 'XGBoost')
)

In [9]:
# SHAP values of both explainers over the resampled training data.
# The additivity check is switched off for both calls.
rf_shap_values, xgb_shap_values = (
    explainer.shap_values(X_train_res, check_additivity=False)
    for explainer in (rf_explainer, xgb_explainer)
)



In [10]:
# Column names of the resampled training frame, as a plain Python list
features = list(X_train_res.columns)

In [19]:
# Function to plot SHAP summary plots
def plot_shap_summary(model_name, class_index):
    """Draw the SHAP summary plot for one of the tuned tree models.

    Parameters
    ----------
    model_name : str
        'Random Forest' or 'XGBoost'. Any other value draws nothing.
    class_index : int
        Class whose SHAP values are shown. Only used for the Random Forest;
        the XGBoost values are plotted as computed.
    """
    if model_name == 'Random Forest':
        # Depending on the SHAP release, multi-output values come back either as
        # a list with one (samples, features) array per class or as a single
        # (samples, features, classes) array. Support both layouts.
        if isinstance(rf_shap_values, list):
            values = rf_shap_values[class_index]
        else:
            values = rf_shap_values[:, :, class_index]
    elif model_name == 'XGBoost':
        values = xgb_shap_values
    else:
        # Unknown model: nothing to plot (avoids showing an empty figure)
        return

    plt.figure()
    # show=False so the figure is rendered exactly once, by plt.show() below
    shap.summary_plot(values, X_train_res, feature_names=features, show=False)
    plt.show()

In [20]:
# Dropdowns for the model and, for the Random Forest, the class to explain
model_dropdown = widgets.Dropdown(
    description='Model:',
    options=['Random Forest', 'XGBoost'],
)
class_dropdown = widgets.Dropdown(
    description='Class (RF):',
    options=[0, 1],
)
# Wire the dropdowns to plot_shap_summary and render the interactive view
interact(
    plot_shap_summary,
    model_name=model_dropdown,
    class_index=class_dropdown,
)

interactive(children=(Dropdown(description='Model:', options=('Random Forest', 'XGBoost'), value='Random Fores…

<function __main__.plot_shap_summary(model_name, class_index)>

In [24]:
def plot_shap_dependence(model_name, interaction_feature):
    """Draw a grid of SHAP dependence plots, one per feature, for one model.

    Parameters
    ----------
    model_name : str
        'Random Forest' or 'XGBoost'. Any other value draws nothing.
    interaction_feature : str
        Feature name forwarded to SHAP as ``interaction_index``. The value
        'auto' is translated to ``None`` before being forwarded.
    """
    if model_name == 'Random Forest':
        # Class 1 is explained. Depending on the SHAP release the values are a
        # per-class list or a (samples, features, classes) array.
        if isinstance(rf_shap_values, list):
            shap_values = rf_shap_values[1]
        else:
            shap_values = rf_shap_values[:, :, 1]
    elif model_name == 'XGBoost':
        shap_values = xgb_shap_values
    else:
        return

    if not features:
        # Nothing to plot; a grid with zero rows cannot be created
        return

    # Grid layout: three columns, as many rows as needed (ceiling division)
    n_cols = 3
    n_rows = (len(features) + n_cols - 1) // n_cols

    # NOTE(review): 'auto' is mapped to None here. Per the SHAP docs None means
    # "no interaction colouring", while the string 'auto' asks SHAP to pick the
    # strongest interaction itself. Confirm which of the two is intended.
    interaction_index = None if interaction_feature == 'auto' else interaction_feature

    # squeeze=False always yields a 2-D axes array, whatever the grid shape
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, n_rows * 5), squeeze=False)
    axes = axes.flatten()

    for ax, feature in zip(axes, features):
        shap.dependence_plot(
            feature,
            shap_values,
            X_train_res,
            interaction_index=interaction_index,
            ax=ax,
            show=False,
        )
        ax.set_title(f'{model_name} SHAP Dependence for {feature}')

    # Remove the unused axes at the end of the grid
    for unused_ax in axes[len(features):]:
        fig.delaxes(unused_ax)

    plt.tight_layout()
    plt.show()

# Dropdowns for the model and the interaction feature ('auto' plus every feature)
model_dropdown = widgets.Dropdown(
    description='Model:',
    options=['Random Forest', 'XGBoost'],
)
interaction_feature_dropdown = widgets.Dropdown(
    description='Interact:',
    options=['auto', *features],
)
# Wire the dropdowns to plot_shap_dependence and render the interactive view
interact(
    plot_shap_dependence,
    model_name=model_dropdown,
    interaction_feature=interaction_feature_dropdown,
)

interactive(children=(Dropdown(description='Model:', options=('Random Forest', 'XGBoost'), value='Random Fores…

<function __main__.plot_shap_dependence(model_name, interaction_feature)>