In [14]:
# Import Libraries
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, classification_report, precision_recall_curve, auc, accuracy_score, precision_score, recall_score, f1_score
import ipywidgets as widgets
from ipywidgets import interact
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV
from sklearn.preprocessing import LabelEncoder
import warnings
import shap
import numpy as np
from art.attacks.poisoning import PoisoningAttackBackdoor, FeatureCollisionAttack
from art.estimators.classification import SklearnClassifier
from art.utils import to_categorical

sns.set(style="whitegrid")
warnings.filterwarnings('ignore')

In [15]:
# Load the preprocessed data
data = pd.read_csv('Preprocessed_Data.csv')

# Rename columns to remove special characters
data.rename(columns={
    'Air temperature [K]': 'Air_temperature_K',
    'Process temperature [K]': 'Process_temperature_K',
    'Rotational speed [rpm]': 'Rotational_speed_rpm',
    'Torque [Nm]': 'Torque_Nm',
    'Tool wear [min]': 'Tool_wear_min'
}, inplace=True)

# Create the 'No failure' column
data['No failure'] = 1 - data['Machine failure']

# Define features and target
X = data[['Type', 'Air_temperature_K', 'Process_temperature_K', 'Rotational_speed_rpm', 'Torque_Nm', 'Tool_wear_min']]
y = data[['No failure', 'TWF', 'HDF', 'PWF', 'OSF', 'RNF']].idxmax(axis=1)

# Encode the target variable
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
set(y_encoded)

{0, 1, 2, 3, 4}

In [16]:
# Initialize stratified split
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=42)

for train_index, test_index in sss.split(X, y_encoded):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y_encoded[train_index], y_encoded[test_index]

# Apply SMOTE to oversample the training data
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# Define all possible target names
all_classes = label_encoder.classes_
set(all_classes)

{'HDF', 'No failure', 'OSF', 'PWF', 'TWF'}

In [17]:
# Define models with the best parameters
models = {
    'Random Forest': RandomForestClassifier(
        max_depth=35, 
        min_samples_split=3, 
        n_estimators=150, 
        random_state=42
    ),
    'XGBoost': XGBClassifier(
        learning_rate=0.4, 
        max_depth=5, 
        n_estimators=400, 
        subsample=1.0, 
        random_state=42
    ),
    'Neural Network': MLPClassifier(
        activation='relu', 
        hidden_layer_sizes=(50, 50), 
        solver='adam', 
        max_iter=350, 
        random_state=42
    )
}

In [18]:
import numpy as np

# Define the poisoning function for multiclass classification
def label_flip_poisoning(X_train, y_train, poison_percentage, target_class):
    # Target Class 1: Flipping "no failure" (y_train == 1) to the 6th failure (RNF, y_train == 6)
    if target_class == 1:
        # Find indices where y_train indicates no failure (y_train == 1)
        no_failure_indices = np.where(y_train == 1)[0]
        
        # Determine the number of labels to flip based on the poison percentage
        num_to_flip = int(poison_percentage * len(no_failure_indices))
        
        # Randomly select indices to flip
        flip_indices = np.random.choice(no_failure_indices, size=num_to_flip, replace=False)
        
        # Create poisoned labels by copying y_train
        y_train_poisoned = y_train.copy()
        
        # Flip the selected "no failure" labels (1) to the 6th failure (RNF, y_train == 6)
        y_train_poisoned[flip_indices] = 0
    
    # Target Class 2: Flipping failure types (2 to 6) to "no failure" (y_train == 1)
    else:
        # Find indices where y_train indicates failure (y_train in [2, 3, 4, 5, 6])
        failure_indices = np.where(y_train > 1)[0]
        
        # Determine the number of labels to flip based on the poison percentage
        num_to_flip = int(poison_percentage * len(failure_indices))
        
        # Randomly select indices to flip
        flip_indices = np.random.choice(failure_indices, size=num_to_flip, replace=False)
        
        # Create poisoned labels by copying y_train
        y_train_poisoned = y_train.copy()
        
        # Flip the selected failure labels (2 to 6) to "no failure" (1)
        y_train_poisoned[flip_indices] = 1
    
    return X_train, y_train_poisoned


In [19]:
# Define the poisoning percentages to test
poison_percentages = [0, 0.1, 0.2, 0.3, 0.4, 0.5]
target_classes = [1, 2]  # 1 for no failure (flipping to failure), 2 for failure (flipping to no failure)

# Initialize dictionaries to hold DataFrames for each metric, model, and target class
accuracy_results = {'Model': [], 'Target': []}
precision_results = {'Model': [], 'Target': []}
recall_results = {'Model': [], 'Target': []}
f1_results = {'Model': [], 'Target': []}

# Add columns for each poisoning percentage in the results dictionaries
for poison_percentage in poison_percentages:
    accuracy_results[f'{int(poison_percentage * 100)}%'] = []
    precision_results[f'{int(poison_percentage * 100)}%'] = []
    recall_results[f'{int(poison_percentage * 100)}%'] = []
    f1_results[f'{int(poison_percentage * 100)}%'] = []

# Train and evaluate each model for both target classes
for name, model in models.items():
    print(f'Model: {name}')
    
    # Loop through each target class (1 and 2)
    for target_class in target_classes:
        print(f'Target Class: {target_class}')
        
        # Store the model name and target class in each results dictionary
        accuracy_results['Model'].append(name)
        precision_results['Model'].append(name)
        recall_results['Model'].append(name)
        f1_results['Model'].append(name)
        
        accuracy_results['Target'].append(target_class)
        precision_results['Target'].append(target_class)
        recall_results['Target'].append(target_class)
        f1_results['Target'].append(target_class)
        
        # Train the model on clean data
        model.fit(X_train_res, y_train_res)
        
        # Make predictions on the clean test set
        y_pred = model.predict(X_test)
        
        # Calculate metrics for clean data (0% poisoning)
        clean_accuracy = accuracy_score(y_test, y_pred)
        clean_precision = precision_score(y_test, y_pred, average='weighted')
        clean_recall = recall_score(y_test, y_pred, average='weighted')
        clean_f1 = f1_score(y_test, y_pred, average='weighted')
        
        # Store the clean metrics (0% poisoning) for the current target class
        accuracy_results['0%'].append(clean_accuracy)
        precision_results['0%'].append(clean_precision)
        recall_results['0%'].append(clean_recall)
        f1_results['0%'].append(clean_f1)
        
        # Loop through each poisoning percentage (starting from 10%)
        for poison_percentage in poison_percentages[1:]:
            print(f'Poison percentage: {poison_percentage}')
            
            # Poison the training data based on the current target class
            X_train_poisoned, y_train_poisoned = label_flip_poisoning(X_train_res, y_train_res, poison_percentage, target_class)
            
            # Train the model on poisoned data
            model.fit(X_train_poisoned, y_train_poisoned)
            
            # Make predictions on the clean test set
            y_pred_poisoned = model.predict(X_test)
            
            # Calculate metrics on the clean test set after training on poisoned data
            poisoned_accuracy = accuracy_score(y_test, y_pred_poisoned)
            poisoned_precision = precision_score(y_test, y_pred_poisoned, average='weighted')
            poisoned_recall = recall_score(y_test, y_pred_poisoned, average='weighted')
            poisoned_f1 = f1_score(y_test, y_pred_poisoned, average='weighted')
            
            # Store the poisoned metrics based on the poison percentage for the current target class
            accuracy_results[f'{int(poison_percentage * 100)}%'].append(poisoned_accuracy)
            precision_results[f'{int(poison_percentage * 100)}%'].append(poisoned_precision)
            recall_results[f'{int(poison_percentage * 100)}%'].append(poisoned_recall)
            f1_results[f'{int(poison_percentage * 100)}%'].append(poisoned_f1)

# Convert the results dictionaries to DataFrames
accuracy_df = pd.DataFrame(accuracy_results)
precision_df = pd.DataFrame(precision_results)
recall_df = pd.DataFrame(recall_results)
f1_df = pd.DataFrame(f1_results)

# Display the results
print("Accuracy Results:")
display(accuracy_df)

print("Precision Results:")
display(precision_df)

print("Recall Results:")
display(recall_df)

print("F1 Score Results:")
display(f1_df)

Model: Random Forest
Target Class: 1
Poison percentage: 0.1
Poison percentage: 0.2
Poison percentage: 0.3
Poison percentage: 0.4
Poison percentage: 0.5
Target Class: 2
Poison percentage: 0.1
Poison percentage: 0.2
Poison percentage: 0.3
Poison percentage: 0.4
Poison percentage: 0.5
Model: XGBoost
Target Class: 1
Poison percentage: 0.1
Poison percentage: 0.2
Poison percentage: 0.3
Poison percentage: 0.4
Poison percentage: 0.5
Target Class: 2
Poison percentage: 0.1
Poison percentage: 0.2
Poison percentage: 0.3
Poison percentage: 0.4
Poison percentage: 0.5
Model: Neural Network
Target Class: 1
Poison percentage: 0.1
Poison percentage: 0.2
Poison percentage: 0.3
Poison percentage: 0.4
Poison percentage: 0.5
Target Class: 2
Poison percentage: 0.1
Poison percentage: 0.2
Poison percentage: 0.3
Poison percentage: 0.4
Poison percentage: 0.5
Accuracy Results:


Unnamed: 0,Model,Target,0%,10%,20%,30%,40%,50%
0,Random Forest,1,0.969667,0.967,0.951333,0.903,0.757,0.473333
1,Random Forest,2,0.969667,0.978333,0.977667,0.979333,0.977,0.976
2,XGBoost,1,0.979333,0.953667,0.895,0.801,0.656333,0.485333
3,XGBoost,2,0.979333,0.978,0.976667,0.976333,0.979667,0.980333
4,Neural Network,1,0.977333,0.974667,0.948667,0.928333,0.834,0.506
5,Neural Network,2,0.977333,0.973667,0.978,0.971667,0.975333,0.979667


Precision Results:


Unnamed: 0,Model,Target,0%,10%,20%,30%,40%,50%
0,Random Forest,1,0.979279,0.977743,0.976454,0.973658,0.974041,0.972755
1,Random Forest,2,0.979279,0.980438,0.976948,0.97599,0.97551,0.963728
2,XGBoost,1,0.985434,0.978089,0.975826,0.975792,0.974732,0.972933
3,XGBoost,2,0.985434,0.983058,0.97942,0.974452,0.975494,0.976063
4,Neural Network,1,0.980276,0.979257,0.975915,0.974014,0.975949,0.971144
5,Neural Network,2,0.980276,0.980505,0.982284,0.978262,0.977802,0.97738


Recall Results:


Unnamed: 0,Model,Target,0%,10%,20%,30%,40%,50%
0,Random Forest,1,0.969667,0.967,0.951333,0.903,0.757,0.473333
1,Random Forest,2,0.969667,0.978333,0.977667,0.979333,0.977,0.976
2,XGBoost,1,0.979333,0.953667,0.895,0.801,0.656333,0.485333
3,XGBoost,2,0.979333,0.978,0.976667,0.976333,0.979667,0.980333
4,Neural Network,1,0.977333,0.974667,0.948667,0.928333,0.834,0.506
5,Neural Network,2,0.977333,0.973667,0.978,0.971667,0.975333,0.979667


F1 Score Results:


Unnamed: 0,Model,Target,0%,10%,20%,30%,40%,50%
0,Random Forest,1,0.974029,0.971803,0.961785,0.93208,0.843663,0.622324
1,Random Forest,2,0.974029,0.9793,0.976537,0.976488,0.972119,0.967857
2,XGBoost,1,0.982062,0.963382,0.92864,0.872883,0.775103,0.63421
3,XGBoost,2,0.982062,0.980475,0.977165,0.97489,0.976501,0.975526
4,Neural Network,1,0.978728,0.976539,0.959796,0.94705,0.893535,0.653385
5,Neural Network,2,0.978728,0.976846,0.980079,0.974793,0.976335,0.977789


In [21]:
# Function to plot a metric for all models across poisoning percentages for the multiclass case
def plot_metric_multiclass(metric_df, metric_name, target_class):
    plt.figure(figsize=(10, 6))
    
    # Filter the DataFrame for the selected target class
    filtered_df = metric_df[metric_df['Target'] == target_class]
    
    # Iterate through each model and plot its metric across poisoning percentages
    for i, model in enumerate(filtered_df['Model']):
        # Get the values as numpy array, stripping the '%' symbol and converting to integers
        x_values = [int(col.strip('%')) for col in filtered_df.columns[2:]]  # Skip the 'Model' and 'Target' columns
        y_values = filtered_df.iloc[i, 2:].values  # Get the corresponding metric values (from the 2nd column onward)
        
        # Plot the line for each model
        plt.plot(x_values, y_values, label=model, marker='o')
    
    # Update title and labels to reflect multiclass poisoning
    if target_class == 1:
        target_desc = "No Failure to Failure"
    else:
        target_desc = "Failure to No Failure"
    
    # Add labels and title
    plt.title(f'{metric_name} Across Poisoning Percentages ({target_desc})', fontsize=16)
    plt.xlabel('Poisoning Percentage (%)', fontsize=12)
    plt.ylabel(metric_name, fontsize=12)
    
    # Display legend
    plt.legend(title='Model', bbox_to_anchor=(1.05, 1), loc='upper left')
    
    # Show plot
    plt.tight_layout()
    plt.show()

# Define a function that will update the plot based on user input for the multiclass case
def interactive_plot_multiclass(metric_name, target_class):
    if metric_name == 'Accuracy':
        plot_metric_multiclass(accuracy_df, "Accuracy", target_class)
    elif metric_name == 'Precision':
        plot_metric_multiclass(precision_df, "Precision", target_class)
    elif metric_name == 'Recall':
        plot_metric_multiclass(recall_df, "Recall", target_class)
    elif metric_name == 'F1 Score':
        plot_metric_multiclass(f1_df, "F1 Score", target_class)

# Create a dropdown menu for selecting the metric
metric_dropdown_multiclass = widgets.Dropdown(
    options=['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    value='Accuracy',
    description='Metric:',
    disabled=False,
)

# Create a dropdown menu for selecting the target class (1 or 2)
target_dropdown_multiclass = widgets.Dropdown(
    options=[1, 2],  # 1: No Failure to Failure, 2: Failure to No Failure
    value=1,
    description='Target Class:',
    disabled=False,
)

# Use the interact function to update the plot dynamically for multiclass case
interact(interactive_plot_multiclass, metric_name=metric_dropdown_multiclass, target_class=target_dropdown_multiclass)

interactive(children=(Dropdown(description='Metric:', options=('Accuracy', 'Precision', 'Recall', 'F1 Score'),…

<function __main__.interactive_plot_multiclass(metric_name, target_class)>