In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE

In [14]:
# Load the dataset
file_path = 'datasets/dataset_3.csv'
data = pd.read_csv(file_path)

# Data Cleaning and Preprocessing
# Drop non-numeric and irrelevant columns
data = data.drop(columns=['rul'], errors='ignore')
data = data.dropna()

# Define features (X) and target (y)
X = data.drop(columns=['machine_status'])
y = data['machine_status'].apply(lambda x: 0 if x == 'NORMAL' else 1)

# Handle class imbalance using SMOTE
smote = SMOTE(random_state=42)

X, y = smote.fit_resample(X, y)

# Standardize the numerical features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

  data = pd.read_csv(file_path)


In [17]:
# Function to train and evaluate a model
def train_and_evaluate_model(model, model_name):
    print(f"Training {model_name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    print(f"{model_name} Accuracy: {accuracy}")
    print(f"Classification Report for {model_name}:\n", report)
    print("-" * 80)

In [18]:
# Define and evaluate the SVM model
svm_model = SVC(kernel='rbf', random_state=42, class_weight='balanced')
train_and_evaluate_model(svm_model, "SVM")

Training SVM...
SVM Accuracy: 0.9993326731820726
Classification Report for SVM:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     22996
           1       1.00      1.00      1.00     23458

    accuracy                           1.00     46454
   macro avg       1.00      1.00      1.00     46454
weighted avg       1.00      1.00      1.00     46454

--------------------------------------------------------------------------------


In [19]:
# Define and evaluate the ANN model
ann_model = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42)
train_and_evaluate_model(ann_model, "ANN")

Training ANN...
ANN Accuracy: 0.9999138933138159
Classification Report for ANN:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     22996
           1       1.00      1.00      1.00     23458

    accuracy                           1.00     46454
   macro avg       1.00      1.00      1.00     46454
weighted avg       1.00      1.00      1.00     46454

--------------------------------------------------------------------------------


In [6]:
# Define and evaluate the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
train_and_evaluate_model(rf_model, "Random Forest")

Training Random Forest...
Random Forest Accuracy: 0.9989147037543211
Classification Report for Random Forest:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     24752
           1       1.00      1.00      1.00     25004

    accuracy                           1.00     49756
   macro avg       1.00      1.00      1.00     49756
weighted avg       1.00      1.00      1.00     49756

--------------------------------------------------------------------------------


In [7]:
# Define and evaluate the Naive Bayes model
nb_model = GaussianNB()
train_and_evaluate_model(nb_model, "Naive Bayes")

Training Naive Bayes...
Naive Bayes Accuracy: 0.7108690409196881
Classification Report for Naive Bayes:
               precision    recall  f1-score   support

           0       0.63      0.99      0.77     24752
           1       0.98      0.44      0.60     25004

    accuracy                           0.71     49756
   macro avg       0.80      0.71      0.69     49756
weighted avg       0.81      0.71      0.69     49756

--------------------------------------------------------------------------------


In [8]:
# Define and evaluate the KNN model
knn_model = KNeighborsClassifier(n_neighbors=5)
train_and_evaluate_model(knn_model, "KNN")

Training KNN...
KNN Accuracy: 0.9920411608650213
Classification Report for KNN:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99     24752
           1       0.99      1.00      0.99     25004

    accuracy                           0.99     49756
   macro avg       0.99      0.99      0.99     49756
weighted avg       0.99      0.99      0.99     49756

--------------------------------------------------------------------------------
