In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder, MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score, GridSearchCV, KFold
from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score, precision_score
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.compose import make_column_transformer
from tensorflow.keras.utils import to_categorical
from tensorflow import keras

In [None]:
# Location of the interim feature table, relative to this notebook.
# NOTE(review): read_pickle executes pickled code; only load files this project produced.
FEATURES_PKL = "../../data/interim/02_data_features.pkl"
df = pd.read_pickle(FEATURES_PKL)

# Show which columns are available before choosing a feature set.
df.columns

Index(['Tn (Rated Torque) N*m', 'k (constant of proportionality)', 'Ia (Amp)',
       'Ib (Amp)', 'Ic (Amp)', 'Vab (V)', 'Torque (N*m)', 'Speed (rad/s)',
       'Category', 'pca_1', 'pca_2', 'pca_3', 'pca_4'],
      dtype='object')

In [None]:
# Columns fed to every classifier below: the three phase currents,
# the line voltage and the rotor speed.
feature_set1 = [
    'Ia (Amp)',
    'Ib (Amp)',
    'Ic (Amp)',
    'Vab (V)',
    'Speed (rad/s)',
]

In [None]:
# Distinct target labels, in order of first appearance.
pd.unique(df['Category'])

array(['NOM', 'PTPF', 'PTGF', 'UVF', 'OVF', 'OLF'], dtype=object)

In [None]:
# Learn the label -> integer mapping first, then overwrite the column with the codes.
# The fitted encoder is kept so the integer codes can be mapped back to names later.
label_category = LabelEncoder().fit(df['Category'])
df['Category'] = label_category.transform(df['Category'])

df.head()

Unnamed: 0,Tn (Rated Torque) N*m,k (constant of proportionality),Ia (Amp),Ib (Amp),Ic (Amp),Vab (V),Torque (N*m),Speed (rad/s),Category,pca_1,pca_2,pca_3,pca_4
0,0.8,0.000948,0.0,0.0,0.0,0.0,0.0,157.079633,0,-0.386117,0.261339,-0.685428,0.021675
1,0.8,0.000948,0.0,0.0,0.0,0.0,-0.00938,156.173693,0,-0.386347,0.259374,-0.685678,0.019534
2,0.8,0.000948,0.0,0.0,0.0,0.0,0.217542,155.281539,0,-0.386567,0.257444,-0.685916,0.01744
3,0.8,0.000948,0.0,0.0,0.0,0.0,-0.050265,154.401984,0,-0.386797,0.25553,-0.686168,0.015346
4,0.8,0.000948,0.0,0.0,0.0,0.0,-0.076747,153.525611,0,-0.38702,0.253628,-0.68641,0.013274


In [None]:
# Distinct target codes after encoding, in order of first appearance.
pd.unique(df['Category'])

array([0, 4, 3, 5, 2, 1])

In [None]:
# Feature matrix: only the columns listed in feature_set1.
x = df.loc[:, feature_set1]
x.info()

# Target vector: the integer-encoded category column.
y = df.loc[:, 'Category']
y.info()

<class 'pandas.core.frame.DataFrame'>
Index: 40037 entries, 0 to 40039
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Ia (Amp)       40037 non-null  float64
 1   Ib (Amp)       40037 non-null  float64
 2   Ic (Amp)       40037 non-null  float64
 3   Vab (V)        40037 non-null  float64
 4   Speed (rad/s)  40037 non-null  float64
dtypes: float64(5)
memory usage: 1.8 MB
<class 'pandas.core.series.Series'>
Index: 40037 entries, 0 to 40039
Series name: Category
Non-Null Count  Dtype
--------------  -----
40037 non-null  int32
dtypes: int32(1)
memory usage: 469.2 KB


In [None]:
# Split BEFORE scaling so that the held-out rows never influence the scaling
# statistics (fitting the scaler on the full data leaks test-set min/max into
# training). The split depends only on the number of rows and random_state, so
# the train/test membership is the same as when splitting the scaled array.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y, random_state=42, test_size=0.2
)

scaler = MinMaxScaler()

# Learn min/max from the training rows only, then apply the same mapping to test.
# Test values outside the training range will fall outside [0, 1]; that is expected.
X_train = scaler.fit_transform(x_train_raw)
X_test = scaler.transform(x_test_raw)

# Full feature matrix scaled with the training statistics; kept so that any
# cell that still refers to `X` keeps working.
X = scaler.transform(x)

In [None]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours baseline: 5 neighbours, votes weighted by inverse distance.
# fit() returns the estimator itself, so construction and training are chained.
knn_model = KNeighborsClassifier(weights='distance', n_neighbors=5).fit(X_train, y_train)

# Class predictions for the held-out rows.
y_pred_knn = knn_model.predict(X_test)

# Overall accuracy followed by the per-class precision / recall / F1 table.
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print("KNN Classification Report:\n", classification_report(y_test, y_pred_knn))

KNN Accuracy: 0.88499000999001
KNN Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.86      0.85      2679
           1       0.98      0.97      0.98      1012
           2       0.98      0.98      0.98       992
           3       0.83      0.86      0.85      1334
           4       0.85      0.79      0.82      1400
           5       0.98      0.97      0.97       591

    accuracy                           0.88      8008
   macro avg       0.91      0.90      0.91      8008
weighted avg       0.89      0.88      0.88      8008



In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest: 100 trees, depth capped at 10, fixed seed for repeatable results.
# fit() returns the estimator itself, so construction and training are chained.
rf = RandomForestClassifier(
    random_state=42,
    max_depth=10,
    n_estimators=100,
).fit(X_train, y_train)

# Class predictions for the held-out rows.
y_pred_rf = rf.predict(X_test)

# Overall accuracy followed by the per-class precision / recall / F1 table.
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Random Forest Classification Report:\n", classification_report(y_test, y_pred_rf))

Random Forest Accuracy: 0.9143356643356644
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.97      0.89      2679
           1       0.99      0.98      0.98      1012
           2       1.00      0.96      0.98       992
           3       0.95      0.83      0.88      1334
           4       0.97      0.80      0.87      1400
           5       1.00      0.97      0.98       591

    accuracy                           0.91      8008
   macro avg       0.95      0.92      0.93      8008
weighted avg       0.92      0.91      0.91      8008



In [None]:
from sklearn.svm import SVC

# Support vector classifier with an RBF kernel.
# fit() returns the estimator itself, so construction and training are chained.
svm = SVC(
    C=1.0,
    kernel='rbf',
    gamma='scale',
).fit(X_train, y_train)

# Class predictions for the held-out rows.
y_pred_svm = svm.predict(X_test)

# Overall accuracy followed by the per-class precision / recall / F1 table.
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print("SVM Classification Report:\n", classification_report(y_test, y_pred_svm))

SVM Accuracy: 0.8931068931068931
SVM Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.96      0.88      2679
           1       0.81      0.93      0.86      1012
           2       1.00      0.96      0.98       992
           3       1.00      0.79      0.88      1334
           4       1.00      0.76      0.86      1400
           5       0.98      0.96      0.97       591

    accuracy                           0.89      8008
   macro avg       0.93      0.89      0.91      8008
weighted avg       0.91      0.89      0.89      8008



In [None]:
# Neural Network

# Encode the target variable as contiguous integers 0..n_classes-1.
# The encoder is fitted on the training labels only.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# One-hot encode the *encoded* labels (the same arrays the metrics use later),
# and fix the width explicitly so the train and test matrices always have the
# same number of columns, even if a split happens to lack the highest class.
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(y_train_encoded, num_classes=num_classes)
y_test_categorical = to_categorical(y_test_encoded, num_classes=num_classes)

# Width of one one-hot row, i.e. the number of classes the network must output.
len(y_train_categorical[0])

6

In [None]:
# Seed Python, NumPy and the backend so weight initialisation and batch
# shuffling are repeatable, matching the fixed seed used for the split and
# the random forest above.
keras.utils.set_random_seed(42)

# Fully connected classifier: two hidden ReLU layers and a softmax output
# with one unit per one-hot target column.
model_v2 = keras.Sequential([
    keras.layers.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(y_train_categorical.shape[1], activation='softmax')  # Output layer
])

# Compile the model
model_v2.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
# NOTE(review): validation_data is the held-out test split, so the val_* numbers
# are test metrics shown for monitoring only; do not tune hyper-parameters on them.
model_v2.fit(X_train, y_train_categorical,
          epochs=500,
          batch_size=32,
          validation_data=(X_test, y_test_categorical))

Epoch 1/500
[1m1001/1001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.5999 - loss: 1.1697 - val_accuracy: 0.8300 - val_loss: 0.5580
Epoch 2/500
[1m1001/1001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8389 - loss: 0.5196 - val_accuracy: 0.8745 - val_loss: 0.4118
Epoch 3/500
[1m1001/1001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8730 - loss: 0.4054 - val_accuracy: 0.8847 - val_loss: 0.3612
Epoch 4/500
[1m1001/1001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8845 - loss: 0.3614 - val_accuracy: 0.8887 - val_loss: 0.3376
Epoch 5/500
[1m1001/1001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8853 - loss: 0.3461 - val_accuracy: 0.8926 - val_loss: 0.3125
Epoch 6/500
[1m1001/1001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8871 - loss: 0.3295 - val_accuracy: 0.8966 - val_loss: 0.3058
Epoch 7/50

<keras.src.callbacks.history.History at 0x16ac540b2c0>

In [None]:
# Per-class probabilities for every held-out row.
y_pred_categorical = model_v2.predict(X_test)

# Most probable class index per row.
y_pred_classes = y_pred_categorical.argmax(axis=1)

[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [None]:
# Fraction of held-out rows whose predicted class matches the encoded label.
accuracy = accuracy_score(y_true=y_test_encoded, y_pred=y_pred_classes)
print(f"Neural Network Accuracy: {accuracy}")

Neural Network Accuracy: 0.9158341658341659


In [None]:
# Human-readable names for the report rows.
# label_encoder was fitted on the already-integer 'Category' codes, so its
# classes_ are just those integer codes. Map them back through label_category
# (the encoder that produced the codes from the original strings) so the
# report shows the fault names and not "0".."5".
# classification_report and numpy are already imported in the first cell,
# so they are not re-imported here.
class_names = [
    str(name) for name in label_category.inverse_transform(label_encoder.classes_)
]

In [None]:
# Sanity check: there must be exactly one prediction per true label.
print(
    f"Length of y_test_encoded: {len(y_test_encoded)}",
    f"Length of y_pred_classes: {len(y_pred_classes)}",
    sep="\n",
)

Length of y_test_encoded: 8008
Length of y_pred_classes: 8008


In [None]:
# Per-class precision / recall / F1 for the neural network, with rows labelled by class_names.
class_report = classification_report(
    y_true=y_test_encoded,
    y_pred=y_pred_classes,
    target_names=class_names,
)

In [None]:
# Heading and report body, each on its own line.
print("Neural Network Classification Report:", class_report, sep="\n")

Neural Network Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.95      0.89      2679
           1       1.00      0.97      0.98      1012
           2       0.99      0.97      0.98       992
           3       0.90      0.87      0.88      1334
           4       0.97      0.79      0.87      1400
           5       1.00      0.97      0.98       591

    accuracy                           0.92      8008
   macro avg       0.95      0.92      0.93      8008
weighted avg       0.92      0.92      0.92      8008

