In [None]:
import time

import numpy as np
import matplotlib.pyplot as plt

import pandas as pd

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

from scipy.stats import kurtosis, skew

from scipy.fft import fft
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score


from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
from itertools import cycle


import seaborn as sns

from sklearn.model_selection import GridSearchCV

from sklearn.decomposition import PCA


In [None]:
# List every file found under the Kaggle input directory.

import os

for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# ----------------------------------------------------------------------

# Input Dataset I

In [None]:

# Dataset I: ten CSV files named <level>D.csv and <level>E.csv, level = 0..4.
DATASET_I_DIR = '/kaggle/input/vibration-dataset-1'

data0D, data1D, data2D, data3D, data4D = (
    pd.read_csv(f'{DATASET_I_DIR}/{level}D.csv') for level in range(5)
)

data0E, data1E, data2E, data3E, data4E = (
    pd.read_csv(f'{DATASET_I_DIR}/{level}E.csv') for level in range(5)
)

print('DATASET LOADED')

In [None]:
# Every recording is cut so that it starts 20 seconds in, which removes the
# initial transient phase. The index is reset so rows start again at 0.
# Note: the frames are overwritten in place, so running this cell a second
# time removes a further 20 seconds.

fs = 4096                # samples per second
initial_time = 20 * fs   # number of leading rows to discard

data0D, data1D, data2D, data3D, data4D = (
    frame.iloc[initial_time:].reset_index(drop=True)
    for frame in (data0D, data1D, data2D, data3D, data4D)
)

data0E, data1E, data2E, data3E, data4E = (
    frame.iloc[initial_time:].reset_index(drop=True)
    for frame in (data0E, data1E, data2E, data3E, data4E)
)

print('Done')

In [None]:
# One-second window
# `window` is the number of samples per segment; get_features reads it as a
# notebook-level global when splitting the signals.

window_time = 1  
window = fs * window_time  

### Divide the raw signal into 1-second samples and label each sample

In [None]:
# Extracts signal segments of the specified window size.

def get_features(data, label, window_size=None, label_map=None):
    """Split a 1-D signal into non-overlapping segments and label each one.

    Trailing samples that do not fill a complete segment are discarded.

    Parameters
    ----------
    data : pandas.Series or array-like
        One-dimensional signal to segment.
    label : hashable
        Key looked up in ``label_map`` to obtain the numeric class id.
    window_size : int, optional
        Samples per segment. Defaults to the notebook-level ``window``.
    label_map : dict, optional
        Mapping from label key to numeric class id. Defaults to the
        notebook-level ``labels``.

    Returns
    -------
    X : numpy.ndarray, shape (n_segments, window_size)
        The segments, in signal order.
    y : numpy.ndarray, shape (n_segments,)
        Float array filled with the class id of ``label``.
    """
    # Fall back to the notebook globals so existing two-argument calls behave
    # exactly as before; passing the values explicitly avoids depending on
    # which dataset cell was executed last.
    if window_size is None:
        window_size = window
    if label_map is None:
        label_map = labels

    n = len(data) // window_size
    values = np.asarray(data)[:n * window_size]
    X = values.reshape((n, window_size))
    y = np.ones(n) * label_map[label]
    return X, y

In [None]:
labels = {'no_unbalance':0, 'unbalance_1':1, 'unbalance_2':2,'unbalance_3':3, 'unbalance_4':4}
sensor = 'Vibration_2'

# Label keys in insertion order, i.e. one per unbalance level 0..4.
class_keys = list(labels)

# Segment the chosen sensor column of each "D" recording.
(X0D, y0D), (X1D, y1D), (X2D, y2D), (X3D, y3D), (X4D, y4D) = (
    get_features(frame[sensor], key)
    for frame, key in zip((data0D, data1D, data2D, data3D, data4D), class_keys)
)

# Same for the "E" recordings.
(X0E, y0E), (X1E, y1E), (X2E, y2E), (X3E, y3E), (X4E, y4E) = (
    get_features(frame[sensor], key)
    for frame, key in zip((data0E, data1E, data2E, data3E, data4E), class_keys)
)

# Stack all segments (rows) and their labels in the same order.
X = np.concatenate([X0D, X1D, X2D, X3D, X4D, X0E, X1E, X2E, X3E, X4E])
Y = np.concatenate([y0D, y1D, y2D, y3D, y4D, y0E, y1E, y2E, y3E, y4E])

print(X.shape, Y.shape)

In [None]:
# Bar-style histogram of the label vector: one bar per class, used to check
# whether the dataset is balanced.

custom_labels = ["Normal", "Unb. I", "Unb. II", "Unb. III", "Unb. IV"]

fig, ax = plt.subplots(figsize=(8, 5))

# Bin edges sit at -0.5, 0.5, ... so each integer class id is centred in its bin.
ax.hist(Y, bins=np.arange(len(labels) + 1) - 0.5, edgecolor='black', rwidth=0.8)

ax.set_xticks(range(len(labels)))
ax.set_xticklabels(custom_labels, rotation=0)
ax.set_xlabel("Class")
ax.set_ylabel("Absolute Frequency")
ax.set_title("Class Distribution for Dataset I")

ax.grid(axis='y', linestyle='--', alpha=0.6)

plt.show()

# --------------------------------------------------------

# Input Dataset II

In [None]:


# Dataset II: each file is stored in a notebook global named
# data_<variable prefix>_<i>, because later cells look the frames up by name.
# Columns: (variable prefix, folder, file prefix, number of files)
dataset_2_files = [
    ('normal',       'normal',       'normal',       1000),
    ('unbalance_i',  'unbalance_6',  'unbalance_i',   500),
    ('unbalance_ii', 'unbalance_27', 'unbalance_ii',  500),
    ('misalignment', 'misalignment', 'misalignment', 1000),
    ('bearing',      'bearing',      'bearing',      1000),
]

for var_prefix, folder, file_prefix, n_files in dataset_2_files:
    for i in range(1, n_files + 1):
        globals()[f"data_{var_prefix}_{i}"] = pd.read_csv(
            f'/kaggle/input/vbl-va001/{folder}/{file_prefix}_{i}.csv', header=None
        )

print('DATASET LOADED')

In [None]:
# Visualize which axis exhibits the highest amplitude.
# Column 0 is used as the time base; columns 1-3 are plotted as X/Y/Z.

df = globals()["data_unbalance_ii_100"]

# Named `axis_time` rather than `time`: assigning to `time` would replace the
# imported `time` module, and the training cells call time.time().
axis_time = df.iloc[:, 0]
axis_x = df.iloc[:, 1]
axis_y = df.iloc[:, 2]
axis_z = df.iloc[:, 3]

plt.figure(figsize=(12, 6))
plt.plot(axis_time, axis_x, label='X axis')
plt.plot(axis_time, axis_y, label='Y axis')
plt.plot(axis_time, axis_z, label='Z axis')
plt.title('Vibration in 3 axis - Unbalance II')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Samples per second used for Dataset II; overrides the value set for Dataset I.
fs = 20000

# One-second window: `window` is the number of samples per segment.
window_time = 1
window = fs * window_time  


### Divide the raw signal into 1-second samples and label each sample

In [None]:
def get_features(data, label, window_size=None, label_map=None):
    """Split a 1-D signal into non-overlapping segments and label each one.

    Trailing samples that do not fill a complete segment are discarded.

    Parameters
    ----------
    data : pandas.Series or array-like
        One-dimensional signal to segment.
    label : hashable
        Key looked up in ``label_map`` to obtain the numeric class id.
    window_size : int, optional
        Samples per segment. Defaults to the notebook-level ``window``.
    label_map : dict, optional
        Mapping from label key to numeric class id. Defaults to the
        notebook-level ``labels``.

    Returns
    -------
    X : numpy.ndarray, shape (n_segments, window_size)
        The segments, in signal order.
    y : numpy.ndarray, shape (n_segments,)
        Float array filled with the class id of ``label``.
    """
    # Fall back to the notebook globals so existing two-argument calls behave
    # exactly as before; passing the values explicitly avoids depending on
    # which dataset cell was executed last.
    if window_size is None:
        window_size = window
    if label_map is None:
        label_map = labels

    n = len(data) // window_size
    values = np.asarray(data)[:n * window_size]
    X = values.reshape((n, window_size))
    y = np.ones(n) * label_map[label]
    return X, y

In [None]:

labels = {'normal':0, 'unbalance_1':1, 'unbalance_2':2,'misaligment':3, 'bearing_fault':4}
axis = 2   # column of each frame that is segmented

X_list = []
Y_list = []

# Columns: (prefix of the loaded frame, prefix of the stored result,
#           number of files, key in `labels`)
dataset_2_groups = [
    ('normal',       'normal',       1000, "normal"),
    ('unbalance_i',  'unbalance_i',   500, "unbalance_1"),
    ('unbalance_ii', 'unbalance_ii',  500, "unbalance_2"),
    ('misalignment', 'misaligment',  1000, "misaligment"),
    ('bearing',      'bearing',      1000, "bearing_fault"),
]

for data_prefix, out_prefix, n_files, class_key in dataset_2_groups:
    for i in range(1, n_files + 1):
        X_part, y_part = get_features(globals()[f'data_{data_prefix}_{i}'][axis], class_key)
        # Keep the per-file results available as X_<prefix>_<i> / y_<prefix>_<i>.
        globals()[f'X_{out_prefix}_{i}'] = X_part
        globals()[f'y_{out_prefix}_{i}'] = y_part
        X_list.append(X_part)
        Y_list.append(y_part)

# Stack the per-file segments into one matrix and one label vector.
X = np.concatenate(X_list)
Y = np.concatenate(Y_list)

print(X.shape, Y.shape)

print('Done')

In [None]:
# Bar-style histogram of the label vector: one bar per class, used to check
# whether the dataset is balanced.

custom_labels = ["Normal", "Unb. I", "Unb. II", "Misalig.", "Bearings"]

# Create the histogram; bin edges at -0.5, 0.5, ... centre each class id.
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(Y, bins=np.arange(len(labels) + 1) - 0.5, edgecolor='black', rwidth=0.8)

# Adjust axes and labels
ax.set_xticks(range(len(labels)))
ax.set_xticklabels(custom_labels, rotation=0)
ax.set_xlabel("Class")
ax.set_ylabel("Absolute Frequency")
ax.set_title("Class Distribution for Dataset II")

ax.grid(axis='y', linestyle='--', alpha=0.6)

plt.show()

# --------------------------------------------------------

# Input Dataset III

In [None]:

# Dataset III: each file is stored in a notebook global named
# data_<variable prefix>_<i>. Fields may be separated by ';' or ',', hence
# the regex separator and the python parsing engine.
# Columns: (variable prefix, folder, file prefix, number of files)
dataset_3_files = [
    ('normal',                'normal',                 'normal',                 49),
    ('unbalance_i',           'unbalance_i',            'unbalance_6',            48),
    ('unbalance_ii',          'unbalance_ii',           'unbalance_20',           48),
    ('unbalance_iii',         'unbalance_iii',          'unbalance_35',           48),
    ('misalignment',          'misalignment',           'misalignment',           49),
    ('unbalance_misaligment', 'unbalance_misalignment', 'unbalance_misalignment', 39),
]

for var_prefix, folder, file_prefix, n_files in dataset_3_files:
    for i in range(1, n_files + 1):
        globals()[f"data_{var_prefix}_{i}"] = pd.read_csv(
            f'/kaggle/input/comfaulda/COMFAULDA_v2/{folder}/{file_prefix}_{i}.csv',
            header=None, sep='[;,]', engine='python'
        )

print('DATASET LOADED')

In [None]:
# Visualize which axis exhibits the highest amplitude.
# Column 0 is used as the time base.

df = globals()["data_unbalance_ii_42"]

axis_time = df.iloc[:, 0]
# NOTE(review): Y is read from column 7 and Z from column 6 — confirm this
# ordering against the dataset's column layout.
axis_x = df.iloc[:, 5]
axis_y = df.iloc[:, 7]
axis_z = df.iloc[:, 6]

# Plot against this file's own time column (`axis_time`); the bare name
# `time` is the imported module (or a stale Series left by another cell).
plt.figure(figsize=(12, 6))
plt.plot(axis_time, axis_x, label='X axis')
plt.plot(axis_time, axis_y, label='Y axis')
plt.plot(axis_time, axis_z, label='Z axis')
plt.title('Vibration in 3 axis - Unbalance II')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Samples per second used for Dataset III; overrides the earlier values.
fs = 50000

window_time = 1                # 1-second window
window = fs * window_time 

In [None]:

def get_features(data, label, window_size=None, label_map=None):
    """Split a 1-D signal into non-overlapping segments and label each one.

    Trailing samples that do not fill a complete segment are discarded.

    Parameters
    ----------
    data : pandas.Series or array-like
        One-dimensional signal to segment.
    label : hashable
        Key looked up in ``label_map`` to obtain the numeric class id.
    window_size : int, optional
        Samples per segment. Defaults to the notebook-level ``window``.
    label_map : dict, optional
        Mapping from label key to numeric class id. Defaults to the
        notebook-level ``labels``.

    Returns
    -------
    X : numpy.ndarray, shape (n_segments, window_size)
        The segments, in signal order.
    y : numpy.ndarray, shape (n_segments,)
        Float array filled with the class id of ``label``.
    """
    # Fall back to the notebook globals so existing two-argument calls behave
    # exactly as before; passing the values explicitly avoids depending on
    # which dataset cell was executed last.
    if window_size is None:
        window_size = window
    if label_map is None:
        label_map = labels

    n = len(data) // window_size
    values = np.asarray(data)[:n * window_size]
    X = values.reshape((n, window_size))
    y = np.ones(n) * label_map[label]
    return X, y

In [None]:

labels = {'normal':0, 'unbalance_1':1, 'unbalance_2':2, 'unbalance_3':3, 'misaligment':4, 'unbalance_misaligment':5}
axis = 6   # column of each frame that is segmented

X_list = []
Y_list = []

# Columns: (prefix shared by the loaded frame and the stored result,
#           number of files, key in `labels`)
dataset_3_groups = [
    ('normal',                49, "normal"),
    ('unbalance_i',           48, "unbalance_1"),
    ('unbalance_ii',          48, "unbalance_2"),
    ('unbalance_iii',         48, "unbalance_3"),
    ('misalignment',          49, "misaligment"),
    ('unbalance_misaligment', 39, "unbalance_misaligment"),
]

for prefix, n_files, class_key in dataset_3_groups:
    for i in range(1, n_files + 1):
        X_part, y_part = get_features(globals()[f'data_{prefix}_{i}'][axis], class_key)
        # Keep the per-file results available as X_<prefix>_<i> / y_<prefix>_<i>.
        globals()[f'X_{prefix}_{i}'] = X_part
        globals()[f'y_{prefix}_{i}'] = y_part
        X_list.append(X_part)
        Y_list.append(y_part)

# Stack the per-file segments into one matrix and one label vector.
X = np.concatenate(X_list)
Y = np.concatenate(Y_list)

print(X.shape, Y.shape)

print('Done')


In [None]:
# Bar-style histogram of the label vector: one bar per class, used to check
# whether the dataset is balanced.

custom_labels = ["Normal", "Unb. I", "Unb. II", "Unb. III", "Misalig.", "Unb. II + Misalig."]

# Create the histogram; bin edges at -0.5, 0.5, ... centre each class id.
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(Y, bins=np.arange(len(labels) + 1) - 0.5, edgecolor='black', rwidth=0.8)

# Adjust axes and labels
ax.set_xticks(range(len(labels)))
ax.set_xticklabels(custom_labels, rotation=0)
ax.set_xlabel("Class")
ax.set_ylabel("Absolute Frequency")
ax.set_title("Class Distribution for Dataset III")

ax.grid(axis='y', linestyle='--', alpha=0.6)

plt.show()

# --------------------------------------------------------

## Train and test dataset split

##### 80% Train and 20% Test

In [None]:
# Shuffle segments and labels together (fixed seed for repeatability).
# X and Y are overwritten, so re-running this cell shuffles the already-shuffled arrays.
X, Y = shuffle(X, Y, random_state=42)

In [None]:
# 20% of the data used for testing.
# The split uses a fixed seed; no stratification is requested, so class
# proportions in the two parts are not guaranteed to match.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

## 01 - SVM with raw signal (with no feature extraction)

In [None]:
# Signal standardization
# The scaler is fitted on the training set only and then applied unchanged to
# the test set, so both sets are scaled with the same statistics and no
# test-set information enters the preprocessing.

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Signal normalization
# Min-max scaling fitted on the training set and applied to the test set.
# This cell assigns the same names as the standardization cell, so whichever
# of the two ran last determines X_train_scaled / X_test_scaled.

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Grid search (no cross-validation): every configuration is fitted on the
# scaled training set and scored directly on the scaled test set.
# NOTE(review): the winning configuration is chosen by its test-set score, so
# the reported metrics are optimistic; a separate validation split would be
# needed for an unbiased estimate.

start_time = time.time()

# Grid search hyperparameters
param_grid = [
    {'kernel': 'linear', 'C': C}
    for C in [0.1, 1, 10, 100]
] + [
    {'kernel': 'rbf', 'C': C, 'gamma': gamma}
    for C in [0.1, 1, 10, 100]
    for gamma in [0.001, 0.01, 0.1, 'scale']
] + [
    {'kernel': 'poly', 'C': C, 'degree': d, 'gamma': gamma}
    for C in [0.1, 1, 10]
    for d in [2, 3, 4]
    for gamma in [0.01, 0.1]
]


results = []


for params in param_grid:
    model = SVC(**params)
    model.fit(X_train_scaled, Y_train)
    Y_pred = model.predict(X_test_scaled)

    acc = accuracy_score(Y_test, Y_pred)
    f1 = f1_score(Y_test, Y_pred, average=None)
    report = classification_report(Y_test, Y_pred)

    # Store the metrics with their configuration so the best entry can be
    # reported later without relying on the loop's last iteration.
    results.append({
        'params': params,
        'accuracy': acc,
        'f1_score': f1,
        'report':report
    })

end_time = time.time()
elapsed_time = end_time - start_time


# Highest test accuracy first; ties keep their grid order.
results_sorted = sorted(results, key=lambda x: x['accuracy'], reverse=True)
best = results_sorted[0]

# Report the metrics of the best configuration. `f1` and `Y_pred` still hold
# the values of the last configuration tried, so they are not used here.
report = best['report']


print("Melhores parâmetros encontrados:", best['params'])
print("Accuracy:", best['accuracy'])
print("F1-score per classe:", best['f1_score'])
print("Tempo de treino (s):", elapsed_time)
print(report)
print(results)

In [None]:
# Train a single model without grid search and time the fit.

start_time = time.time()

# Alternative configurations kept for reference:
#model = SVC(kernel='rbf', C=10, gamma = 'scale')
#model = SVC(kernel='poly', C=1, gamma = 0.01, degree = 2)
model = SVC(kernel='rbf', C=100, gamma = 'scale')

# NOTE(review): this fits on the unscaled X_train, whereas the grid-search
# cell above used X_train_scaled — confirm which input is intended.
model.fit(X_train, Y_train)

end_time = time.time()
elapsed_time = end_time - start_time

print("Time spent in training the model (s):", elapsed_time)

In [None]:
# Predict on the unscaled test segments (the same representation the model was fitted on).
Y_pred = model.predict(X_test)

print(classification_report(Y_test, Y_pred))

In [None]:
# Confusion matrix of the predictions from the previous cell.
conf_matriz = confusion_matrix(Y_test, Y_pred)

# Tick labels: keep exactly one of the lists below active, matching the
# dataset currently loaded (one name per class).
conf_matriz_classes = ['Normal', 'Unb. I', 'Unb. II', 'Unb. III', 'Unb. IV']
# conf_matriz_classes = ['No unb.', 'Unb. I', 'Unb. II', 'Misalig.', 'Bearings']
# conf_matriz_classes = ['Normal', 'Unb. I', 'Unb. II','Unb. III', 'Misalig.', 'Unb. II + Misalig.']

plt.figure(figsize=(6, 5))
sns.heatmap(conf_matriz, annot=True, fmt='d', cmap='Blues',
            xticklabels=conf_matriz_classes, yticklabels=conf_matriz_classes,cbar=True)

plt.title('Confusion matrix')
plt.xlabel('Predicted Class')
plt.ylabel('Real Class')
plt.tight_layout()
plt.show()

In [None]:
# ROC curve and AUC value, one-vs-rest: one curve per class.

# Binarize the labels.
classes = list(labels.values())
class_keys = list(labels.keys())
Y_test_bin = label_binarize(Y_test, classes=classes)

# Score the same representation the current `model` was fitted on: the
# training cell fits on the unscaled X_train, so the unscaled X_test is used.
Y_score = model.decision_function(X_test)

fpr = dict()
tpr = dict()
roc_auc = dict()

n_classes = len(classes)


# Column i of the binarized labels and of the scores both refer to classes[i].
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(Y_test_bin[:, i], Y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])



plt.figure(figsize=(8, 6))
colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'darkgreen', 'crimson'])


# Display names for Dataset I. When the active dataset has more classes than
# names listed here, the key from `labels` is used so no curve is dropped.
class_names = ['No unb.', 'Unb. I', 'Unb. II', 'Unb. III', 'Unb. IV']


for i, color in zip(range(n_classes), colors):
    name = class_names[i] if i < len(class_names) else class_keys[i]
    plt.plot(fpr[i], tpr[i], color=color, lw=2,
             label=f"{name} (AUC = {roc_auc[i]:.2f})")


plt.plot([0, 1], [0, 1], 'k--', lw=2)   # chance line
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (1 - Specificity)')
# The true positive rate is the sensitivity (recall), not the specificity.
plt.ylabel('True Positive Rate (Sensitivity)')
plt.title('ROC Curves')
plt.legend(loc="lower right")
plt.grid(True)
plt.tight_layout()
plt.show()


## 02 - SVM with statistical feature extraction

### Feature extraction

In [None]:
def extract_features(signal):
    """Return ten time-domain statistics of one signal segment.

    The list order is: maximum, minimum, peak (largest absolute value),
    peak-to-peak, mean, standard deviation, RMS, crest factor (peak / RMS,
    or 0 when the RMS is 0), kurtosis, skewness.
    """
    peak_amplitude = np.max(np.abs(signal))
    rms = np.sqrt(np.mean(signal**2))
    crest_factor = peak_amplitude / rms if rms != 0 else 0

    return [
        np.max(signal),      # maximum
        np.min(signal),      # minimum
        peak_amplitude,      # peak
        np.ptp(signal),      # peak-to-peak
        np.mean(signal),     # mean
        np.std(signal),      # standard deviation
        rms,                 # RMS
        crest_factor,        # crest factor
        kurtosis(signal),    # kurtosis
        skew(signal),        # skewness
    ]

In [None]:
# One row of statistical features per segment, for the train and test sets.
X_train_features = np.array([extract_features(segment) for segment in X_train])
X_test_features = np.array([extract_features(segment) for segment in X_test])

print(X_train_features.shape)
print(X_test_features.shape)

In [None]:
# Grid search (no cross-validation): every configuration is fitted on the
# training features and scored directly on the test features.
# NOTE(review): the winner is chosen by its test-set score, so the reported
# metrics are optimistic estimates.

start_time = time.time()

linear_grid = [{'kernel': 'linear', 'C': C} for C in [0.1, 1, 10, 100]]
rbf_grid = [
    {'kernel': 'rbf', 'C': C, 'gamma': gamma}
    for C in [0.1, 1, 10, 100]
    for gamma in [0.001, 0.01, 0.1, 'scale']
]
poly_grid = [
    {'kernel': 'poly', 'C': C, 'degree': d, 'gamma': gamma}
    for C in [0.1, 1, 10]
    for d in [2, 3, 4]
    for gamma in [0.01, 0.1]
]
param_grid = linear_grid + rbf_grid + poly_grid

results = []

for params in param_grid:
    model = SVC(**params)
    model.fit(X_train_features, Y_train)
    Y_pred = model.predict(X_test_features)

    results.append(dict(
        params=params,
        accuracy=accuracy_score(Y_test, Y_pred),
        f1_weighted=f1_score(Y_test, Y_pred, average='weighted'),
        f1_per_class=f1_score(Y_test, Y_pred, average=None),
    ))

elapsed_time = time.time() - start_time

# Highest weighted F1 first; ties keep their grid order.
results_sorted = sorted(results, key=lambda entry: entry['f1_weighted'], reverse=True)
best = results_sorted[0]

# Refit the winning configuration to produce its classification report.
best_model = SVC(**best['params'])
best_model.fit(X_train_features, Y_train)
best_pred = best_model.predict(X_test_features)
report = classification_report(Y_test, best_pred)

print("Melhores parâmetros encontrados:", best['params'])
print("Accuracy:", best['accuracy'])
print("F1-score por classe:", best['f1_per_class'])
print("F1-score ponderado:", best['f1_weighted'])
print("Tempo de treino (s):", elapsed_time)
print("\nClassification Report (melhor modelo):\n", report)
print(results)

In [None]:
# Train a single model on the statistical features (no grid search) and time the fit.

start_time = time.time()

# Alternative configuration kept for reference (noted for datasets I, II and III):
#model = SVC(kernel='rbf', C=100, gamma = 0.1) #dataset I, II e III
model = SVC(kernel='rbf', C=100, gamma = 'scale')

model.fit(X_train_features, Y_train)

end_time = time.time()
elapsed_time = end_time - start_time

print("Time spent in training the model (s):", elapsed_time)

In [None]:
# Predict on the test-set statistical features and print per-class metrics.
Y_pred = model.predict(X_test_features)

print(classification_report(Y_test, Y_pred))

In [None]:
# Confusion matrix of the predictions from the previous cell.
conf_matriz = confusion_matrix(Y_test, Y_pred)

# Tick labels: keep exactly one of the lists below active, matching the
# dataset currently loaded (one name per class).
conf_matriz_classes = ['No unb.', 'Unb. I', 'Unb. II', 'Unb. III', 'Unb. IV']
#conf_matriz_classes = ['Normal', 'Unb. I', 'Unb. II', 'Misalig.', 'Bearings']
#conf_matriz_classes = ['Normal', 'Unb. I', 'Unb. II']
#conf_matriz_classes = ['Normal', 'Unb. I', 'Unb. II','Unb. III', 'Misalig.', 'Unb. II + Misalig.']

plt.figure(figsize=(6, 5))
sns.heatmap(conf_matriz, annot=True, fmt='d', cmap='Blues',
            xticklabels=conf_matriz_classes, yticklabels=conf_matriz_classes,cbar=True)

plt.title('Confusion matrix')
plt.xlabel('Predicted Class')
plt.ylabel('Real Class')
plt.tight_layout()
plt.show()

In [None]:
# ROC curve and AUC value, one-vs-rest: one curve per class.

classes = list(labels.values())
class_keys = list(labels.keys())
Y_test_bin = label_binarize(Y_test, classes=classes)

Y_score = model.decision_function(X_test_features)

fpr = dict()
tpr = dict()
roc_auc = dict()

n_classes = len(classes)


# Column i of the binarized labels and of the scores both refer to classes[i].
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(Y_test_bin[:, i], Y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

plt.figure(figsize=(8, 6))
colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'darkgreen', 'crimson'])

# Display names for Dataset I. When the active dataset has more classes than
# names listed here, the key from `labels` is used so no curve is dropped.
class_names = ['No unb.', 'Unb. I', 'Unb. II', 'Unb. III', 'Unb. IV']


for i, color in zip(range(n_classes), colors):
    name = class_names[i] if i < len(class_names) else class_keys[i]
    plt.plot(fpr[i], tpr[i], color=color, lw=2,
             label=f"{name} (AUC = {roc_auc[i]:.2f})")


plt.plot([0, 1], [0, 1], 'k--', lw=2)   # chance line
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (1 - Specificity)')
# The true positive rate is the sensitivity (recall), not the specificity.
plt.ylabel('True Positive Rate (Sensitivity)')
plt.title('ROC Curves')
plt.legend(loc="lower right")
plt.grid(True)
plt.tight_layout()
plt.show()


## 03 - SVM with statistical feature extraction and PCA

### Feature extraction

In [None]:
def extract_features(signal):
    """Return ten time-domain statistics of one signal segment.

    The list order is: maximum, minimum, peak (largest absolute value),
    peak-to-peak, mean, standard deviation, RMS, crest factor (peak / RMS,
    or 0 when the RMS is 0), kurtosis, skewness.
    """
    peak_amplitude = np.max(np.abs(signal))
    rms = np.sqrt(np.mean(signal**2))
    crest_factor = peak_amplitude / rms if rms != 0 else 0

    return [
        np.max(signal),      # maximum
        np.min(signal),      # minimum
        peak_amplitude,      # peak
        np.ptp(signal),      # peak-to-peak
        np.mean(signal),     # mean
        np.std(signal),      # standard deviation
        rms,                 # RMS
        crest_factor,        # crest factor
        kurtosis(signal),    # kurtosis
        skew(signal),        # skewness
    ]

In [None]:
# One row of statistical features per segment, for the train and test sets.
X_train_features = np.array([extract_features(segment) for segment in X_train])
X_test_features = np.array([extract_features(segment) for segment in X_test])

print(X_train_features.shape)
print(X_test_features.shape)

In [None]:
# Sweep the number of principal components from 2 to 10 and record the test
# accuracy of a fixed RBF SVM for each setting.

results = []

for n_pca_components in range(2, 11):
    # PCA is fitted on the training features only, then applied to the test features.
    pca = PCA(n_components=n_pca_components)
    X_train_features_pca = pca.fit_transform(X_train_features)
    X_test_features_pca = pca.transform(X_test_features)

    model = SVC(kernel='rbf', C=100, gamma=0.1)
    model.fit(X_train_features_pca, Y_train)
    Y_pred = model.predict(X_test_features_pca)
    acc = accuracy_score(Y_test, Y_pred)

    results.append((n_pca_components, acc))
    print(f"n_components={n_pca_components} -> Accuracy = {acc:.4f}")


In [None]:
# Accuracy of the SVM as a function of the number of principal components,
# using the (n_components, accuracy) pairs collected by the sweep.

n_principal_components, accuracies = zip(*results)

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(n_principal_components, accuracies, marker='o')
ax.set_title("Support Vector Machines Accuracy vs. Number of Principal Components")
ax.set_xlabel("Number of Principal Components (PCs)")
ax.set_ylabel("SVM model accuracy")
ax.set_xlim(2, 10)
ax.grid(True)
plt.show()


In [None]:
# Apply PCA with the chosen number of components.
# PCA is fitted on the training features and then applied to the test features.

n_of_pca_components = 6   # number of PCA components; must be < n_features

pca = PCA(n_components = n_of_pca_components)

X_train_features_pca = pca.fit_transform(X_train_features)

X_test_features_pca = pca.transform(X_test_features)


print("Shape after PCA:", X_train_features_pca.shape)

print(pca.components_.shape)

In [None]:
# Train a single model on the PCA-projected features (no grid search) and time the fit.

start_time = time.time()


# Alternative configuration kept for reference (noted for datasets I, II and III):
# model = SVC(kernel='rbf', C=100, gamma = 0.1) #dataset I, II e III
model = SVC(kernel='rbf', C=100, gamma = 'scale')

model.fit(X_train_features_pca, Y_train)

end_time = time.time()
elapsed_time = end_time - start_time

print("Time spent in training the model (s):", elapsed_time)

In [None]:
# Predict on the PCA-projected test features and print per-class metrics.
Y_pred = model.predict(X_test_features_pca)

print(classification_report(Y_test, Y_pred))

In [None]:
# Confusion matrix of the predictions from the previous cell.
conf_matriz = confusion_matrix(Y_test, Y_pred)

# Tick labels: keep exactly one of the lists below active, matching the
# dataset currently loaded (one name per class).
# NOTE(review): the active list has 3 names, while every `labels` mapping in
# this notebook defines 5 or 6 classes — confirm it matches the data in use.
# conf_matriz_classes = ['No unb.', 'Unb. I', 'Unb. II', 'Unb. III', 'Unb. IV']
#conf_matriz_classes = ['No unb.', 'Unb. I', 'Unb. II', 'Misalig.', 'Bearings']
conf_matriz_classes = ['Normal', 'Unb. I', 'Unb. II']
#conf_matriz_classes = ['Normal', 'Unb. I', 'Unb. II','Unb. III', 'Misalig.', 'Unb. II + Misalig.']

plt.figure(figsize=(6, 5))
sns.heatmap(conf_matriz, annot=True, fmt='d', cmap='Blues',
            xticklabels=conf_matriz_classes, yticklabels=conf_matriz_classes,cbar=True)

plt.title('Confusion matrix')
plt.xlabel('Predicted Class')
plt.ylabel('Real Class')
plt.tight_layout()
plt.show()

In [None]:
# ROC curve and AUC value, one-vs-rest: one curve per class.

classes = list(labels.values())
class_keys = list(labels.keys())
Y_test_bin = label_binarize(Y_test, classes=classes)

Y_score = model.decision_function(X_test_features_pca)

fpr = dict()
tpr = dict()
roc_auc = dict()

n_classes = len(classes)


# Column i of the binarized labels and of the scores both refer to classes[i].
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(Y_test_bin[:, i], Y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])


plt.figure(figsize=(8, 6))
colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'darkgreen', 'crimson'])

# Display names for Dataset I. When the active dataset has more classes than
# names listed here, the key from `labels` is used so no curve is dropped.
class_names = ['No unb.', 'Unb. I', 'Unb. II', 'Unb. III', 'Unb. IV']

for i, color in zip(range(n_classes), colors):
    name = class_names[i] if i < len(class_names) else class_keys[i]
    plt.plot(fpr[i], tpr[i], color=color, lw=2,
             label=f"{name} (AUC = {roc_auc[i]:.2f})")



plt.plot([0, 1], [0, 1], 'k--', lw=2)   # chance line
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (1 - Specificity)')
# The true positive rate is the sensitivity (recall), not the specificity.
plt.ylabel('True Positive Rate (Sensitivity)')
plt.title('ROC Curves')
plt.legend(loc="lower right")
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Grid search (no cross-validation): every configuration is fitted on the
# PCA-projected training features and scored directly on the projected test
# features.
# NOTE(review): the winner is chosen by its test-set score, so the reported
# metrics are optimistic estimates.

start_time = time.time()

linear_grid = [{'kernel': 'linear', 'C': C} for C in [0.1, 1, 10, 100]]
rbf_grid = [
    {'kernel': 'rbf', 'C': C, 'gamma': gamma}
    for C in [0.1, 1, 10, 100]
    for gamma in [0.001, 0.01, 0.1, 'scale']
]
poly_grid = [
    {'kernel': 'poly', 'C': C, 'degree': d, 'gamma': gamma}
    for C in [0.1, 1, 10]
    for d in [2, 3, 4]
    for gamma in [0.01, 0.1]
]
param_grid = linear_grid + rbf_grid + poly_grid

results = []

for params in param_grid:
    model = SVC(**params)
    model.fit(X_train_features_pca, Y_train)
    Y_pred = model.predict(X_test_features_pca)

    results.append(dict(
        params=params,
        accuracy=accuracy_score(Y_test, Y_pred),
        f1_weighted=f1_score(Y_test, Y_pred, average='weighted'),
        f1_per_class=f1_score(Y_test, Y_pred, average=None),
    ))

elapsed_time = time.time() - start_time

# Highest weighted F1 first; ties keep their grid order.
results_sorted = sorted(results, key=lambda entry: entry['f1_weighted'], reverse=True)
best = results_sorted[0]

# Refit the winning configuration to produce its classification report.
best_model = SVC(**best['params'])
best_model.fit(X_train_features_pca, Y_train)
best_pred = best_model.predict(X_test_features_pca)
report = classification_report(Y_test, best_pred)

print("Melhores parâmetros encontrados:", best['params'])
print("Accuracy:", best['accuracy'])
print("F1-score por classe:", best['f1_per_class'])
print("F1-score ponderado:", best['f1_weighted'])
print("Tempo de treino (s):", elapsed_time)
print("\nClassification Report (melhor modelo):\n", report)
print(results)

## 04 - SVM with FFT feature extraction

### Extract frequency features (FFT)

In [None]:
def extract_features_fft(signal):
    """Return the magnitude of the first half of the signal's FFT.

    The output has ``len(signal) // 2`` entries: the absolute values of the
    FFT coefficients from index 0 up to (not including) the midpoint.
    """
    half_length = len(signal) // 2
    spectrum = fft(signal)
    return np.abs(spectrum)[:half_length]

In [None]:
# One FFT-magnitude row per segment, for the train and test sets.
X_train_fft = np.array([extract_features_fft(segment) for segment in X_train])
X_test_fft = np.array([extract_features_fft(segment) for segment in X_test])

print(X_train_fft.shape)
print(X_test_fft.shape)

In [None]:
# Grid search (no cross-validation): every configuration is fitted on the
# training FFT features and scored directly on the test FFT features.
# NOTE(review): the winner is chosen by its test-set score, so the reported
# metrics are optimistic estimates.

start_time = time.time()

linear_grid = [{'kernel': 'linear', 'C': C} for C in [0.1, 1, 10, 100]]
rbf_grid = [
    {'kernel': 'rbf', 'C': C, 'gamma': gamma}
    for C in [0.1, 1, 10, 100]
    for gamma in [0.001, 0.01, 0.1, 'scale']
]
poly_grid = [
    {'kernel': 'poly', 'C': C, 'degree': d, 'gamma': gamma}
    for C in [0.1, 1, 10]
    for d in [2, 3, 4]
    for gamma in [0.01, 0.1]
]
param_grid = linear_grid + rbf_grid + poly_grid

results = []

for params in param_grid:
    model = SVC(**params)
    model.fit(X_train_fft, Y_train)
    Y_pred = model.predict(X_test_fft)

    results.append(dict(
        params=params,
        accuracy=accuracy_score(Y_test, Y_pred),
        f1_weighted=f1_score(Y_test, Y_pred, average='weighted'),
        f1_per_class=f1_score(Y_test, Y_pred, average=None),
    ))

elapsed_time = time.time() - start_time

# Highest weighted F1 first; ties keep their grid order.
results_sorted = sorted(results, key=lambda entry: entry['f1_weighted'], reverse=True)
best = results_sorted[0]

# Refit the winning configuration to produce its classification report.
best_model = SVC(**best['params'])
best_model.fit(X_train_fft, Y_train)
best_pred = best_model.predict(X_test_fft)
report = classification_report(Y_test, best_pred)

print("Melhores parâmetros encontrados:", best['params'])
print("Accuracy:", best['accuracy'])
print("F1-score por classe:", best['f1_per_class'])
print("F1-score ponderado:", best['f1_weighted'])
print("Tempo de treino (s):", elapsed_time)
print("\nClassification Report (melhor modelo):\n", report)
print(results)


In [None]:
# Train a single model on the FFT features (no grid search) and time the fit.

start_time = time.time()

# Alternative configurations kept for reference (dataset noted where given):
#model = SVC(kernel='rbf', C=100, gamma = 0.001) #dataset I
#model = SVC(kernel='poly', C=1, gamma = 0.01, degree = 2) 
# model = SVC(kernel='linear', C=0.1)   #dataset III

model = SVC(kernel='rbf', C=100, gamma = 'scale')



model.fit(X_train_fft, Y_train)

end_time = time.time()
elapsed_time = end_time - start_time

print("Time spent in training the model (s):", elapsed_time)

In [None]:
# Predict on the test-set FFT features and print per-class metrics.
Y_pred = model.predict(X_test_fft)

print(classification_report(Y_test, Y_pred))

In [None]:
# Confusion matrix of the predictions from the previous cell.
conf_matriz = confusion_matrix(Y_test, Y_pred)

# Tick labels: keep exactly one of the lists below active, matching the
# dataset currently loaded (one name per class).
conf_matriz_classes = ['No unb.', 'Unb. I', 'Unb. II', 'Unb. III', 'Unb. IV']
# conf_matriz_classes = ['No unb.', 'Unb. I', 'Unb. II', 'Misalig.', 'Bearings']
#conf_matriz_classes = ['Normal', 'Unb. I', 'Unb. II','Unb. III', 'Misalig.', 'Unb. II + Misalig.']

plt.figure(figsize=(6, 5))
sns.heatmap(conf_matriz, annot=True, fmt='d', cmap='Blues',
            xticklabels=conf_matriz_classes, yticklabels=conf_matriz_classes,cbar=True)

plt.title('Confusion matrix')
plt.xlabel('Predicted Class')
plt.ylabel('Real Class')
plt.tight_layout()
plt.show()

In [None]:
# One-vs-rest ROC curves and AUC values (only applicable to Dataset I).

classes = list(labels.values())  # [0, 1, 2, 3, 4]
Y_test_bin = label_binarize(Y_test, classes=classes)

# NOTE(review): column i of Y_score is paired with column i of Y_test_bin below,
# so `classes` is assumed to be in the same order as the classifier's own class
# ordering (model.classes_) — confirm.
Y_score = model.decision_function(X_test_fft)

fpr = {}
tpr = {}
roc_auc = {}

n_classes = len(classes)

# One ROC curve per class: that class against all the others.
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(Y_test_bin[:, i], Y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])


plt.figure(figsize=(8, 6))
colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'darkgreen', 'crimson'])

class_names = ['No unb.', 'Unb. I', 'Unb. II', 'Unb. III', 'Unb. IV']

# zip() stops at the end of the finite `class_names`, so the endless colour
# cycle is safe here.
for i, (color, name) in enumerate(zip(colors, class_names)):
    plt.plot(fpr[i], tpr[i], color=color, lw=2,
             label=f"{name} (AUC = {roc_auc[i]:.2f})")


# Diagonal reference line (TPR equal to FPR).
plt.plot([0, 1], [0, 1], 'k--', lw=2)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (1 - Specificity)')
# The true positive rate is the sensitivity (recall), not the specificity.
plt.ylabel('True Positive Rate (Sensitivity)')
plt.title('ROC Curves')
plt.legend(loc="lower right")
plt.grid(True)
plt.tight_layout()
plt.show()


## 05 - SVM with noisy data (noisy training set + clean test set)

In [None]:
def add_white_noise(signal, snr_db, rng=None):
    """Return `signal` plus additive white Gaussian noise at a given SNR.

    The noise variance is the mean power of `signal` divided by the linear
    SNR, so a lower `snr_db` produces stronger noise.

    Parameters
    ----------
    signal : array_like
        Input samples. They are converted to a float array first, so integer
        input cannot overflow when squared.
    snr_db : float
        Target signal-to-noise ratio in decibels.
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, exactly as before. Pass ``np.random.default_rng(seed)`` to get
        reproducible noise.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as `signal`.
    """
    signal = np.asarray(signal, dtype=float)

    signal_power = np.mean(signal ** 2)
    snr_linear = 10 ** (snr_db / 10)
    noise_std = np.sqrt(signal_power / snr_linear)

    # Fall back to the legacy global generator when no Generator is supplied.
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, noise_std, signal.shape)
    return signal + noise

In [None]:
# Corrupt every training signal with white Gaussian noise at 10 dB SNR.
# NOTE(review): no random seed is set in this cell, so unless one is set
# elsewhere the noise realisation differs on every run.
X_train_noisy = np.array([add_white_noise(signal, snr_db=10) for signal in X_train])

# Rough guide to the SNR setting:
# snr_db=30 → Almost noise-free.
# snr_db=10 → Moderate noise.
# snr_db=5 → High noise.

print(X_train_noisy.shape)

In [None]:
def extract_features_fft(signal):
    """Return the FFT magnitude of `signal`, keeping the first half of the bins.

    The result holds ``len(signal) // 2`` values, starting at the DC bin. For
    real-valued input the discarded bins mirror the ones that are kept.
    """
    n_bins = len(signal) // 2
    spectrum = fft(signal)
    return np.abs(spectrum[:n_bins])

In [None]:
# FFT-magnitude features: noisy training signals, clean test signals.
X_train_fft_noisy = np.array(
    [extract_features_fft(noisy_signal) for noisy_signal in X_train_noisy]
)
X_test_fft = np.array(
    [extract_features_fft(clean_signal) for clean_signal in X_test]
)

print(X_train_fft_noisy.shape)
print(X_test_fft.shape)

In [None]:
# Train a single SVM with fixed hyper-parameters (no grid search) on the noisy
# training features and time it.
#
# Alternative configuration kept for reference:
#   SVC(kernel='rbf', C=100, gamma=0.001)

start_time = time.time()

# fit() returns the estimator itself, so construction and training are chained.
model = SVC(C=100, gamma='scale', kernel='rbf').fit(X_train_fft_noisy, Y_train)

end_time = time.time()
elapsed_time = end_time - start_time

print("Time spent in training the model (s):", elapsed_time)

In [None]:
# Score the model trained on noisy data against the clean test features and
# print the classification report.
Y_pred = model.predict(X_test_fft)

print(classification_report(Y_test, Y_pred))

In [None]:
# Confusion matrix for the "noisy train / clean test" experiment.
conf_matriz = confusion_matrix(Y_test, Y_pred)

# Tick labels for the heatmap; enable the list that matches the dataset in use.
conf_matriz_classes = ['No unb.', 'Unb. I', 'Unb. II', 'Unb. III', 'Unb. IV']
# conf_matriz_classes = ['No unb.', 'Unb. I', 'Unb. II', 'Misalig.', 'Bearings']
# conf_matriz_classes = ['Normal', 'Unb. I', 'Unb. II','Unb. III', 'Misalig.', 'Unb. II + Misalig.']

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(conf_matriz, annot=True, fmt='d', cmap='Blues',
            xticklabels=conf_matriz_classes, yticklabels=conf_matriz_classes,
            cbar=False, ax=ax)

ax.set_title('Confusion matrix')
# Axis labels spelled as in the first confusion-matrix plot of the notebook.
ax.set_xlabel('Predicted Class')
ax.set_ylabel('Real Class')
fig.tight_layout()
plt.show()

## 06 - SVM with noisy data (clean training set + noisy test set)

In [None]:
def add_white_noise(signal, snr_db, rng=None):
    """Return `signal` plus additive white Gaussian noise at a given SNR.

    The noise variance is the mean power of `signal` divided by the linear
    SNR, so a lower `snr_db` produces stronger noise.

    Parameters
    ----------
    signal : array_like
        Input samples. They are converted to a float array first, so integer
        input cannot overflow when squared.
    snr_db : float
        Target signal-to-noise ratio in decibels.
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, exactly as before. Pass ``np.random.default_rng(seed)`` to get
        reproducible noise.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as `signal`.
    """
    signal = np.asarray(signal, dtype=float)

    signal_power = np.mean(signal ** 2)
    snr_linear = 10 ** (snr_db / 10)
    noise_std = np.sqrt(signal_power / snr_linear)

    # Fall back to the legacy global generator when no Generator is supplied.
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, noise_std, signal.shape)
    return signal + noise

In [None]:
# Corrupt every test signal with white Gaussian noise at 10 dB SNR.
# NOTE(review): no random seed is set in this cell, so unless one is set
# elsewhere the noise realisation differs on every run.
X_test_noisy = np.array([add_white_noise(signal, snr_db=10) for signal in X_test])

print(X_test_noisy.shape)

In [None]:
def extract_features_fft(signal):
    """Return the FFT magnitude of `signal`, keeping the first half of the bins.

    The result holds ``len(signal) // 2`` values, starting at the DC bin. For
    real-valued input the discarded bins mirror the ones that are kept.
    """
    n_bins = len(signal) // 2
    spectrum = fft(signal)
    return np.abs(spectrum[:n_bins])

In [None]:
# FFT-magnitude features: clean training signals, noisy test signals.
X_train_fft = np.array(
    [extract_features_fft(clean_signal) for clean_signal in X_train]
)
X_test_fft_noisy = np.array(
    [extract_features_fft(noisy_signal) for noisy_signal in X_test_noisy]
)

print(X_train_fft.shape)
print(X_test_fft_noisy.shape)

In [None]:
# Train a single SVM with fixed hyper-parameters (no grid search) on the clean
# training features and time it.
#
# Alternative configuration kept for reference:
#   SVC(kernel='rbf', C=100, gamma=0.001)

start_time = time.time()

# fit() returns the estimator itself, so construction and training are chained.
model = SVC(C=100, gamma='scale', kernel='rbf').fit(X_train_fft, Y_train)

end_time = time.time()
elapsed_time = end_time - start_time

print("Time spent in training the model (s):", elapsed_time)

In [None]:
# Score the model trained on clean data against the noisy test features and
# print the classification report.
Y_pred = model.predict(X_test_fft_noisy)

print(classification_report(Y_test, Y_pred))

In [None]:
# Confusion matrix for the "clean train / noisy test" experiment.
conf_matriz = confusion_matrix(Y_test, Y_pred)

# Tick labels for the heatmap; enable the list that matches the dataset in use.
conf_matriz_classes = ['No unb.', 'Unb. I', 'Unb. II', 'Unb. III', 'Unb. IV']
# conf_matriz_classes = ['No unb.', 'Unb. I', 'Unb. II', 'Misalig.', 'Bearings']
# conf_matriz_classes = ['Normal', 'Unb. I', 'Unb. II','Unb. III', 'Misalig.', 'Unb. II + Misalig.']

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(conf_matriz, annot=True, fmt='d', cmap='Blues',
            xticklabels=conf_matriz_classes, yticklabels=conf_matriz_classes,
            cbar=False, ax=ax)

ax.set_title('Confusion matrix')
# Axis labels spelled as in the first confusion-matrix plot of the notebook.
ax.set_xlabel('Predicted Class')
ax.set_ylabel('Real Class')
fig.tight_layout()
plt.show()

## 07 - SVM with noisy data (noisy training set + noisy test set)

In [None]:
def add_white_noise(signal, snr_db, rng=None):
    """Return `signal` plus additive white Gaussian noise at a given SNR.

    The noise variance is the mean power of `signal` divided by the linear
    SNR, so a lower `snr_db` produces stronger noise.

    Parameters
    ----------
    signal : array_like
        Input samples. They are converted to a float array first, so integer
        input cannot overflow when squared.
    snr_db : float
        Target signal-to-noise ratio in decibels.
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, exactly as before. Pass ``np.random.default_rng(seed)`` to get
        reproducible noise.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as `signal`.
    """
    signal = np.asarray(signal, dtype=float)

    signal_power = np.mean(signal ** 2)
    snr_linear = 10 ** (snr_db / 10)
    noise_std = np.sqrt(signal_power / snr_linear)

    # Fall back to the legacy global generator when no Generator is supplied.
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, noise_std, signal.shape)
    return signal + noise

In [None]:
# Corrupt both the training and the test signals with white Gaussian noise at
# 10 dB SNR. Every call draws fresh noise, so this X_train_noisy is a new
# realisation rather than a copy of an earlier one.
# NOTE(review): no random seed is set in this cell, so unless one is set
# elsewhere the noise realisation differs on every run.
X_train_noisy = np.array([add_white_noise(signal, snr_db=10) for signal in X_train])
X_test_noisy = np.array([add_white_noise(signal, snr_db=10) for signal in X_test])

print(X_train_noisy.shape)
print(X_test_noisy.shape)

In [None]:
def extract_features_fft(signal):
    """Return the FFT magnitude of `signal`, keeping the first half of the bins.

    The result holds ``len(signal) // 2`` values, starting at the DC bin. For
    real-valued input the discarded bins mirror the ones that are kept.
    """
    n_bins = len(signal) // 2
    spectrum = fft(signal)
    return np.abs(spectrum[:n_bins])
 

In [None]:
# FFT-magnitude features: noisy training signals and noisy test signals.
X_train_fft_noisy = np.array(
    [extract_features_fft(noisy_signal) for noisy_signal in X_train_noisy]
)
X_test_fft_noisy = np.array(
    [extract_features_fft(noisy_signal) for noisy_signal in X_test_noisy]
)

print(X_train_fft_noisy.shape)
print(X_test_fft_noisy.shape)

In [None]:
# Train a single SVM with fixed hyper-parameters (no grid search) on the noisy
# training features and time it.
#
# Alternative configuration kept for reference:
#   SVC(kernel='rbf', C=100, gamma=0.001)

start_time = time.time()

# fit() returns the estimator itself, so construction and training are chained.
model = SVC(C=100, gamma='scale', kernel='rbf').fit(X_train_fft_noisy, Y_train)

end_time = time.time()
elapsed_time = end_time - start_time

print("Time spent in training the model (s):", elapsed_time)

In [None]:
# Score the model trained on noisy data against the noisy test features and
# print the classification report.
Y_pred = model.predict(X_test_fft_noisy)

print(classification_report(Y_test, Y_pred))

In [None]:
# Confusion matrix for the "noisy train / noisy test" experiment.
conf_matriz = confusion_matrix(Y_test, Y_pred)

# Tick labels for the heatmap; enable the list that matches the dataset in use.
conf_matriz_classes = ['No unb.', 'Unb. I', 'Unb. II', 'Unb. III', 'Unb. IV']
# conf_matriz_classes = ['No unb.', 'Unb. I', 'Unb. II', 'Misalig.', 'Bearings']
# conf_matriz_classes = ['Normal', 'Unb. I', 'Unb. II','Unb. III', 'Misalig.', 'Unb. II + Misalig.']

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(conf_matriz, annot=True, fmt='d', cmap='Blues',
            xticklabels=conf_matriz_classes, yticklabels=conf_matriz_classes,
            cbar=False, ax=ax)

ax.set_title('Confusion matrix')
# Axis labels spelled as in the first confusion-matrix plot of the notebook.
ax.set_xlabel('Predicted Class')
ax.set_ylabel('Real Class')
fig.tight_layout()
plt.show()