# Imports

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import numpy as np



## Processo de carregar e filtrar os dados do dataset

In [3]:
# Carregar o dataset em um dataframe
df = pd.read_csv('./datasets/predictive_maintenance.csv')

In [4]:
# Deletar colunas que não são uteis
df_processed = df.drop(['UDI', 'Product ID', 'Target'], axis=1)

# A coluna Failure Type é a coluna que vai ser preditada precisa ser convertida de texto para números

label_encoder = LabelEncoder()
df_processed['Failure Type'] = label_encoder.fit_transform(df_processed['Failure Type'])

# A coluna 'Type' é uma feature. Vamos usar a codificação one-hot para convertê-la
# Isso impede que o modelo pense que 'L' < 'M' < 'H'.
df_processed = pd.get_dummies(df_processed, columns=['Type'], drop_first=True)

# Separando as colunas preditada e o resto
X = df_processed.drop('Failure Type', axis=1)
y = df_processed['Failure Type']

print("\nProcessed Features (X) head:")
print(X.head())


Processed Features (X) head:
   Air temperature [K]  Process temperature [K]  Rotational speed [rpm]  \
0                298.1                    308.6                    1551   
1                298.2                    308.7                    1408   
2                298.1                    308.5                    1498   
3                298.2                    308.6                    1433   
4                298.2                    308.7                    1408   

   Torque [Nm]  Tool wear [min]  Type_L  Type_M  
0         42.8                0   False    True  
1         46.3                3    True   False  
2         49.4                5    True   False  
3         39.5                7    True   False  
4         40.0                9    True   False  


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


print(f"\nQuantidade de linhas de Treinamento: {X_train.shape}")
print(f"\nQuantidade de linhas de Teste: {X_test.shape}")


Quantidade de linhas de Treinamento: (8000, 7)

Quantidade de linhas de Teste: (2000, 7)


In [6]:

# Colunas que são numericas e precisam de normalização
numerical_features = ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']

#Inicializar o scaler
scaler = StandardScaler()

# Colocar o scaler para aprender os parâmetros de normalização (média e desvio padrão) a partir dos dados de treino
scaler.fit(X_train[numerical_features])

# Transoformar os dados de treino e teste
X_train[numerical_features] = scaler.transform(X_train[numerical_features])
X_test[numerical_features] = scaler.transform(X_test[numerical_features])

print("\nNormalized Training Data (X_train) head:")
print(X_train.head())


Normalized Training Data (X_train) head:
      Air temperature [K]  Process temperature [K]  Rotational speed [rpm]  \
7755             0.197284                 1.212185               -0.978427   
2671            -0.152754                -0.475872               -0.772318   
4377             0.897361                -0.273305               -0.566209   
3137             0.197284                -0.070738                0.163529   
1935            -0.952842                -1.421183               -0.894869   

      Torque [Nm]  Tool wear [min]  Type_L  Type_M  
7755     0.757249         1.259463   False    True  
2671     0.185550         1.778517   False    True  
4377     0.897666         1.668415   False    True  
3137    -0.356059        -0.565093    True   False  
1935     1.509485        -1.036961   False    True  


# Testes


In [10]:
param_grid_Gaussian_NB = {
    'var_smoothing': [1e-9, 1e-8, 1e-7]
}

naive_bayes = GaussianNB()

param_grid_Gaussian_NB = {
    'var_smoothing': np.logspace(0,-9, num=100)
}

gs_nb = GridSearchCV(estimator=naive_bayes, param_grid=param_grid_Gaussian_NB, scoring='accuracy', cv=5, n_jobs=-1)
gs_nb.fit(X_train, y_train)

0,1,2
,estimator,GaussianNB()
,param_grid,{'var_smoothing': array([1.0000...00000000e-09])}
,scoring,'accuracy'
,n_jobs,-1
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,priors,
,var_smoothing,np.float64(1.0)


In [8]:
naive_bayes = GaussianNB()

naive_bayes.fit(X_train, y_train)

y_pred = naive_bayes.predict(X_test)

In [11]:
print("Acuracia = ", accuracy_score(y_test, y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred, zero_division=0.0))

Acuracia =  0.9485
              precision    recall  f1-score   support

           0       0.44      0.77      0.56        22
           1       0.98      0.96      0.97      1930
           2       0.48      0.94      0.64        16
           3       0.11      0.21      0.14        19
           4       0.00      0.00      0.00         4
           5       0.00      0.00      0.00         9

    accuracy                           0.95      2000
   macro avg       0.33      0.48      0.38      2000
weighted avg       0.96      0.95      0.95      2000



##