In [39]:
import numpy as np
import pandas as pd

from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

In [40]:
df = pd.read_csv("predictive_maintenance.csv")

In [41]:
df_numeric = df.loc[:,['Air temperature [K]','Process temperature [K]','Rotational speed [rpm]','Torque [Nm]','Tool wear [min]']]
df_cat    = df.loc[:,['Type']]

In [42]:
df_numeric.skew()

Air temperature [K]        0.114274
Process temperature [K]    0.015027
Rotational speed [rpm]     1.993171
Torque [Nm]               -0.009517
Tool wear [min]            0.027292
dtype: float64

In [43]:
pd.pivot_table(df,
               index   = 'Failure Type', 
               columns = 'Type', 
               values  = 'UDI',
               aggfunc ='count')

Type,H,L,M
Failure Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Heat Dissipation Failure,8,74,30
No Failure,979,5757,2916
Overstrain Failure,1,73,4
Power Failure,5,59,31
Random Failures,4,12,2
Tool Wear Failure,6,25,14


In [44]:
pd.pivot_table(df,
               index   = 'Target', 
               columns = 'Type', 
               values  = 'UDI',
               aggfunc ='count')

Type,H,L,M
Target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,982,5765,2914
1,21,235,83


In [45]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['Type']         = le.fit_transform(df.loc[:,["Type"]].values)
df['Failure Type'] = le.fit_transform(df.loc[:,["Failure Type"]].values)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



In [46]:
from sklearn.preprocessing import LabelEncoder

# Assuming df['Type'] and df['Failure Type'] have been encoded using LabelEncoder
le = LabelEncoder()

# Fit the LabelEncoder to your data
le.fit(df['Type'])

# View the classes
classes_type = le.classes_
print("Classes for 'Type':", classes_type)

# Fit the LabelEncoder to your other data
le.fit(df['Failure Type'])

# View the classes
classes_failure_type = le.classes_
print("Classes for 'Failure Type':", classes_failure_type)


Classes for 'Type': [0 1 2]
Classes for 'Failure Type': [0 1 2 3 4 5]


In [47]:
df = df.drop(["UDI","Product ID"],axis = 1)

In [48]:
from sklearn.model_selection import train_test_split

X  = df.iloc[:, :-2].values
y  = df.loc[:,['Failure Type']].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [49]:
from sklearn.preprocessing import StandardScaler

scaler       = StandardScaler()
X_train_sc   = scaler.fit_transform(X_train)                # Fit and transform the training set 
X_test_sc    = scaler.transform(X_test)                     # DO NOT CHEAT! Only transform the test set

In [50]:
!pip install xgboost



In [51]:
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.multioutput import MultiOutputClassifier

In [52]:
xgb_clf = XGBClassifier()
xgb_clf.fit(X_train, y_train)
print("Multi-Output Training Accuracy: ", xgb_clf.score(X_train, y_train)*100, "%")




A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



Multi-Output Training Accuracy:  100.0 %


In [53]:
y_pred_xgb   = xgb_clf.predict(X_test)

In [54]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

print("Test Accuracy (Failure Type) : ",accuracy_score(y_test, y_pred_xgb)*100,"%")

Test Accuracy (Failure Type) :  98.4 %


In [55]:
import joblib
joblib.dump(xgb_clf, 'xgboost_model.pkl')

['xgboost_model.pkl']