In [None]:
#import necessary libraries

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report

In [None]:
# Load the raw records from the CSV file located in the working directory.
data = pd.read_csv(filepath_or_buffer='predictive_maintenance.csv')

In [None]:
# Preview the first five rows of the freshly loaded table.
data.head(n=5)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [None]:
# Print column names, non-null counts and dtypes of the loaded table.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   UDI                      10000 non-null  int64  
 1   Product ID               10000 non-null  object 
 2   Type                     10000 non-null  object 
 3   Air temperature [K]      10000 non-null  float64
 4   Process temperature [K]  10000 non-null  float64
 5   Rotational speed [rpm]   10000 non-null  int64  
 6   Torque [Nm]              10000 non-null  float64
 7   Tool wear [min]          10000 non-null  int64  
 8   Target                   10000 non-null  int64  
 9   Failure Type             10000 non-null  object 
dtypes: float64(3), int64(4), object(3)
memory usage: 781.4+ KB


In [None]:
#data cleaning/preprocessing
# Remove the columns that are not used as model inputs.
# NOTE(review): 'UDI' and 'Product ID' look like row/product identifiers and
# 'Failure Type' presumably restates the outcome stored in 'Target' - confirm.
#
# errors='ignore' keeps this cell idempotent: without it, running the cell a
# second time raises KeyError because the columns have already been removed.
data = data.drop(columns=['UDI', 'Product ID', 'Failure Type'], errors='ignore')

In [None]:
# Preview the first five rows of the table as it stands after the column drop.
data.head(n=5)

Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target
0,M,298.1,308.6,1551,42.8,0,0
1,L,298.2,308.7,1408,46.3,3,0
2,L,298.1,308.5,1498,49.4,5,0
3,L,298.2,308.6,1433,39.5,7,0
4,L,298.2,308.7,1408,40.0,9,0


In [None]:
# Print column names, non-null counts and dtypes of the reduced table.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Type                     10000 non-null  object 
 1   Air temperature [K]      10000 non-null  float64
 2   Process temperature [K]  10000 non-null  float64
 3   Rotational speed [rpm]   10000 non-null  int64  
 4   Torque [Nm]              10000 non-null  float64
 5   Tool wear [min]          10000 non-null  int64  
 6   Target                   10000 non-null  int64  
dtypes: float64(3), int64(3), object(1)
memory usage: 547.0+ KB


In [None]:
#mapping values
# Encode the categorical 'Type' column as integers (L -> 1, M -> 2, H -> 3).
#
# Series.map replaces every value that is missing from the dict with NaN, so
# running this cell a second time (when the column already holds 1/2/3) would
# silently turn the whole column into NaN. The guard below skips the mapping
# once the column is already encoded, which makes the cell safe to re-run.
type_codes = {'L': 1, 'M': 2, 'H': 3}
if not data['Type'].isin(list(type_codes.values())).all():
    data['Type'] = data['Type'].map(type_codes)

In [None]:
# Count how many rows carry each encoded 'Type' value.
data['Type'].value_counts()

1    6000
2    2997
3    1003
Name: Type, dtype: int64

In [None]:
# Preview the first five rows to check the integer-encoded 'Type' column.
data.head(n=5)

Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target
0,2,298.1,308.6,1551,42.8,0,0
1,1,298.2,308.7,1408,46.3,3,0
2,1,298.1,308.5,1498,49.4,5,0
3,1,298.2,308.6,1433,39.5,7,0
4,1,298.2,308.7,1408,40.0,9,0


In [None]:
# Separate the label ('Target') from the predictor columns.
y = data['Target']
x = data.drop(columns=['Target'])

In [None]:
# (rows, columns) of the feature table.
x.shape

(10000, 6)

In [None]:
# Length of the label vector; should match the row count of x.
y.shape

(10000,)

In [None]:
#data transformation

# Min-max scale every feature column and rebuild a DataFrame that keeps the
# original column labels (fit_transform returns a plain array).
# NOTE(review): the scaler is fit on every row before the train/test split that
# happens later in the notebook, so held-out rows influence the scaling
# parameters - consider fitting on the training split only.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(x)
x = pd.DataFrame(data=scaled_values, columns=x.columns)

In [None]:
# Preview the first five rows of the scaled feature table.
x.head(n=5)

Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min]
0,0.5,0.304348,0.358025,0.222934,0.535714,0.0
1,0.0,0.315217,0.37037,0.139697,0.583791,0.011858
2,0.0,0.304348,0.345679,0.192084,0.626374,0.019763
3,0.0,0.315217,0.358025,0.154249,0.490385,0.027668
4,0.0,0.315217,0.37037,0.139697,0.497253,0.035573


In [None]:
# Replace the column labels with shorter names that carry no unit suffix.
# The list is positional: it must stay in the same order as the current columns.
feature_names = [
    'Type',
    'Air Temperature',
    'Process Temperature',
    'Rotational Speed',
    'Torque',
    'Tool Wear',
]
x.columns = feature_names

In [None]:
# Split into train and test partitions.
# stratify=y keeps the class ratio of the label the same in both partitions;
# random_state=42 makes the shuffle repeatable. No test_size is passed, so the
# library default split proportion applies.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=42,
    stratify=y,
)

In [None]:
# Shapes of the four partitions, in the order x_train, x_test, y_train, y_test.
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

((7500, 6), (2500, 6), (7500,), (2500,))

In [None]:
# Disabled hyper-parameter search over max_depth, kept inside a bare string
# literal so that it is not executed.
# NOTE(review): GridSearchCV is not among the names imported in the import cell;
# re-enabling this code needs `from sklearn.model_selection import GridSearchCV`.
# NOTE(review): presumably the origin of the max_depth value used for the
# classifier later in the notebook - confirm.
'''
tree_params = {'max_depth': np.arange(1, 20)}
grid_search_tree = GridSearchCV(DecisionTreeClassifier(), tree_params, cv=5, scoring='accuracy')
grid_search_tree.fit(x_train, y_train)
grid_search_tree.best_params_
'''

In [None]:
# Decision tree limited to a depth of 10.
# random_state is pinned (same seed as the train/test split) because the tree
# builder permutes features at each split; without a fixed seed the fitted tree
# and the reported metrics can differ between runs of the notebook.
clf_tree = DecisionTreeClassifier(max_depth=10, random_state=42)

In [None]:
# Fit the tree on the training partition only.
clf_tree.fit(X=x_train, y=y_train)

In [None]:
# Predicted class labels for the held-out test partition.
predict_tree = clf_tree.predict(X=x_test)

In [None]:
# Mean accuracy of the fitted tree on the held-out test partition.
acc_tree = clf_tree.score(X=x_test, y=y_test)

In [None]:
# Display the test accuracy as a fraction between 0 and 1.
acc_tree

0.9844

In [None]:
# Per-class precision / recall / F1 on the test partition. This matters more
# than the headline accuracy whenever the two classes differ greatly in size.
report_text = classification_report(y_true=y_test, y_pred=predict_tree)
print(report_text)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      2415
           1       0.77      0.76      0.77        85

    accuracy                           0.98      2500
   macro avg       0.88      0.88      0.88      2500
weighted avg       0.98      0.98      0.98      2500



In [None]:
# Report the test accuracy as a percentage with two decimals.
acc_percent = acc_tree * 100
print(f'Decision Tree accuracy = {acc_percent:.2f}')

Decision Tree accuracy = 98.44
