In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
import matplotlib
warnings.filterwarnings("ignore")
pd.set_option("display.max_rows",None)
pd.set_option("display.max_columns",None)


In [2]:
plt.style.use('dark_background')

In [3]:
df=pd.read_csv("predictive_maintenance.csv")

In [4]:
df.columns

Index(['UDI', 'Product ID', 'Type', 'Air temperature [K]',
       'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]',
       'Tool wear [min]', 'Target', 'Failure Type'],
      dtype='object')

In [5]:
df=df.drop(["UDI","Product ID"],axis=1)

In [6]:
df.shape

(10000, 8)

In [7]:
## Converting temperature in centigrate from Kelvin [1 K = -273.15 °C  ] 

df["Air temperature [K]"] = df["Air temperature [K]"] - 273.15
df["Process temperature [K]"] = df["Process temperature [K]"] - 273.15

# Renaming temperature in Centigrate(°C) from Kelvin (K)
df.rename(columns={"Air temperature [K]" : "Air temperature [°C]","Process temperature [K]" : "Process temperature [°C]"},inplace=True)

In [8]:
df["Temperature difference [°C]"] = df["Process temperature [°C]"] - df["Air temperature [°C]"]
df.sample(5)

Unnamed: 0,Type,Air temperature [°C],Process temperature [°C],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type,Temperature difference [°C]
4075,L,28.85,37.45,1583,38.9,94,0,No Failure,8.6
6151,L,27.95,38.05,1555,35.3,172,0,No Failure,10.1
1843,L,24.45,33.95,1496,39.2,12,0,No Failure,9.5
396,L,24.25,35.15,1490,42.6,164,0,No Failure,10.9
9379,L,24.35,35.05,1419,48.5,108,0,No Failure,10.7


In [9]:
## check for missing values
df.isnull().sum()

Type                           0
Air temperature [°C]           0
Process temperature [°C]       0
Rotational speed [rpm]         0
Torque [Nm]                    0
Tool wear [min]                0
Target                         0
Failure Type                   0
Temperature difference [°C]    0
dtype: int64

In [10]:
df.dtypes

Type                            object
Air temperature [°C]           float64
Process temperature [°C]       float64
Rotational speed [rpm]           int64
Torque [Nm]                    float64
Tool wear [min]                  int64
Target                           int64
Failure Type                    object
Temperature difference [°C]    float64
dtype: object

In [11]:
df.describe()

Unnamed: 0,Air temperature [°C],Process temperature [°C],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Temperature difference [°C]
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,26.85493,36.85556,1538.7761,39.98691,107.951,0.0339,10.00063
std,2.000259,1.483734,179.284096,9.968934,63.654147,0.180981,1.001094
min,22.15,32.55,1168.0,3.8,0.0,0.0,7.6
25%,25.15,35.65,1423.0,33.2,53.0,0.0,9.3
50%,26.95,36.95,1503.0,40.1,108.0,0.0,9.8
75%,28.35,37.95,1612.0,46.8,162.0,0.0,11.0
max,31.35,40.65,2886.0,76.6,253.0,1.0,12.1


In [12]:
df["Target"].value_counts()

0    9661
1     339
Name: Target, dtype: int64

In [13]:
df["Failure Type"].value_counts()

No Failure                  9652
Heat Dissipation Failure     112
Power Failure                 95
Overstrain Failure            78
Tool Wear Failure             45
Random Failures               18
Name: Failure Type, dtype: int64

In [14]:
df.dtypes

Type                            object
Air temperature [°C]           float64
Process temperature [°C]       float64
Rotational speed [rpm]           int64
Torque [Nm]                    float64
Tool wear [min]                  int64
Target                           int64
Failure Type                    object
Temperature difference [°C]    float64
dtype: object

In [15]:
from sklearn.preprocessing import LabelEncoder
st=LabelEncoder()

df["Type"]=st.fit_transform(df["Type"])

In [16]:
df["Failure Type"]=st.fit_transform(df["Failure Type"])

In [17]:
df.dtypes

Type                             int32
Air temperature [°C]           float64
Process temperature [°C]       float64
Rotational speed [rpm]           int64
Torque [Nm]                    float64
Tool wear [min]                  int64
Target                           int64
Failure Type                     int32
Temperature difference [°C]    float64
dtype: object

In [18]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [19]:
x=df.drop(['Failure Type'],axis=1)
y=df[['Failure Type']]

In [20]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.30,random_state=42)

In [21]:
model=LogisticRegression()

In [22]:
model.fit(x_train, y_train)

In [23]:
model.score(x_train, y_train)

0.9644285714285714

In [24]:
model.score(x_test, y_test)

0.9703333333333334

In [25]:
from sklearn.tree import DecisionTreeClassifier
dTreeR = DecisionTreeClassifier(criterion = 'gini', max_depth = 3, random_state=0)
dTreeR.fit(x_train, y_train)

In [26]:
dTreeR.score(x_test, y_test)

0.9896666666666667

In [27]:
dTreeR.score(x_train, y_train)

0.9915714285714285

In [28]:
from sklearn.ensemble import RandomForestClassifier
rfcl = RandomForestClassifier(n_estimators = 50, random_state=0,max_features=3,max_depth=3)
rfcl = rfcl.fit(x_train, y_train)

In [29]:
rfcl.score(x_train, y_train)

0.9942857142857143

In [30]:
rfcl.score(x_test, y_test)

0.992

In [31]:
from sklearn.svm import SVC
svc=SVC(kernel='linear')
svc.fit(x_train, y_train)

In [32]:
svc.score(x_train, y_train)

0.9978571428571429

In [33]:
svc.score(x_test, y_test)

0.996