In [134]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
import matplotlib
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and dtype warnings from
# pandas/sklearn are hidden as well - consider narrowing it by category.
warnings.filterwarnings(action="ignore")

# Lift pandas' truncation limits so displayed frames show all rows and all columns.
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)


In [135]:
# Apply matplotlib's 'dark_background' style sheet; it affects every figure
# created after this cell runs.
plt.style.use('dark_background')

In [136]:
# Load the dataset into `df`. The path is relative, so the CSV must sit in the
# kernel's current working directory.
df=pd.read_csv("predictive_maintenance.csv")

In [137]:
# List the raw column labels to see what the file contains.
df.columns

Index(['UDI', 'Product ID', 'Type', 'Air temperature [K]',
       'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]',
       'Tool wear [min]', 'Target', 'Failure Type'],
      dtype='object')

In [138]:
# Remove the "UDI" and "Product ID" columns; every other column is kept.
df = df.drop(columns=["UDI", "Product ID"])

In [139]:
# (rows, columns) of the frame after "UDI" and "Product ID" were dropped.
df.shape

(10000, 8)

In [140]:
## Convert both temperature columns from Kelvin to degrees Celsius (°C = K - 273.15)

# Old label -> new label. The keys are the columns to convert; the values are
# the names they receive once their contents are in °C.
celsius_labels = {
    "Air temperature [K]": "Air temperature [°C]",
    "Process temperature [K]": "Process temperature [°C]",
}
kelvin_columns = list(celsius_labels)

# Shift the values first ...
df[kelvin_columns] = df[kelvin_columns] - 273.15

# ... then relabel the columns so the unit in the header matches the data.
df.rename(columns=celsius_labels, inplace=True)

In [141]:
# Derived feature: process temperature minus air temperature, row by row.
# Both inputs are already in °C, so the difference is in °C as well.
df["Temperature difference [°C]"] = df["Process temperature [°C]"] - df["Air temperature [°C]"]

# Preview five randomly chosen rows. The fixed seed makes the same rows appear
# on every Restart & Run All instead of a different sample each time.
df.sample(5, random_state=42)

Unnamed: 0,Type,Air temperature [°C],Process temperature [°C],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type,Temperature difference [°C]
1982,M,24.85,34.55,1487,41.3,160,0,No Failure,9.7
173,M,25.05,34.85,1591,33.7,32,0,No Failure,9.8
3195,M,26.85,36.05,1635,32.4,4,0,No Failure,9.2
3733,L,29.05,37.95,1470,39.6,97,0,No Failure,8.9
9739,L,25.75,36.85,1515,42.9,170,0,No Failure,11.1


In [142]:
## Count the missing (NaN/None) entries in each column
df.isna().sum()

Type                           0
Air temperature [°C]           0
Process temperature [°C]       0
Rotational speed [rpm]         0
Torque [Nm]                    0
Tool wear [min]                0
Target                         0
Failure Type                   0
Temperature difference [°C]    0
dtype: int64

In [143]:
# Show each column's dtype to tell numeric columns apart from object (string) ones.
df.dtypes

Type                            object
Air temperature [°C]           float64
Process temperature [°C]       float64
Rotational speed [rpm]           int64
Torque [Nm]                    float64
Tool wear [min]                  int64
Target                           int64
Failure Type                    object
Temperature difference [°C]    float64
dtype: object

In [144]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Air temperature [°C],Process temperature [°C],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Temperature difference [°C]
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,26.85493,36.85556,1538.7761,39.98691,107.951,0.0339,10.00063
std,2.000259,1.483734,179.284096,9.968934,63.654147,0.180981,1.001094
min,22.15,32.55,1168.0,3.8,0.0,0.0,7.6
25%,25.15,35.65,1423.0,33.2,53.0,0.0,9.3
50%,26.95,36.95,1503.0,40.1,108.0,0.0,9.8
75%,28.35,37.95,1612.0,46.8,162.0,0.0,11.0
max,31.35,40.65,2886.0,76.6,253.0,1.0,12.1


In [145]:
# Class balance of the "Target" column (number of rows per distinct value).
df["Target"].value_counts()

0    9661
1     339
Name: Target, dtype: int64

In [146]:
# Number of rows per "Failure Type" category.
# NOTE(review): the recorded outputs show 339 rows with Target == 1 but
# 10000 - 9652 = 348 rows whose Failure Type is not "No Failure", so the two
# label columns disagree on some rows - check this before modelling.
df["Failure Type"].value_counts()

No Failure                  9652
Heat Dissipation Failure     112
Power Failure                 95
Overstrain Failure            78
Tool Wear Failure             45
Random Failures               18
Name: Failure Type, dtype: int64

In [147]:
# Dtypes again, as a reference point before the object columns
# ("Type" and "Failure Type") are label-encoded in the following cells.
df.dtypes

Type                            object
Air temperature [°C]           float64
Process temperature [°C]       float64
Rotational speed [rpm]           int64
Torque [Nm]                    float64
Tool wear [min]                  int64
Target                           int64
Failure Type                    object
Temperature difference [°C]    float64
dtype: object

In [148]:
from sklearn.preprocessing import LabelEncoder

# Replace the string categories in "Type" with integer codes.
# LabelEncoder numbers the classes in sorted label order.
# NOTE(review): that order may not match any intended ranking of the Type
# values - confirm before treating the codes as ordinal.
st = LabelEncoder()
df["Type"] = st.fit_transform(df["Type"])

# `st` is refit on "Failure Type" in the next cell, which overwrites
# `st.classes_`. Snapshot the code -> original label mapping for "Type" now,
# otherwise the encoded column can no longer be decoded.
type_code_to_label = dict(enumerate(st.classes_))

In [149]:
# Refit the same encoder on "Failure Type" and overwrite the column with its
# integer codes. fit_transform replaces the classes learned for "Type", so
# from here on `st.classes_` / `st.inverse_transform` describe Failure Type only.
df["Failure Type"]=st.fit_transform(df["Failure Type"])

In [150]:
# Verify the encoding: "Type" and "Failure Type" should now be integer columns.
df.dtypes

Type                             int32
Air temperature [°C]           float64
Process temperature [°C]       float64
Rotational speed [rpm]           int64
Torque [Nm]                    float64
Tool wear [min]                  int64
Target                           int64
Failure Type                     int32
Temperature difference [°C]    float64
dtype: object