# Vehicle Predictive Maintenance

###### The objective of this project is to predict the air temperature from the independent features.

## Importing all the dependencies

In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from pandas_profiling import ProfileReport
from sklearn.metrics import accuracy_score

## Data Loading

In [2]:
# Read the raw dataset from the CSV file (path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer='ai4i2020.csv')

In [3]:
# Preview the first five rows of the raw data.
data.head(n=5)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,0,0,0,0,0
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,0,0,0,0,0
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,0,0,0,0,0
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,0,0,0,0,0
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,0,0,0,0,0


## Data Analysis

In [4]:
# (number of rows, number of columns) of the raw data.
data.shape

(10000, 14)

The data has 10,000 rows and 14 columns: 13 independent features and 1 dependent feature.

In [5]:
# Count the missing values in each column.
data.isna().sum()

UDI                        0
Product ID                 0
Type                       0
Air temperature [K]        0
Process temperature [K]    0
Rotational speed [rpm]     0
Torque [Nm]                0
Tool wear [min]            0
Machine failure            0
TWF                        0
HDF                        0
PWF                        0
OSF                        0
RNF                        0
dtype: int64

#### The data does not contain any null values.

In [6]:
# Print column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   UDI                      10000 non-null  int64  
 1   Product ID               10000 non-null  object 
 2   Type                     10000 non-null  object 
 3   Air temperature [K]      10000 non-null  float64
 4   Process temperature [K]  10000 non-null  float64
 5   Rotational speed [rpm]   10000 non-null  int64  
 6   Torque [Nm]              10000 non-null  float64
 7   Tool wear [min]          10000 non-null  int64  
 8   Machine failure          10000 non-null  int64  
 9   TWF                      10000 non-null  int64  
 10  HDF                      10000 non-null  int64  
 11  PWF                      10000 non-null  int64  
 12  OSF                      10000 non-null  int64  
 13  RNF                      10000 non-null  int64  
dtypes: float64(3), int64(9)

#### All the features are numerical except `Product ID` and `Type`, which have the `object` dtype.

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

Unnamed: 0,UDI,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5000.5,300.00493,310.00556,1538.7761,39.98691,107.951,0.0339,0.0046,0.0115,0.0095,0.0098,0.0019
std,2886.89568,2.000259,1.483734,179.284096,9.968934,63.654147,0.180981,0.067671,0.106625,0.097009,0.098514,0.04355
min,1.0,295.3,305.7,1168.0,3.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2500.75,298.3,308.8,1423.0,33.2,53.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,5000.5,300.1,310.1,1503.0,40.1,108.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,7500.25,301.5,311.1,1612.0,46.8,162.0,0.0,0.0,0.0,0.0,0.0,0.0
max,10000.0,304.5,313.8,2886.0,76.6,253.0,1.0,1.0,1.0,1.0,1.0,1.0


In [8]:
# Pairwise correlation of the numeric columns only. `Product ID` and `Type`
# are string columns; pandas >= 2.0 raises on them instead of silently
# dropping them, so the numeric columns are selected explicitly.
data.select_dtypes(include='number').corr()

Unnamed: 0,UDI,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF
UDI,1.0,0.117428,0.324428,-0.006615,0.003207,-0.010702,-0.022892,0.009154,-0.022215,-0.023557,-0.00099,-0.005954
Air temperature [K],0.117428,1.0,0.876107,0.02267,-0.013778,0.013853,0.082556,0.009955,0.137831,0.00347,0.001988,0.017688
Process temperature [K],0.324428,0.876107,1.0,0.019277,-0.014061,0.013488,0.035946,0.007315,0.056933,-0.003355,0.004554,0.022279
Rotational speed [rpm],-0.006615,0.02267,0.019277,1.0,-0.875027,0.000223,-0.044188,0.010389,-0.121241,0.123018,-0.104575,-0.013088
Torque [Nm],0.003207,-0.013778,-0.014061,-0.875027,1.0,-0.003093,0.191321,-0.014662,0.14261,0.083781,0.183465,0.016136
Tool wear [min],-0.010702,0.013853,0.013488,0.000223,-0.003093,1.0,0.105448,0.115792,-0.001287,-0.009334,0.155894,0.011326
Machine failure,-0.022892,0.082556,0.035946,-0.044188,0.191321,0.105448,1.0,0.362904,0.5758,0.522812,0.531083,0.004516
TWF,0.009154,0.009955,0.007315,0.010389,-0.014662,0.115792,0.362904,1.0,-0.007332,0.008577,0.038243,0.03097
HDF,-0.022215,0.137831,0.056933,-0.121241,0.14261,-0.001287,0.5758,-0.007332,1.0,0.018443,0.046396,-0.004706
PWF,-0.023557,0.00347,-0.003355,0.123018,0.083781,-0.009334,0.522812,0.008577,0.018443,1.0,0.115836,-0.004273


Process temperature is highly correlated (≈ 0.88) with the dependent feature (air temperature).

Heat Dissipation Failure (HDF), Power Failure (PWF) and Overstrain Failure (OSF) are the failure modes most strongly correlated with Machine Failure.

In [9]:
# Class balance of the Tool Wear Failure (TWF) flag.
data['TWF'].value_counts()

0    9954
1      46
Name: TWF, dtype: int64

Tool Wear Failure occurred 46 times out of 10,000, i.e. 0.46% of the time.

In [10]:
# Class balance of the Heat Dissipation Failure (HDF) flag.
data['HDF'].value_counts()

0    9885
1     115
Name: HDF, dtype: int64

Heat Dissipation Failure occurred 115 times out of 10,000, i.e. 1.15% of the time.

In [11]:
# Class balance of the Power Failure (PWF) flag.
data['PWF'].value_counts()

0    9905
1      95
Name: PWF, dtype: int64

Power Failure occurred 95 times out of 10,000, i.e. 0.95% of the time.

In [12]:
# Class balance of the Overstrain Failure (OSF) flag.
data['OSF'].value_counts()

0    9902
1      98
Name: OSF, dtype: int64

Overstrain Failure occurred 98 times out of 10,000, i.e. 0.98% of the time.

In [13]:
# Class balance of the Random Failure (RNF) flag.
data['RNF'].value_counts()

0    9981
1      19
Name: RNF, dtype: int64

Random Failure occurred 19 times out of 10,000, i.e. 0.19% of the time.

In [14]:
# Number of rows per product variant (`Type` is L, M or H).
data['Type'].value_counts()

L    6000
M    2997
H    1003
Name: Type, dtype: int64

Around 60%, 30% and 10% of the vehicles in the data belong to the Low, Medium and High variants, respectively.

## Exploratory Data Analysis

In [15]:
# Build a profiling report of the raw data and render it as notebook widgets.
# NOTE(review): despite its name, `df` holds the ProfileReport, not a DataFrame.
df = ProfileReport(data)
df.to_widgets()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render widgets:   0%|          | 0/1 [00:00<?, ?it/s]

VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…

In [16]:
# Export the profiling report (`df`) to an HTML file.
df.to_file('report.html')

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

Apart from rotational speed and torque (correlation ≈ −0.88), there is very little multicollinearity among the independent features.

## Data Preprocessing

In [17]:
# One-hot encode `Type`: the column is replaced by one indicator column per
# category. Not idempotent: re-running fails because `Type` no longer exists.
data = pd.get_dummies(data=data, columns=['Type'])

In [18]:
from sklearn.preprocessing import StandardScaler

# Numeric inputs to standardise; the target (`Air temperature [K]`) is left unscaled.
scale_columns = ['Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']

# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split further down, so test-set statistics leak into the scaling.
scaler = StandardScaler().fit(data[scale_columns])
scaler

StandardScaler()

In [19]:
# Overwrite the selected columns with their standardised values.
# Not idempotent: running this cell twice scales already-scaled data.
scaled_values = scaler.transform(data[scale_columns])
data[scale_columns] = scaled_values

In [20]:
# Check the standardised columns and the new `Type_*` indicator columns.
data.head(n=5)

Unnamed: 0,UDI,Product ID,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF,Type_H,Type_L,Type_M
0,1,M14860,298.1,-0.94736,0.068185,0.2822,-1.695984,0,0,0,0,0,0,0,0,1
1,2,L47181,298.2,-0.879959,-0.729472,0.633308,-1.648852,0,0,0,0,0,0,0,1,0
2,3,L47182,298.1,-1.014761,-0.22745,0.94429,-1.61743,0,0,0,0,0,0,0,1,0
3,4,L47183,298.2,-0.94736,-0.590021,-0.048845,-1.586009,0,0,0,0,0,0,0,1,0
4,5,L47184,298.2,-0.879959,-0.729472,0.001313,-1.554588,0,0,0,0,0,0,0,1,0


In [21]:
# Remove the identifier columns so they are not used as model inputs.
data = data.drop(columns=['UDI', 'Product ID'])

In [23]:
# Preview the frame after dropping the identifier columns.
data.head()

Unnamed: 0,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF,Type_H,Type_L,Type_M
0,298.1,-0.94736,0.068185,0.2822,-1.695984,0,0,0,0,0,0,0,0,1
1,298.2,-0.879959,-0.729472,0.633308,-1.648852,0,0,0,0,0,0,0,1,0
2,298.1,-1.014761,-0.22745,0.94429,-1.61743,0,0,0,0,0,0,0,1,0
3,298.2,-0.94736,-0.590021,-0.048845,-1.586009,0,0,0,0,0,0,0,1,0
4,298.2,-0.879959,-0.729472,0.001313,-1.554588,0,0,0,0,0,0,0,1,0


## Train-Test Split

In [24]:
# Feature matrix: every remaining column except the target.
X = data.drop(columns=['Air temperature [K]'])
# Regression target: the (unscaled) air temperature in kelvin.
y = data['Air temperature [K]']

In [25]:
# Display the feature matrix (pandas truncates the rendering of long frames).
X

Unnamed: 0,Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF,Type_H,Type_L,Type_M
0,-0.947360,0.068185,0.282200,-1.695984,0,0,0,0,0,0,0,0,1
1,-0.879959,-0.729472,0.633308,-1.648852,0,0,0,0,0,0,0,1,0
2,-1.014761,-0.227450,0.944290,-1.617430,0,0,0,0,0,0,0,1,0
3,-0.947360,-0.590021,-0.048845,-1.586009,0,0,0,0,0,0,0,1,0
4,-0.879959,-0.729472,0.001313,-1.554588,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,-1.082162,0.363820,-1.052012,-1.476034,0,0,0,0,0,0,0,0,1
9996,-1.082162,0.520005,-0.821283,-1.428902,0,0,0,0,0,0,1,0,0
9997,-0.947360,0.592519,-0.660777,-1.350349,0,0,0,0,0,0,0,0,1
9998,-0.879959,-0.729472,0.854005,-1.303217,0,0,0,0,0,0,1,0,0


In [26]:
# Display the target series.
y

0       298.1
1       298.2
2       298.1
3       298.2
4       298.2
        ...  
9995    298.8
9996    298.9
9997    299.0
9998    299.0
9999    299.0
Name: Air temperature [K], Length: 10000, dtype: float64

In [31]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Model Selection

In [33]:
from sklearn.linear_model import LinearRegression
# Linear regression model with default settings.
linear = LinearRegression()

## Training The Model

In [34]:
# Fit the regressor on the training split only.
linear.fit(X=X_train, y=Y_train)

LinearRegression()

## Training Data Output

In [35]:
# Fitted intercept (bias term) of the linear model.
linear.intercept_

299.9832236357094

In [36]:
# Fitted coefficients, one per feature column of `X`.
linear.coef_

array([ 1.74258902e+00,  4.32714441e-02,  1.86691734e-02,  6.12510317e-03,
       -2.61524715e-01,  3.20786764e-01,  1.96040662e+00,  2.97171709e-01,
        3.45096427e-02, -2.08024340e-02, -3.14502558e-02,  7.98063504e-04,
        3.06521923e-02])

## Building the predictive model

In [38]:
# Persist the fitted model to disk. The `with` block guarantees the file
# handle is flushed and closed, even if pickling raises.
# NOTE(review): only `linear` is saved; the fitted `scaler` is not persisted,
# so callers of the saved model must supply already-scaled inputs.
file = 'predictive_maintainence.sav'
with open(file, 'wb') as model_file:
    pickle.dump(linear, model_file)

In [46]:
# Reload the persisted model; the `with` block closes the file handle afterwards.
# SECURITY: unpickling executes arbitrary code, so only load files you created
# yourself or otherwise trust.
with open(file, 'rb') as model_file:
    saved_model = pickle.load(model_file)

In [49]:
# One observation with 13 values, in the column order of `X`.
# NOTE(review): the numeric inputs look pre-standardised — confirm they were
# produced with the same `scaler` used for training.
sample_features = [[-0.879959, -0.216294, 0.021376, -1.224663, 0, 0, 0, 0, 0, 0, 0, 0, 1]]
saved_model.predict(sample_features)

array([298.46400747])

The actual value is 299.0, whereas our model predicts 298.46.

In [48]:
# `score` returns the coefficient of determination (R^2), not a classification accuracy.
# Also report R^2 on the held-out split, which is otherwise never evaluated.
print('Test R^2:', linear.score(X_test, Y_test))
# The training R^2 remains the cell's displayed value.
linear.score(X_train, Y_train)

0.7793414378841095

The R² score of the linear model on the training set (80% of the data) is about 0.78.