In [None]:
!pip install scikit-learn==1.3.0




In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Location of the predictive-maintenance sheet (Colab upload directory).
DATA_PATH = '/content/pd_newsheet.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Peek at the first five rows to confirm the file was parsed as expected.
df.head(n=5)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,292,L47471,L,298.0,308.3,1473,42.6,107,0,No Failure
1,293,L47472,L,298.1,308.4,1335,40.2,109,0,No Failure
2,294,H29707,H,298.1,308.5,1392,44.8,111,0,No Failure
3,295,M15154,M,298.1,308.5,1372,51.1,116,0,No Failure
4,296,M15155,M,298.1,308.6,1716,30.7,119,0,No Failure


In [None]:
# Summarise column names, dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 835 entries, 0 to 834
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   UDI                      835 non-null    int64  
 1   Product ID               835 non-null    object 
 2   Type                     835 non-null    object 
 3   Air temperature [K]      835 non-null    float64
 4   Process temperature [K]  835 non-null    float64
 5   Rotational speed [rpm]   835 non-null    int64  
 6   Torque [Nm]              835 non-null    float64
 7   Tool wear [min]          835 non-null    int64  
 8   Target                   835 non-null    int64  
 9   Failure Type             835 non-null    object 
dtypes: float64(3), int64(4), object(3)
memory usage: 65.4+ KB


In [None]:
# Number of distinct values in the `Type` column.
len(df['Type'].value_counts())

3

In [None]:
# Count missing values per column.
df.isna().sum()

UDI                        0
Product ID                 0
Type                       0
Air temperature [K]        0
Process temperature [K]    0
Rotational speed [rpm]     0
Torque [Nm]                0
Tool wear [min]            0
Target                     0
Failure Type               0
dtype: int64

# Some Understandings <br>
1. First, some columns need to be transformed with ordinal/label encoding.<br>
2. There are no missing values.<br>
3. The `Type` column has 3 values (L/M/H), which is ordinal data, so ordinal encoding is applied to it.<br>
4. Note that there are two possible target variables: `Target` (binary: failure or no failure) and `Failure Type` (multiclass).<br> Here we predict the multiclass `Failure Type` variable.<br>

5. This is a classification problem. <br>

In [None]:
# Show only the rows flagged as a failure (Target == 1).
failure_mask = df['Target'].eq(1)
df.loc[failure_mask]

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
36,328,L47507,L,297.7,308.5,1373,56.7,203,1,Overstrain Failure
89,381,L47560,L,297.5,308.3,2564,12.8,127,1,Power Failure
151,443,L47622,L,297.4,308.5,1399,61.5,61,1,Power Failure
172,464,L47643,L,297.4,308.7,2874,4.2,118,1,Power Failure
295,587,L47766,L,297.6,309.6,1501,49.8,222,1,Overstrain Failure
...,...,...,...,...,...,...,...,...,...,...
830,9759,L56938,L,298.6,309.8,2271,16.2,218,1,Tool Wear Failure
831,9765,L56944,L,298.5,309.5,1294,66.7,12,1,Power Failure
832,9823,L57002,L,298.5,309.4,1360,60.9,187,1,Overstrain Failure
833,9831,L57010,L,298.3,309.3,1337,56.1,206,1,Overstrain Failure


In [None]:
from sklearn.model_selection import train_test_split

# Select features and target by name rather than by position, so the selection
# keeps working if columns are reordered or added to the sheet.
FEATURE_COLUMNS = [
    'Type',
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
]
TARGET_COLUMN = 'Failure Type'  # multiclass target (the binary `Target` column is not used)

X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# 80/20 hold-out split; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_train.shape

(668,)

In [None]:
from sklearn.preprocessing import OrdinalEncoder

# Encode `Type` with an explicit order: L -> 0, M -> 1, H -> 2.
oe = OrdinalEncoder(categories=[['L', 'M', 'H']])

# Read the raw letters from `X`, which this cell never modifies, instead of from
# X_train / X_test. That keeps the cell re-runnable: after a first run
# X_train['Type'] already holds integers, which the encoder would reject.
type_train_raw = X.loc[X_train.index, ['Type']]
type_test_raw = X.loc[X_test.index, ['Type']]

# Fit on the training rows only, then apply the same mapping to both splits.
oe.fit(type_train_raw)

# `assign` returns new frames rather than writing into frames derived from `X`.
X_train = X_train.assign(Type=oe.transform(type_train_raw).astype(int).ravel())
X_test = X_test.assign(Type=oe.transform(type_test_raw).astype(int).ravel())

In [None]:
# Inspect the first five training rows after encoding `Type`.
X_train.head(n=5)

Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min]
478,0,296.9,308.0,1343,44.5,15
346,0,298.2,309.7,1585,37.4,118
462,0,296.8,308.0,1536,40.4,217
691,0,303.3,311.3,1350,48.1,32
302,0,297.5,309.6,1312,50.7,0


In [None]:
from sklearn.preprocessing import LabelEncoder

# Fixed label -> integer code mapping; the list position is the class id used
# in the classification reports (0 = 'No Failure', ..., 5 = 'Random Failures').
categories = ['No Failure', 'Heat Dissipation Failure', 'Power Failure', 'Overstrain Failure', 'Tool Wear Failure', 'Random Failures']
custom_encoder = {cat: i for i, cat in enumerate(categories)}

# Re-derive the raw string labels from `y` (never modified) using the split's
# row index. Reading from y_train / y_test would break on a re-run, because
# this cell replaces them with integer arrays.
y_train_labels = y.loc[X_train.index]
y_test_labels = y.loc[X_test.index]

# Fail early with a readable message instead of mapping unknown labels to a
# sentinel code that LabelEncoder.transform would later reject.
unknown_labels = (set(y_train_labels) | set(y_test_labels)) - set(custom_encoder)
if unknown_labels:
    raise ValueError(
        f"Unexpected 'Failure Type' labels: {sorted(map(str, unknown_labels))}"
    )

y_train_encoded = [custom_encoder[cat] for cat in y_train_labels]
y_test_encoded = [custom_encoder[cat] for cat in y_test_labels]

# `le` is fit on the integer codes themselves, so it is an identity mapping.
# It is kept so that any code relying on `le` keeps working.
le = LabelEncoder()
le.fit(list(custom_encoder.values()))
y_train = le.transform(y_train_encoded)
y_test = le.transform(y_test_encoded)



In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

# Logistic regression baseline; the high iteration cap gives lbfgs room to
# converge on the unscaled features.
clf = LogisticRegression(solver='lbfgs', max_iter=10000)
clf.fit(X_train, y_train)

# Predict the labels of the test set
y_pred = clf.predict(X_test)

log_train = round(clf.score(X_train, y_train) * 100, 2)
# scikit-learn metrics take (y_true, y_pred), in that order.
log_accuracy = round(accuracy_score(y_test, y_pred) * 100, 2)


print("Training Accuracy    :",log_train ,"%")
print("Model Accuracy Score :",log_accuracy ,"%")
print("\033[1m--------------------------------------------------------\033[0m")
# zero_division=0 reports 0.0 for classes the model never predicts, without
# emitting an undefined-metric warning for each one.
print("Classification_Report: \n",classification_report(y_test,y_pred,zero_division=0))
print("\033[1m--------------------------------------------------------\033[0m")


Training Accuracy    : 94.01 %
Model Accuracy Score : 95.21 %
[1m--------------------------------------------------------[0m
Classification_Report: 
               precision    recall  f1-score   support

           0       0.96      1.00      0.98        91
           1       0.94      1.00      0.97        29
           2       1.00      1.00      1.00        17
           3       0.94      0.89      0.92        19
           4       0.83      0.56      0.67         9
           5       0.00      0.00      0.00         2

    accuracy                           0.95       167
   macro avg       0.78      0.74      0.76       167
weighted avg       0.94      0.95      0.94       167

[1m--------------------------------------------------------[0m


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier

# Fixed seed: tie-breaking between equally good splits is random, so without it
# the scores change from run to run.
decision = DecisionTreeClassifier(random_state=42)
decision.fit(X_train, y_train)
y_pred_dec = decision.predict(X_test)

decision_train = round(decision.score(X_train, y_train) * 100, 2)
# scikit-learn metrics take (y_true, y_pred), in that order.
decision_accuracy = round(accuracy_score(y_test, y_pred_dec) * 100, 2)

print("Training Accuracy    :",decision_train ,"%")
print("Model Accuracy Score :",decision_accuracy ,"%")
print("\033[1m--------------------------------------------------------\033[0m")
# zero_division=0 keeps the report warning-free if a class is never predicted.
print("Classification_Report: \n",classification_report(y_test,y_pred_dec,zero_division=0))
print("\033[1m--------------------------------------------------------\033[0m")


Training Accuracy    : 100.0 %
Model Accuracy Score : 92.22 %
[1m--------------------------------------------------------[0m
Classification_Report: 
               precision    recall  f1-score   support

           0       0.98      0.98      0.98        91
           1       0.96      0.93      0.95        29
           2       0.74      0.82      0.78        17
           3       0.89      0.89      0.89        19
           4       0.86      0.67      0.75         9
           5       0.33      0.50      0.40         2

    accuracy                           0.92       167
   macro avg       0.79      0.80      0.79       167
weighted avg       0.93      0.92      0.92       167

[1m--------------------------------------------------------[0m


In [None]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

# Fixed seed: bootstrap sampling and feature sub-sampling are random, so without
# it the scores (and the model saved later) change from run to run.
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest.fit(X_train, y_train)
y_pred_rf = random_forest.predict(X_test)

random_forest_train = round(random_forest.score(X_train, y_train) * 100, 2)
# scikit-learn metrics take (y_true, y_pred), in that order.
random_forest_accuracy = round(accuracy_score(y_test, y_pred_rf) * 100, 2)

print("Training Accuracy    :",random_forest_train ,"%")
print("Model Accuracy Score :",random_forest_accuracy ,"%")
print("\033[1m--------------------------------------------------------\033[0m")
# zero_division=0 keeps the report warning-free if a class is never predicted.
print("Classification_Report: \n",classification_report(y_test,y_pred_rf,zero_division=0))
print("\033[1m--------------------------------------------------------\033[0m")

Training Accuracy    : 100.0 %
Model Accuracy Score : 93.41 %
[1m--------------------------------------------------------[0m
Classification_Report: 
               precision    recall  f1-score   support

           0       0.95      0.99      0.97        91
           1       1.00      0.97      0.98        29
           2       0.81      1.00      0.89        17
           3       0.88      0.79      0.83        19
           4       1.00      0.56      0.71         9
           5       1.00      0.50      0.67         2

    accuracy                           0.93       167
   macro avg       0.94      0.80      0.84       167
weighted avg       0.94      0.93      0.93       167

[1m--------------------------------------------------------[0m


In [None]:
# Support Vector Machines
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# The default RBF kernel is distance-based, and the features span very
# different ranges (rotational speed in the thousands, Type in 0-2). Standardise
# inside a pipeline so the scaler is fit on the training data only and is
# applied automatically whenever `svc` predicts or scores.
svc = make_pipeline(StandardScaler(), SVC())
svc.fit(X_train, y_train)
y_pred_svc = svc.predict(X_test)

svc_train = round(svc.score(X_train, y_train) * 100, 2)
# scikit-learn metrics take (y_true, y_pred), in that order.
svc_accuracy = round(accuracy_score(y_test, y_pred_svc) * 100, 2)

print("Training Accuracy    :",svc_train ,"%")
print("Model Accuracy Score :",svc_accuracy ,"%")
print("\033[1m--------------------------------------------------------\033[0m")
# zero_division=0 reports 0.0 for classes the model never predicts, without
# emitting an undefined-metric warning for each one.
print("Classification_Report: \n",classification_report(y_test,y_pred_svc,zero_division=0))
print("\033[1m--------------------------------------------------------\033[0m")

Training Accuracy    : 62.13 %
Model Accuracy Score : 56.89 %
[1m--------------------------------------------------------[0m
Classification_Report: 
               precision    recall  f1-score   support

           0       0.56      1.00      0.72        91
           1       0.00      0.00      0.00        29
           2       1.00      0.24      0.38        17
           3       0.00      0.00      0.00        19
           4       0.00      0.00      0.00         9
           5       0.00      0.00      0.00         2

    accuracy                           0.57       167
   macro avg       0.26      0.21      0.18       167
weighted avg       0.41      0.57      0.43       167

[1m--------------------------------------------------------[0m


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


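## In the run recorded above, Random Forest reaches 100% training accuracy and 93.41% test accuracy (weighted precision 0.94), so let's use it here. Note that Logistic Regression scored slightly higher on the test set (95.21%).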

In [None]:
import pickle

# Persist the fitted random forest. The context manager guarantees the file is
# flushed and closed even if serialisation fails.
# Note: the model expects `Type` already ordinal-encoded and predicts integer
# class codes, so consumers need the same encodings used in this notebook.
with open('model2.pkl', 'wb') as model_file:
    pickle.dump(random_forest, model_file)


In [None]:
import joblib

# Store the fitted `random_forest` estimator in joblib format as well.
joblib_model_path = 'model2.joblib'
joblib.dump(random_forest, joblib_model_path)

['model2.joblib']

In [None]:
# Plain Python list of the column names in the loaded sheet.
columns = list(df.columns)
columns

['UDI',
 'Product ID',
 'Type',
 'Air temperature [K]',
 'Process temperature [K]',
 'Rotational speed [rpm]',
 'Torque [Nm]',
 'Tool wear [min]',
 'Target',
 'Failure Type']