# This Model Uses the specific machine failure categories TWF, HDF, PWF, OSF, RNF

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_curve

#### Loading initial data

In [2]:
# Read the raw AI4I 2020 predictive-maintenance CSV (path is relative to this notebook).
initial_data = pd.read_csv(filepath_or_buffer='../data/ai4i2020.csv')

#### Converting the Type column into binary (dummy) values for model preparation

In [3]:
# One-hot encode the product type as integer columns; drop_first removes the
# first category so the remaining columns are not redundant with each other.
dummy = pd.get_dummies(
    data=initial_data['Type'],
    drop_first=True,
    dtype=int,
)

In [4]:
# Append the dummy columns to the right of the original frame (column-wise concat).
cleaned_data = pd.concat(objs=[initial_data, dummy], axis="columns")

In [5]:
# The categorical 'Type' column is now represented by the dummy columns, so remove it.
# Reassigning (instead of inplace=True) with errors='ignore' keeps this cell
# idempotent: running it a second time no longer raises KeyError because
# 'Type' has already been dropped.
cleaned_data = cleaned_data.drop(columns=['Type'], errors='ignore')

In [6]:
# Preview the first five rows of the prepared frame.
cleaned_data.head(n=5)

Unnamed: 0,UDI,Product ID,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF,L,M
0,1,M14860,298.1,308.6,1551,42.8,0,0,0,0,0,0,0,0,1
1,2,L47181,298.2,308.7,1408,46.3,3,0,0,0,0,0,0,1,0
2,3,L47182,298.1,308.5,1498,49.4,5,0,0,0,0,0,0,1,0
3,4,L47183,298.2,308.6,1433,39.5,7,0,0,0,0,0,0,1,0
4,5,L47184,298.2,308.7,1408,40.0,9,0,0,0,0,0,0,1,0


#### Creating Logistic Regression Model

In [7]:
# Feature set: product-type dummies, the sensor readings, and the individual
# failure-mode flags (TWF, HDF, PWF, OSF, RNF).
feature_columns = [
    "L", "M",
    "Air temperature [K]", "Process temperature [K]",
    "Rotational speed [rpm]", "Torque [Nm]", "Tool wear [min]",
    "TWF", "HDF", "PWF", "OSF", "RNF",
]
X = cleaned_data[feature_columns]
y = cleaned_data["Machine failure"]

# Hold out 25% of the rows; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Shuffled 5-fold splitter used for cross-validation on the training split.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# max_iter is set to 1000 rather than left at the library default.
log_model = LogisticRegression(max_iter=1000)

# Per-fold metric accumulators, filled by the training cell.
accuracy_scores, precision_scores, recall_scores = [], [], []

#### Training Model

In [8]:
# Start from empty metric lists so that re-running this cell does not append
# a second set of fold scores on top of those from a previous run.
accuracy_scores = []
precision_scores = []
recall_scores = []

# Cross-validate on the training split: fit on the training folds, then score
# the predictions on the held-out validation fold.
for train_set, val_set in kf.split(X_train):
    X_train_split, y_train_split = X_train.iloc[train_set], y_train.iloc[train_set]
    X_val_split, y_val_split = X_train.iloc[val_set], y_train.iloc[val_set]
    log_model.fit(X_train_split, y_train_split)
    model_predictions = log_model.predict(X_val_split)

    accuracy_scores.append(accuracy_score(y_val_split, model_predictions))
    precision_scores.append(precision_score(y_val_split, model_predictions))
    recall_scores.append(recall_score(y_val_split, model_predictions))

# After the loop, log_model only holds the fit from the last fold's subset.
# Refit on the whole training split so later cells predict with a model that
# has seen all of the training data.
log_model.fit(X_train, y_train)

#### Assessing model's performance

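* Using the failure-mode flags (TWF, HDF, PWF, OSF, RNF) as features effectively over-trains the model: they leak the target, which is why the scores are close to perfect.
* The goal is to predict machine failure, but these flags already tell the model that a failure occurred (and which kind). A model that needs them as input is pointless, because you would already know what the failure is.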

In [9]:
# Average each metric over the folds and express it as a percentage.
mean_accuracy = np.mean(accuracy_scores) * 100
mean_precision = np.mean(precision_scores) * 100
mean_recall = np.mean(recall_scores) * 100

print(f"Average Accuracy Scores: {mean_accuracy}")
print(f"Average Precision Scores: {mean_precision}")
print(f"Average Recall Scores: {mean_recall}")

Average Accuracy Scores: 99.85333333333332
Average Precision Scores: 100.0
Average Recall Scores: 95.84967417199438


#### Using model

In [13]:
# A single hand-written observation, with keys in the same order as the
# training feature columns. Note that the PWF flag is set.
sample_reading = dict([
    ("L", 1),
    ("M", 0),
    ("Air temperature [K]", 310.8),
    ("Process temperature [K]", 310.6),
    ("Rotational speed [rpm]", 1577),
    ("Torque [Nm]", 30.5),
    ("Tool wear [min]", 227),
    ("TWF", 0),
    ("HDF", 0),
    ("PWF", 1),
    ("OSF", 0),
    ("RNF", 0),
])

# Scalars need an explicit index to become a one-row frame.
machine_telemtry = pd.DataFrame(data=sample_reading, index=[0])

In [14]:
# Display the one-row sample frame to check it before predicting.
machine_telemtry

Unnamed: 0,L,M,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],TWF,HDF,PWF,OSF,RNF
0,1,0,310.8,310.6,1577,30.5,227,0,0,1,0,0


In [15]:
# Predict the class label for the sample row; the cell's last expression
# displays the resulting array.
prediction = log_model.predict(X=machine_telemtry)
prediction

array([1])

#### Conclusion

* I will not use this model for predicting machine failure