### Machine Learning for Engineers: [SteelPlateFaults](https://www.apmonitor.com/pds/index.php/Main/SteelPlateFaults)
- [Steel Plate Defects](https://www.apmonitor.com/pds/index.php/Main/SteelPlateFaults)
 - Source Blocks: 9
 - Description: Machine learning to predict defects (faults) in a steel plate. There are several specific fault types, plus a general "other" type for defects that do not fit into any of those categories.
- [Course Overview](https://apmonitor.com/pds)
- [Course Schedule](https://apmonitor.com/pds/index.php/Main/CourseSchedule)


In [None]:
import pandas as pd

# Location of the steel plate fault data file
url = 'http://apmonitor.com/pds/uploads/Main/steel.txt'

# Download the file and parse it into a DataFrame with pandas' default CSV settings
data = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Separate the raw (unscaled) table into inputs and targets:
#   the final 7 columns become y, every column before them becomes X
labels = data.columns[-7:]
features = data.columns[:-7]
y = data[labels]
X = data[features]

# Bar chart of how many rows have their largest label value in each label column
label_counts = y.idxmax(axis=1).value_counts()
label_counts.plot.bar()

In [None]:
# keras packages
from keras.models import Sequential
from keras.layers import Dense

# scikit-learn packages
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split 
from sklearn.feature_selection import SelectKBest,chi2

In [None]:
# Split data into X and y
#   The last 7 columns are used as the labels. They are cast to float so the
#   targets keep a float dtype (assumes the label columns are already 0/1
#   indicators, so they need no scaling -- TODO confirm)
features = data.columns[:-7]
labels = data.columns[-7:]
X = data[features]
y = data[labels].astype(float)

# Train/test split BEFORE scaling so the test rows never influence the scaler
#   (random_state fixes the shuffle so the split is reproducible)
Xtrain_raw, Xtest_raw, ytrain, ytest = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42)

# Scale features: learn each column's min/max from the training rows only,
#   then apply that same transform to the test rows. Test values outside the
#   training range will fall slightly outside [0, 1].
s = MinMaxScaler()
Xtrain = pd.DataFrame(s.fit_transform(Xtrain_raw),
                      columns=features, index=Xtrain_raw.index)
Xtest = pd.DataFrame(s.transform(Xtest_raw),
                     columns=features, index=Xtest_raw.index)

In [None]:
# Keras classifier: one hidden layer of 8 ReLU units feeding a softmax
#   layer with one output per label column
n_inputs = Xtrain.shape[1]
n_outputs = ytrain.shape[1]
model = Sequential([
    Dense(8, input_dim=n_inputs, activation='relu'),
    Dense(n_outputs, activation='softmax'),
])

# Configure training: cross-entropy loss, Adam optimizer, accuracy reported
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit silently for 1000 epochs; 20% of the training rows are held out
#   for validation, and the per-epoch history is kept in `result`
result = model.fit(
    Xtrain,
    ytrain,
    epochs=1000,
    validation_split=0.2,
    verbose=0,
)

In [None]:
import matplotlib.pyplot as plt
# Training and validation loss per epoch on a logarithmic y-axis
for curve in ('loss', 'val_loss'):
    plt.semilogy(result.history[curve], label=curve)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Run the model on the test inputs and wrap the softmax outputs in a
#   dataframe with one column per label
probabilities = model.predict(Xtest)
yp = pd.DataFrame(probabilities, columns=ytest.columns)

# Per row: the actual label column, then the most probable predicted
#   label and the probability assigned to it
actual_label = ytest.idxmax(axis=1)
predicted_label = yp.idxmax(axis=1)
predicted_prob = yp.max(axis=1)

In [None]:
# Display probabilities, with the most likely label
#   highlighted and the actual label displayed
# .values attaches the labels by position: yp was built with a fresh default
#   index, so aligning on the index of the test split would not match rows
yp['Actual fault'] = actual_label.values

# Highlight only the numeric probability columns. Including the text
#   'Actual fault' column in the row-wise max would compare str with float,
#   which can raise a TypeError in pandas versions that do not restrict the
#   highlight to numeric columns automatically.
prob_cols = yp.select_dtypes(include='number').columns
yp.style.highlight_max(subset=prob_cols, axis=1)

In [None]:
import seaborn as sns
# Plot the confusion matrix
#   scikit-learn's signature is confusion_matrix(y_true, y_pred), so rows are
#   the actual faults and columns are the predicted faults.
#   Passing labels= fixes the row/column order and keeps the matrix square
#   even if some fault type is missing from the test split or the predictions.
fault_names = list(ytest.columns)
cm = confusion_matrix(actual_label, predicted_label, labels=fault_names)

# fmt='d' prints the integer counts as-is instead of in scientific notation
ax = sns.heatmap(cm, annot=True, fmt='d',
                 xticklabels=fault_names, yticklabels=fault_names)
ax.set_xlabel('Predicted fault')
ax.set_ylabel('Actual fault')
plt.show()