In [None]:
!python3 -m pip install -e .
!pip install openml

In [None]:
!pip install keras
!pip install -q -U keras-tuner

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.9/128.9 kB[0m [31m677.1 kB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import openml
import pandas as pd
import numpy as np
from time import time
import keras_tuner as kt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import scipy.stats as stats
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml
from openml.datasets import edit_dataset, fork_dataset, get_dataset
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [None]:
# Fetch the OpenML dataset by its numeric ID.
dataset = openml.datasets.get_dataset(42890)

# get_data() returns several values; only the first one (the data table) is kept.
eeg = dataset.get_data()[0]



In [None]:
# For Machine failure
target_cols = ['Machine failure']

# Drop every other failure flag as well. 'TWF' was previously left in the
# feature set, unlike the other per-target cells, which let a failure
# indicator leak into X.
df_filtered = eeg.drop(columns=['UDI', 'Product ID', 'Type', 'TWF', 'HDF', 'PWF', 'OSF', 'RNF'])

X = df_filtered.drop(columns=target_cols)
y = eeg[target_cols]

In [None]:
# Balanced class weights for 'Machine failure': n_samples / (2 * class_count).
class_counts = y.value_counts()
n_labelled = y.count()['Machine failure']

weight_for_0 = (1 / class_counts[0]) * (n_labelled / 2.0)
weight_for_1 = (1 / class_counts[1]) * (n_labelled / 2.0)
class_weight_0 = {0: weight_for_0, 1: weight_for_1}

print(class_weight_0)

{0: 0.5175447676223993, 1: 14.749262536873156}


In [None]:
# For TWF
target_cols = ['TWF']

# After this drop, what remains is the feature set plus the 'TWF' target.
df_filtered = eeg.drop(
    columns=['UDI', 'Product ID', 'Type', 'Machine failure', 'HDF', 'PWF', 'OSF', 'RNF']
)

X = df_filtered.drop(columns=target_cols)
y = eeg[target_cols]

In [None]:
# Balanced class weights for 'TWF': n_samples / (2 * class_count).
class_counts = y.value_counts()
n_labelled = y.count()['TWF']

weight_for_0 = (1 / class_counts[0]) * (n_labelled / 2.0)
weight_for_1 = (1 / class_counts[1]) * (n_labelled / 2.0)
class_weight_1 = {0: weight_for_0, 1: weight_for_1}

print(class_weight_1)

{0: 0.5023106288929075, 1: 108.69565217391305}


In [None]:
# For HDF
target_cols = ['HDF']

# After this drop, what remains is the feature set plus the 'HDF' target.
df_filtered = eeg.drop(
    columns=['UDI', 'Product ID', 'Type', 'Machine failure', 'TWF', 'PWF', 'OSF', 'RNF']
)

X = df_filtered.drop(columns=target_cols)
y = eeg[target_cols]

In [None]:
# Balanced class weights for 'HDF': n_samples / (2 * class_count).
class_counts = y.value_counts()
n_labelled = y.count()['HDF']

weight_for_0 = (1 / class_counts[0]) * (n_labelled / 2.0)
weight_for_1 = (1 / class_counts[1]) * (n_labelled / 2.0)
class_weight_2 = {0: weight_for_0, 1: weight_for_1}

print(class_weight_2)

{0: 0.5058168942842691, 1: 43.47826086956522}


In [None]:
# For PWF
target_cols = ['PWF']

# After this drop, what remains is the feature set plus the 'PWF' target.
df_filtered = eeg.drop(
    columns=['UDI', 'Product ID', 'Type', 'Machine failure', 'TWF', 'HDF', 'OSF', 'RNF']
)

X = df_filtered.drop(columns=target_cols)
y = eeg[target_cols]

In [None]:
# Balanced class weights for 'PWF': n_samples / (2 * class_count).
class_counts = y.value_counts()
n_labelled = y.count()['PWF']

weight_for_0 = (1 / class_counts[0]) * (n_labelled / 2.0)
weight_for_1 = (1 / class_counts[1]) * (n_labelled / 2.0)
class_weight_3 = {0: weight_for_0, 1: weight_for_1}

print(class_weight_3)

{0: 0.5047955577990914, 1: 52.63157894736842}


In [None]:
# For OSF
target_cols = ['OSF']

# After this drop, what remains is the feature set plus the 'OSF' target.
df_filtered = eeg.drop(
    columns=['UDI', 'Product ID', 'Type', 'Machine failure', 'TWF', 'HDF', 'PWF', 'RNF']
)

X = df_filtered.drop(columns=target_cols)
y = eeg[target_cols]

In [None]:
# Balanced class weights for 'OSF': n_samples / (2 * class_count).
class_counts = y.value_counts()
n_labelled = y.count()['OSF']

weight_for_0 = (1 / class_counts[0]) * (n_labelled / 2.0)
weight_for_1 = (1 / class_counts[1]) * (n_labelled / 2.0)
class_weight_4 = {0: weight_for_0, 1: weight_for_1}

print(class_weight_4)

{0: 0.5049484952534841, 1: 51.0204081632653}


In [None]:
# For RNF
target_cols = ['RNF']

# After this drop, what remains is the feature set plus the 'RNF' target.
df_filtered = eeg.drop(
    columns=['UDI', 'Product ID', 'Type', 'Machine failure', 'TWF', 'HDF', 'OSF', 'PWF']
)

X = df_filtered.drop(columns=target_cols)
y = eeg[target_cols]

In [None]:
# Balanced class weights for 'RNF': n_samples / (2 * class_count).
class_counts = y.value_counts()
n_labelled = y.count()['RNF']

weight_for_0 = (1 / class_counts[0]) * (n_labelled / 2.0)
weight_for_1 = (1 / class_counts[1]) * (n_labelled / 2.0)
class_weight_5 = {0: weight_for_0, 1: weight_for_1}

print(class_weight_5)

{0: 0.5009518084360284, 1: 263.1578947368421}


In [None]:
# All
# Features: everything left once the ID-like columns and all six failure flags are dropped.
df_filtered = eeg.drop(
    columns=[
        'UDI', 'Product ID', 'Type',
        'Machine failure', 'TWF', 'HDF', 'PWF', 'OSF', 'RNF',
    ]
)

# Targets: everything left once the ID-like columns and the five measurement columns are dropped.
# Note that here target_cols holds the target table itself, not a list of names.
target_cols = eeg.drop(
    columns=[
        'UDI', 'Product ID', 'Type',
        'Air temperature [K]', 'Process temperature [K]',
        'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]',
    ]
)

X = df_filtered
y = target_cols

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=31,
)



---



In [None]:
# Using tensorflow to create my model
# More challenging than sklearn but interesting!

# Define the model
tf_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(300, activation=tf.nn.sigmoid, input_shape=(X_train.shape[1],)),  # Hidden layer
    tf.keras.layers.Dropout(0.35),  # To reduce overfitting
    # Output layer: one unit per target column. Sigmoid keeps each output in
    # (0, 1), so it can be read as a probability by a binary cross-entropy
    # loss that is not configured for logits. ReLU outputs are unbounded above
    # and exactly 0 for negative inputs, which breaks that loss.
    tf.keras.layers.Dense(y_train.shape[1], activation='sigmoid'),
])

In [None]:
# Training configuration: binary cross-entropy loss, Adam at 1e-3, and two
# tracked metrics (binary accuracy and the false-negative count).
tf_model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    metrics=[
        tf.keras.metrics.BinaryAccuracy(),
        tf.keras.metrics.FalseNegatives(),
    ],
)


In [None]:
# NOTE(review): class_weight_5 was derived from the 'RNF' column alone —
# confirm it is the intended weighting for the y_train used here.
tf_model.fit(
    x=X_train,
    y=y_train,
    epochs=25,
    class_weight=class_weight_5,
)
y_pred = tf_model.predict(X_test)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


# Iterating on this

We have established a strong baseline model: it trains and predicts with good accuracy and a fairly low loss. Our false negatives, however, are quite high.

With this dataset, a false negative could cost the company that owns this equipment a significant amount of money. Ideally, we would not want to see any.

Let's take it a step further and see if we can minimize the false negatives with Keras Tuner.

It would be nice to see the loss drop!


Thanks to this article for helping with setting up optimizers https://stackoverflow.com/questions/61080146/kerastuner-custom-objective-function

In [None]:
from tensorflow.keras import initializers

In [None]:
def build_model(hp):
    """Build and compile a tunable dense binary classifier for Keras Tuner.

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        Search-space handle. The hyperparameters registered here are
        ``num_layers`` (3 to 5), ``units_{i}`` (128 to 1344 in steps of 32,
        one per hidden layer), ``activation`` (``"sigmoid"`` or ``"relu"``,
        shared by all hidden layers), ``dropout`` (bool) and ``lr``
        (1e-4 to 1e-2, log-sampled).

    Returns
    -------
    keras.Sequential
        Model compiled with binary cross-entropy, an Adadelta optimizer that
        uses the tuned ``lr``, and a metric named ``recall``.
    """
    model = keras.Sequential()
    model.add(layers.Flatten())

    # Tune the number of hidden layers, and the width of each one separately.
    for i in range(hp.Int("num_layers", 3, 5)):
        model.add(
            layers.Dense(
                units=hp.Int(f"units_{i}", min_value=128, max_value=1344, step=32),
                activation=hp.Choice("activation", ["sigmoid", "relu"]),
                # Random (Glorot) kernels: with all-zero kernels every unit in
                # a layer produces the same output and receives the same
                # gradient, so the hidden layers cannot learn distinct features.
                kernel_initializer="glorot_uniform",
                bias_initializer="zeros",
            )
        )

    if hp.Boolean("dropout"):
        model.add(layers.Dropout(rate=0.35))

    # Sigmoid, not softmax: softmax over a single unit always outputs 1.0, so
    # the model would predict the positive class for every row (recall would
    # be trivially 1.0).
    # NOTE(review): a single output unit assumes one binary target column —
    # confirm against the y passed to search().
    model.add(layers.Dense(1, activation="sigmoid"))

    # The sampled learning rate is passed to the optimizer so "lr" is really tuned.
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
        optimizer=keras.optimizers.Adadelta(
            learning_rate=learning_rate,
            rho=0.95,  # Decay rate
            epsilon=1e-07,
            name="adadelta",
        ),
        metrics=[tf.keras.metrics.Recall(name="recall")],
    )
    return model


# Smoke test: build once with default hyperparameter values.
build_model(kt.HyperParameters())

<keras.engine.sequential.Sequential at 0x7c75723bc040>

In [None]:
# Using GridSearch
# NOTE(review): the objective is the training-set 'recall'; confirm whether
# the validation metric ('val_recall') was intended.
grid_tuner = kt.GridSearch(
    hypermodel=build_model,
    objective=kt.Objective(name='recall', direction='max'),
    overwrite=True,
    max_trials=2,
    executions_per_trial=5,
    max_consecutive_failed_trials=3,
    max_retries_per_trial=0,
)

In [None]:
# Using BayesianOptimization
# Each tuner needs its own project folder: two tuners that share
# directory/project_name overwrite each other's saved trials. The directory is
# relative so the run does not need write access to the filesystem root.
# NOTE(review): the objective is the training-set 'recall'; confirm whether
# 'val_recall' was intended.
kt_tuner = kt.BayesianOptimization(
    hypermodel=build_model,
    objective=kt.Objective('recall', direction='max'),
    max_trials=2,
    executions_per_trial=5,
    overwrite=True,
    directory="tuner_results",
    project_name="MLProject_bayesian",
)

In [None]:
# Using Hyperband
# Each tuner needs its own project folder: two tuners that share
# directory/project_name overwrite each other's saved trials. The directory is
# relative so the run does not need write access to the filesystem root.
# NOTE(review): the objective is the training-set 'recall'; confirm whether
# 'val_recall' was intended.
hyper_tuner = kt.Hyperband(
    hypermodel=build_model,
    objective=kt.Objective('recall', direction='max'),
    max_epochs=2,
    factor=3,
    hyperband_iterations=3,
    max_retries_per_trial=0,
    overwrite=True,
    directory="tuner_results",
    project_name="MLProject_hyperband",
    max_consecutive_failed_trials=3
)

In [None]:
# Using RandomSearch
# An explicit project name keeps this tuner's trials out of the default
# project folder, where another tuner created without a project_name would
# overwrite them.
# NOTE(review): the objective is the training-set 'recall'; confirm whether
# 'val_recall' was intended.
rand_tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective=kt.Objective('recall', direction='max'),
    max_trials=2,
    executions_per_trial=5,
    overwrite=True,
    project_name="random_search",
)

In [None]:
# Run the Hyperband search; the held-out split is supplied as validation data.
hyper_tuner.search(
    X_train,
    y_train,
    epochs=2,
    validation_data=(X_test, y_test),
    class_weight=class_weight_5,
)

Trial 6 Complete [00h 00m 14s]
recall: 1.0

Best recall So Far: 1.0
Total elapsed time: 00h 02m 07s


In [None]:
# Run the random search; the held-out split is supplied as validation data.
rand_tuner.search(
    X_train,
    y_train,
    epochs=5,
    validation_data=(X_test, y_test),
    class_weight=class_weight_1,
)

Trial 2 Complete [00h 03m 18s]
recall: 1.0

Best recall So Far: 1.0
Total elapsed time: 00h 07m 36s


In [None]:
# Run the Bayesian-optimization search; the held-out split is supplied as validation data.
kt_tuner.search(
    X_train,
    y_train,
    epochs=5,
    validation_data=(X_test, y_test),
    class_weight=class_weight_1,
)

In [None]:
# Run the grid search (no class weighting is passed here).
grid_tuner.search(
    X_train,
    y_train,
    epochs=6,
    validation_data=(X_test, y_test),
)

Trial 30 Complete [00h 00m 12s]
recall: 0.0

Best recall So Far: 0.21645796298980713
Total elapsed time: 00h 04m 56s


In [None]:
# Load the TensorBoard notebook extension.
%load_ext tensorboard

In [None]:
%tensorboard --logdir /content/untitled_project --load_fast=false

ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
Address already in use
Port 6006 is in use by another program. Either identify and stop that program, or start the server with a different port.

In [None]:
# Find the best model across all tuners that have completed a search.
# NOTE(review): the original referenced an undefined `cool_tuner`; every tuner
# defined in this notebook is compared instead — confirm this is the intent.
tuners = {
    "random_search": rand_tuner,
    "hyperband": hyper_tuner,
    "bayesian": kt_tuner,
    "grid_search": grid_tuner,
}

scores = []
models = []
for tuner_name, tuner in tuners.items():
    # results_summary() only prints, so the best objective value is read from
    # the oracle's best trial instead.
    best_trials = tuner.oracle.get_best_trials(num_trials=1)
    if not best_trials:
        # search() has not produced any completed trial for this tuner.
        continue
    scores.append(best_trials[0].score)
    # get_best_models() returns a list; take its single entry.
    models.append(tuner.get_best_models(num_models=1)[0])

if not models:
    raise RuntimeError("No tuner has completed trials; run a search cell first.")

# Every objective above is maximised, so the highest score wins.
best_model = models[np.argmax(scores)]


# Understanding the scoring

Recall answers the question: what proportion of actual positives was identified correctly? Note that a model that produces no false negatives has a recall of 1.0. Source: [Google Machine Learning Crash Course — Precision and Recall](https://developers.google.com/machine-learning/crash-course/classification/precision-and-recall)



---



In [None]:
# Call the method; without parentheses the cell only shows the bound-method repr.
kt_tuner.results_summary()

<bound method BaseTuner.results_summary of <keras_tuner.src.tuners.bayesian.BayesianOptimization object at 0x7b9d87e467a0>>

In [None]:
# Call the method; without parentheses the cell only shows the bound-method repr.
kt_tuner.search_space_summary()

<bound method BaseTuner.search_space_summary of <keras_tuner.src.tuners.bayesian.BayesianOptimization object at 0x7b9d87e467a0>>

In [None]:
# Ask the Bayesian tuner for up to five top-ranked hyperparameter sets.
best_hyperparams = kt_tuner.get_best_hyperparameters(num_trials=5)

# Rebuild a fresh, untrained model from the top-ranked set.
my_model = build_model(best_hyperparams[0])

# Stack the train and test rows into single arrays and fit on all of them.
X_new = np.concatenate([X_train, X_test], axis=0)
y_new = np.concatenate([y_train, y_test], axis=0)

my_model.fit(X_new, y_new, epochs=20)