## Import Packages

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Import the 'TensorFlow' package and its bundled Keras API
import tensorflow as tf
from tensorflow import keras

# Check the version of TensorFlow
print(tf.__version__)

In [None]:
# Access to Google Drive
# The Drive is mounted at '/content/drive'; the CSV and model paths used later in this notebook start with that folder
from google.colab import drive
drive.mount('/content/drive')

## Load the SELECTED (Top 30) Features Dataset
* Results of ML3-1 and ML3-2

In [None]:
# Read the selected-feature table (the file has no header row) and transpose it in one step
# NOTE(review): the transpose presumably puts samples on rows and features on columns — confirm against the CSV layout
FeatureSelected = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/SavedFiles/FeatureSelected.csv',
    header=None,
).T

# Show the dimensions of the transposed table
FeatureSelected.shape

## Standardize the feature values

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize every column of the feature table
# NOTE(review): the scaler is fitted on all rows of FeatureSelected, i.e. before the
# train/test split done later in this notebook, so test rows contribute to the scaling statistics
Scaler = StandardScaler()
FeatureSelected_std = Scaler.fit_transform(FeatureSelected)

# Show the dimensions of the standardized array
FeatureSelected_std.shape

## Split Dataset into Training and Test Sets
- Use 'train_test_split' function
- It randomly samples the training and testing data according to the designated ratio.

In [None]:
# Number of data for each condition (180 according to the original note):
# half of the rows, using integer division
NoOfData = FeatureSelected_std.shape[0] // 2

# Separate the dataset into normal and abnormal sets:
# the first NoOfData rows are taken as normal, the remaining rows as abnormal
NormalSet, AbnormalSet = FeatureSelected_std[:NoOfData], FeatureSelected_std[NoOfData:]

NormalSet.shape, AbnormalSet.shape

In [None]:
from sklearn.model_selection import train_test_split

# Define the test data ratio
TestData_Ratio = 0.2

# Options shared by both splits: same test ratio and same random seed
SplitOptions = dict(test_size=TestData_Ratio, random_state=777)

# Split the normal and abnormal sets into training and test sets
TrainData_Nor, TestData_Nor = train_test_split(NormalSet, **SplitOptions)
TrainData_Abn, TestData_Abn = train_test_split(AbnormalSet, **SplitOptions)

# Report the (train, test) shapes of each condition
print(TrainData_Nor.shape, TestData_Nor.shape)
print(TrainData_Abn.shape, TestData_Abn.shape)

## Label the data (One-hot Encoding) using np.zeros and np.ones
- In this tutorial, [1,0] refers to 'Normal' and [0,1] refers to 'Abnormal'

In [None]:
# Create labels for the training and test sets
# Each label row is a one-hot pair: [1,0] = Normal, [0,1] = Abnormal.
# The pairs are assembled from a column of ones and a column of zeros.
nTrain_Nor, nTest_Nor = TrainData_Nor.shape[0], TestData_Nor.shape[0]
nTrain_Abn, nTest_Abn = TrainData_Abn.shape[0], TestData_Abn.shape[0]

TrainLabel_Nor = np.concatenate([np.ones((nTrain_Nor, 1)), np.zeros((nTrain_Nor, 1))], axis=1)  # [1,0]: Normal
TrainLabel_Abn = np.concatenate([np.zeros((nTrain_Abn, 1)), np.ones((nTrain_Abn, 1))], axis=1)  # [0,1]: Abnormal
TestLabel_Nor  = np.concatenate([np.ones((nTest_Nor, 1)), np.zeros((nTest_Nor, 1))], axis=1)    # [1,0]: Normal
TestLabel_Abn  = np.concatenate([np.zeros((nTest_Abn, 1)), np.ones((nTest_Abn, 1))], axis=1)    # [0,1]: Abnormal

# Report the (train, test) label shapes of each condition
print(TrainLabel_Nor.shape, TestLabel_Nor.shape)
print(TrainLabel_Abn.shape, TestLabel_Abn.shape)

## Prepare the final Data and Label for ML modeling


In [None]:
# Combine the normal and abnormal data/labels
# Rows are stacked along the first axis (the default of np.concatenate):
# normal rows first, abnormal rows after, in the same order for data and labels
TrainData  = np.concatenate((TrainData_Nor, TrainData_Abn))
TrainLabel = np.concatenate((TrainLabel_Nor, TrainLabel_Abn))
TestData   = np.concatenate((TestData_Nor, TestData_Abn))
TestLabel  = np.concatenate((TestLabel_Nor, TestLabel_Abn))

# Report the (train, test) shapes of data and labels
print(TrainData.shape,  TestData.shape)
print(TrainLabel.shape, TestLabel.shape)

.

.

.

.

.

.

.



## Grid search for Artificial Neural Network (ANN) hyperparameters

### [Main hyperparameters of ANN]

1. **Number of hidden layers**: The number of hidden layers in an ANN determines the depth of the network. A deeper network can learn more complex patterns and representations of the data. However, increasing the number of hidden layers can also make the network more prone to overfitting and increase the computational cost.

.

2. **Number of neurons per hidden layer**: The number of neurons in each hidden layer determines the width of the network. A wider network can learn more complex representations of the data, but it also increases the number of trainable parameters and can lead to overfitting and increased computational cost.

.

3. **Activation functions**: Activation functions introduce non-linearity into the network, allowing it to learn complex patterns and representations. Common activation functions include:

  - Sigmoid: $f(x) = \frac{1}{1 + e^{-x}}$
  - Hyperbolic Tangent (tanh): $f(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}$
  - Rectified Linear Unit (ReLU): $f(x) = \max(0, x)$
  - Leaky ReLU: $f(x) = \max(\alpha x, x)$, where $\alpha$ is a small constant (e.g., 0.01)

.

4. **Loss function**: The loss function measures the difference between the predicted output and the actual output (target) for each data point. The goal of training an ANN is to minimize the loss function. Common loss functions for classification tasks include:

  - *(Categorical) Cross-Entropy Loss*: For multi-class classification problems.
  - *Binary Cross-Entropy Loss*: For binary classification problems.

.

5. **Optimizer**: The optimizer is an algorithm used to update the weights of the network during training to minimize the loss function. Common optimizers include:

  - *Stochastic Gradient Descent (SGD)*: Simplest optimization algorithm, updates weights using a single data point, can be slow to converge.
  - *Momentum*: Extension of SGD with a momentum term, accelerates and dampens oscillations, converges faster.
  - *RMSProp*: Improvement over AdaGrad, resolves diminishing learning rate issue, suitable for non-stationary optimization problems.
  - *Adam*: Combines benefits of momentum and RMSProp, adapts learning rate for each weight, maintains smooth convergence, popular in deep learning.

.

6. **Learning rate**: The learning rate is a hyperparameter that controls the step size of the weight updates during training. A smaller learning rate will lead to slower convergence, while a larger learning rate may cause the model to overshoot the optimal weights and not converge at all.

.

7. **Epochs**: The number of epochs is the number of times the entire training dataset is passed through the network during training. Too few epochs can lead to underfitting, while too many epochs can lead to overfitting.

### Prepare lists of hyperparameters for grid search

In [None]:
# Hyperparameters for grid search
param_ActFn = ['tanh', 'relu']  # activation function
param_Layer = [2, 3]            # number of hidden layers
param_Lrate = [0.001, 0.01]     # learning rate

# Fixed hyperparameters
noOfNeuron = 16   # units in every hidden layer
Epoch      = 100  # training epochs for every model

# Calculate the number of cases: one case per combination of the three lists
nActFn, nLayer, nLrate = map(len, (param_ActFn, param_Layer, param_Lrate))
NoOfCases = nActFn * nLayer * nLrate
NoOfCases

In [None]:
# Define a function to create ANN models by inputting the hyperparameters for grid search
def ANN_model(input_data, noOfNeuron, temp_actfn, temp_layer, temp_lrate):
    """Build and compile a fully connected classifier with a 2-unit softmax output.

    Args:
        input_data: 2-D array-like; only ``input_data.shape[1]`` is read, to set
            the number of input features.
        noOfNeuron: Number of units in every hidden layer.
        temp_actfn: Activation function of every hidden layer (e.g. 'tanh', 'relu').
        temp_layer: Number of hidden layers.
        temp_lrate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, 'accuracy' metric). The model is not trained.
    """
    # Reset the global Keras state left behind by previously built models
    keras.backend.clear_session()

    model = keras.Sequential()

    # Input Layer: keras.Input(shape=...) is accepted by tf.keras 2.x and Keras 3 alike,
    # whereas InputLayer(input_shape=...) is deprecated in Keras 3
    model.add(keras.Input(shape=(input_data.shape[1],)))

    # Hidden Layers, named 'Hidden1', 'Hidden2', ...
    for layer_no in range(1, temp_layer + 1):
        model.add(keras.layers.Dense(units=noOfNeuron, activation=temp_actfn, name=f'Hidden{layer_no}'))

    # Output Layer: one softmax unit per class
    model.add(keras.layers.Dense(units=2, activation='softmax', name='Output'))

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=temp_lrate),
                  loss=keras.losses.CategoricalCrossentropy(),
                  metrics=['accuracy'])
    return model

In [None]:
# Create an empty dataframe to store the accuracy results (one row per grid-search case)
# NOTE: the column names are kept as they are because later cells group by them.
#       The grid-search loop fills them as follows:
#       'kernel' -> activation function, 'C' -> number of hidden layers, 'gamma' -> learning rate
Accuracy_df = pd.DataFrame(np.zeros(shape=(NoOfCases, 4)),
                           columns=['kernel', 'C', 'gamma', 'Accuracy'])

# The first column receives strings ('tanh', 'relu') during the grid search.
# Writing a string into a float64 column is a deprecated silent upcast in recent pandas,
# so that column is stored as object dtype from the start; the other columns stay float64.
Accuracy_df = Accuracy_df.astype({'kernel': object})
Accuracy_df

### Train the ANN models with different combinations of hyperparameters and save them

In [None]:
# Folder that receives one saved model per hyperparameter combination
GridSearch_Dir = '/content/drive/MyDrive/Colab Notebooks/SavedFiles/ML_Models/GridSearch_ANN/'

# List every (activation function, hidden layers, learning rate) combination,
# in the same order as three nested loops over the parameter lists
GridCases = [(actfn, layer, lrate)
             for actfn in param_ActFn
             for layer in param_Layer
             for lrate in param_Lrate]

# Row position in Accuracy_df where the next result is written
cnt = 0

for temp_actfn, temp_layer, temp_lrate in GridCases:

    # Create, train, and validate a temporary ANN model with the current combination of hyperparameters
    # NOTE(review): no random seed is set in the visible cells, so accuracies may differ between runs
    # NOTE(review): accuracy is measured on TestData, and the same TestData is used again later
    #               to report the metrics of the best model
    temp_ann_model = ANN_model(TrainData, noOfNeuron, temp_actfn, temp_layer, temp_lrate)
    temp_ann_model.fit(TrainData, TrainLabel, epochs=Epoch, verbose=0)
    Loss, Accuracy = temp_ann_model.evaluate(TestData, TestLabel, verbose=0)

    # Save the temporary model under a name that encodes its hyperparameters
    temp_ann_model_name = f'ANN_{temp_actfn}_L{temp_layer}_LR{temp_lrate:.4f}.h5'
    temp_ann_model.save(GridSearch_Dir + temp_ann_model_name)

    # Store the hyperparameters and the accuracy of the temporary model in the dataframe
    Accuracy_df.iloc[cnt, :] = [temp_actfn, temp_layer, temp_lrate, Accuracy]
    cnt += 1

# Display the resulting dataframe with model performances
Accuracy_df

### Confirm the grid search results

In [None]:
# Rank the cases from highest to lowest 'Accuracy', then renumber the rows from 0
Accuracy_df_ranked = Accuracy_df.sort_values(by='Accuracy', ascending=False)
Accuracy_df_sorted = Accuracy_df_ranked.reset_index(drop=True)

# The first row is the best case; its columns are read by position:
# 0 = activation function, 1 = hidden layers, 2 = learning rate, 3 = accuracy
Top_ActFn, Top_Layer, Top_Lrate, Top_Accuracy = (Accuracy_df_sorted.iloc[0, col] for col in range(4))

# Output the best case
print("[Best case]\nActivation Function: " + Top_ActFn +
      "\nHidden Layers: %d\nLearning Rate: %.4f\n\nAccuracy: %.2f" % (Top_Layer, Top_Lrate, Top_Accuracy))

In [None]:
# Mean and standard deviation of accuracy for each activation function
# (the activation function is stored in the 'kernel' column of Accuracy_df)
Accuracy_by_ActFn = Accuracy_df.groupby('kernel')['Accuracy']
mean_accuracy_ActFn = Accuracy_by_ActFn.agg(['mean', 'std']).reset_index()
mean_accuracy_ActFn

In [None]:
# Mean and standard deviation of accuracy for each number of hidden layers
# (the number of hidden layers is stored in the 'C' column of Accuracy_df)
Accuracy_by_Layer = Accuracy_df.groupby('C')['Accuracy']
mean_accuracy_Layer = Accuracy_by_Layer.agg(['mean', 'std']).reset_index()
mean_accuracy_Layer

In [None]:
# Mean and standard deviation of accuracy for each learning rate
# (the learning rate is stored in the 'gamma' column of Accuracy_df)
Accuracy_by_Lrate = Accuracy_df.groupby('gamma')['Accuracy']
mean_accuracy_Lrate = Accuracy_by_Lrate.agg(['mean', 'std']).reset_index()
mean_accuracy_Lrate

### Visualize the performance comparison for the selected hyperparameter

In [None]:
# Set an index to select a hyperparameter
# 0: activation function // 1: hidden layers // 2: learning rate
idx = 1

# Short key and display name of the selected hyperparameter
H_Param = ['ActFn', 'Layer', 'Lrate']
H_Param_name = ['Activation Function', 'Hidden Layers', 'Learning Rate']
Selected = H_Param[idx]
Selected_name = H_Param_name[idx]

# Pick the matching summary table through an explicit mapping instead of exec():
# no code is built from strings, and an unknown key fails with a clear KeyError
Summary_by_param = {
    'ActFn': mean_accuracy_ActFn,
    'Layer': mean_accuracy_Layer,
    'Lrate': mean_accuracy_Lrate,
}
Result = Summary_by_param[Selected]

# First column holds the hyperparameter values; 'mean'/'std' hold the accuracy statistics
xLabel = Result.iloc[:, 0]
x_pos = np.arange(Result.shape[0])
y_val = Result['mean']
y_err = Result['std']

# Draw a bar chart to compare the model performance (diagnostic accuracy) for each hyperparameter
fig, ax = plt.subplots(figsize=(10, 5))

# Create a bar plot with error bars (bar height = mean accuracy, error bar = standard deviation)
ax.bar(x_pos, y_val, yerr=y_err, align='center', alpha=0.5, ecolor='black', capsize=10,
       color=['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple'])
ax.set_ylabel('Accuracy (mean)', fontsize=15)
ax.set_title(f"Model performance comparison by '{Selected_name}'", fontsize=20)  # title typo fixed
ax.set_xticks(x_pos)
ax.set_xticklabels(xLabel, fontsize=15)
ax.yaxis.grid()

plt.tight_layout()
plt.show()

### Confusion matrix and evaluation metrics for the best ANN model

In [None]:
import seaborn as sns
from sklearn import metrics
from sklearn.metrics import confusion_matrix

# Hyperparameters of the best case = first row of 'Accuracy_df_sorted'
Best_ActFn = Accuracy_df_sorted.iloc[0, 0]
Best_Layer = int(Accuracy_df_sorted.iloc[0, 1])  # converted to int so the file name reads 'L2', not 'L2.0'
Best_Lrate = Accuracy_df_sorted.iloc[0, 2]

# Rebuild the file name that the grid search used for this combination, then load that model
best_ann_model_name = f'ANN_{Best_ActFn}_L{Best_Layer}_LR{Best_Lrate:.4f}.h5'
best_ann_model_path = '/content/drive/MyDrive/Colab Notebooks/SavedFiles/ML_Models/GridSearch_ANN/' + best_ann_model_name
best_ann_model = keras.models.load_model(best_ann_model_path)

# Predict the output (Robotic spot-welding condition) for the test data
Predicted = best_ann_model.predict(TestData)

# Reduce each one-hot label row / predicted score row to the position of its largest entry
TestLabel_rev = TestLabel.argmax(axis=1)
Predicted_rev = Predicted.argmax(axis=1)

# Calculate the confusion matrix (first argument: true labels, second: predictions)
cm = confusion_matrix(TestLabel_rev, Predicted_rev)

# Plot the confusion matrix as an annotated heatmap
plt.figure(figsize=(6, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap=plt.cm.Blues, cbar=False, square=True)
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.title("Confusion Matrix of the Best ANN Model")
plt.show()

# Calculate the evaluation metrics from the same (true, predicted) pair
Label_pair = (TestLabel_rev, Predicted_rev)
accuracy  = metrics.accuracy_score(*Label_pair)
precision = metrics.precision_score(*Label_pair)
recall    = metrics.recall_score(*Label_pair)
f1_score  = metrics.f1_score(*Label_pair)

# Print the evaluation metrics
print("\n\n")
print("Best ANN Model Evaluation:\n")
print(f"Accuracy : {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall   : {recall:.2f}")
print(f"F1 Score : {f1_score:.2f}")