# LDED Audiovisual Fusion 

Author: Chen Lequn.
Created on 11 Oct 2023.

- Material: Maraging Steel 300
- Process: Robotic Laser-directed energy deposition
- Recorded data: position, velocity, coaxial CCD features, audio features
- Quality labels generated: keyhole pores, cracks, defect-free

### Notebook 3: Melt pool visual feature prediction using audio features: baseline regression model
In this notebook:
- Develop an ML model to predict representative melt pool visual features (e.g., ellipse width) from audio features
- We use the handcrafted features extracted in the previous steps
- This serves as our baseline model

## System setup

In [49]:
# Enable the Intel(R) Extension for Scikit-learn so that supported estimators
# are swapped for its accelerated implementations.
# NOTE(review): the extension's documentation asks for patch_sklearn() to run
# before scikit-learn is imported, so this cell should stay ahead of the
# import cells below -- confirm when re-ordering cells.
# NOTE(review): config_context and dpctl are not used in the visible part of
# this notebook -- confirm they are needed further down.
from sklearnex import patch_sklearn, config_context
import dpctl
patch_sklearn()

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
## Required python libraries
import pandas as pd
import numpy as np
import scipy as sp
import scipy.signal
import os
import math
import sys
from scipy.interpolate import griddata

# to make this notebook's output stable across runs
np.random.seed(42)

pd.plotting.register_matplotlib_converters()

import time
import matplotlib as mpl
import matplotlib 
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
%matplotlib inline
import seaborn as sns
import matplotlib.font_manager as font_manager

import os
from pathlib import Path
import json
import h5py
import joblib
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from sklearn.ensemble import RandomForestClassifier
# import backoff
from scipy.interpolate import BSpline
import numpy as np
import matplotlib.pyplot as plt
import time
import numpy as np
import pandas as pd
import seaborn as sns
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
import matplotlib.patches
from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler
from sklearn import (manifold, datasets, decomposition, ensemble,random_projection)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from tqdm.notebook import tqdm

In [30]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from lightgbm import LGBMRegressor

In [6]:
# Global figure styling: black, thick axes borders on every plot.
plt.rcParams["axes.edgecolor"] = "black"
plt.rcParams["axes.linewidth"] = 2.50

# Set the random seed for reproducibility
np.random.seed(42)

# Output folders for figures and trained models, relative to the notebook.
# NOTE: the "basline" spelling is kept on purpose -- renaming the folders
# would orphan anything already saved under the existing names.
PROJECT_ROOT_DIR = "../"
IMAGE_PATH = os.path.join(PROJECT_ROOT_DIR, "result_images", 'meltpool prediction', 'basline regression')
model_path = os.path.join(PROJECT_ROOT_DIR, "trained_models", 'meltpool prediction', 'basline')
os.makedirs(IMAGE_PATH, exist_ok=True)
os.makedirs(model_path, exist_ok=True)

# Dataset root. Set the LDED_DATASET_DIR environment variable to run the
# notebook on another machine; the original author's location is the default.
Multimodal_dataset_PATH = os.environ.get(
    "LDED_DATASET_DIR",
    "/home/chenlequn/Dataset/LDED_acoustic_visual_monitoring_dataset",
)
# Sub-folder with the fused dataset (presumably sampled at 25 Hz -- the
# Time_Stamps column advances in 0.04 s steps).
Dataset_path = os.path.join(Multimodal_dataset_PATH, '25Hz')
                            

## Helper that writes the current matplotlib figure into IMAGE_PATH
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under ``IMAGE_PATH``.

    Parameters
    ----------
    fig_id : str
        File name without extension; also echoed in the progress message.
    tight_layout : bool, default True
        When true, ``plt.tight_layout()`` is applied before saving.
    fig_extension : str, default "png"
        File extension, also passed to ``plt.savefig`` as the format.
    resolution : int, default 300
        Passed to ``plt.savefig`` as ``dpi``.
    """
    file_name = "".join((fig_id, ".", fig_extension))
    path = os.path.join(IMAGE_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, dpi=resolution, format=fig_extension)

import warnings

# Hide warnings whose message starts with "internal gelsd".
warnings.filterwarnings("ignore", message="^internal gelsd")

# (Re-)apply the axes border styling used for all figures.
plt.rcParams.update({
    "axes.edgecolor": "black",
    "axes.linewidth": 2.50,
})

## Retrieve multisensor dataframe

In [8]:
# Read the data back from the HDF5 file into a new DataFrame
df_multimodal = pd.read_hdf(os.path.join(Dataset_path, 'spatiotemporal_fused_multimodal.h5'), key='df')
# df_multimodal = df_multimodal.dropna(subset=['class_name'])
df_multimodal

Unnamed: 0,sample index,Time_Stamps,audio_file_name,image_file_name,class_name,class_name_v2,Layer number,Sample number,rms_energy,amplitude_envelope_mean,...,center_y,contour_area,contour_length,X,Y,Z,Vx,Vy,Vz,Speed
0,1,0.00,sample_21_1.wav,sample_21_1.jpg,Laser-off,Laser-off,1.0,21,0.009018,0.003034,...,0.000000,0.0,0.000000,0.000,0.000,0.000,0.000,-0.000,0.000,0.000
1,2,0.04,sample_21_2.wav,sample_21_2.jpg,Defect-free,Defect-free,1.0,21,0.019286,0.012109,...,239.000000,305442.0,2234.000000,0.000,-0.000,0.001,-0.074,-0.074,0.000,0.105
2,3,0.08,sample_21_3.wav,sample_21_3.jpg,Defect-free,Defect-free,1.0,21,0.019593,0.015114,...,239.000000,305442.0,2234.000000,-0.010,0.015,-0.007,-0.051,0.026,-0.051,0.077
3,4,0.12,sample_21_4.wav,sample_21_4.jpg,Defect-free,Defect-free,1.0,21,0.030937,0.021501,...,230.264496,291865.5,2279.781744,-0.007,0.016,0.010,0.173,-0.222,2.520,2.536
4,5,0.16,sample_21_5.wav,sample_21_5.jpg,Defect-free,Defect-free,1.0,21,0.038329,0.029851,...,229.109962,281970.0,2407.847760,-0.111,0.123,-0.057,-1.864,1.662,-2.468,3.511
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61989,13524,540.92,sample_32_13524.wav,sample_32_13524.jpg,,,,32,0.048554,0.036195,...,0.000000,0.0,0.000000,3.017,22.125,7.629,1.104,-24.165,0.427,24.194
61990,13525,540.96,sample_32_13525.wav,sample_32_13525.jpg,,,,32,0.040282,0.032946,...,0.000000,0.0,0.000000,3.011,21.169,7.622,-0.804,-24.051,-0.553,24.071
61991,13526,541.00,sample_32_13526.wav,sample_32_13526.jpg,,,,32,0.052285,0.042469,...,0.000000,0.0,0.000000,2.999,20.204,7.613,0.049,-23.766,0.636,23.775
61992,13527,541.04,sample_32_13527.wav,sample_32_13527.jpg,,,,32,0.023477,0.016409,...,0.000000,0.0,0.000000,2.997,19.250,7.622,-0.811,-24.167,-0.634,24.189


In [9]:
# Load the "dropped_97" variant of the fused table (same key, 'df').
df_multimodal_97 = pd.read_hdf(
    os.path.join(Dataset_path, 'spatiotemporal_fused_multimodal_dropped_97.h5'),
    'df',
)
df_multimodal_97

Unnamed: 0,sample index,Time_Stamps,audio_file_name,image_file_name,class_name,class_name_v2,Layer number,Sample number,amplitude_envelope_std,zero_crossing_rate,...,nu03,center_y,contour_area,X,Y,Z,Vx,Vy,Vz,Speed
0,1,0.00,sample_21_1.wav,sample_21_1.jpg,Laser-off,Laser-off,1.0,21,0.003803,0.129819,...,0.000000,0.000000,0.0,0.000,0.000,0.000,0.000,-0.000,0.000,0.000
1,2,0.04,sample_21_2.wav,sample_21_2.jpg,Defect-free,Defect-free,1.0,21,0.006188,0.210317,...,0.000000,239.000000,305442.0,0.000,-0.000,0.001,-0.074,-0.074,0.000,0.105
2,3,0.08,sample_21_3.wav,sample_21_3.jpg,Defect-free,Defect-free,1.0,21,0.005868,0.200680,...,0.000000,239.000000,305442.0,-0.010,0.015,-0.007,-0.051,0.026,-0.051,0.077
3,4,0.12,sample_21_4.wav,sample_21_4.jpg,Defect-free,Defect-free,1.0,21,0.010559,0.160431,...,0.000891,230.264496,291865.5,-0.007,0.016,0.010,0.173,-0.222,2.520,2.536
4,5,0.16,sample_21_5.wav,sample_21_5.jpg,Defect-free,Defect-free,1.0,21,0.013688,0.113379,...,0.001388,229.109962,281970.0,-0.111,0.123,-0.057,-1.864,1.662,-2.468,3.511
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48461,10982,439.24,sample_26_10982.wav,sample_26_10982.jpg,Laser-off,Laser-off,,26,0.003904,0.018707,...,0.000000,0.000000,0.0,0.000,0.000,0.000,-0.076,0.025,0.000,0.080
48462,10983,439.28,sample_26_10983.wav,sample_26_10983.jpg,Laser-off,Laser-off,,26,0.003369,0.023810,...,0.000000,0.000000,0.0,0.000,0.000,0.000,-0.076,0.025,0.000,0.080
48463,10984,439.32,sample_26_10984.wav,sample_26_10984.jpg,Laser-off,Laser-off,,26,0.003854,0.017007,...,0.000000,0.000000,0.0,0.000,0.000,0.000,-0.076,0.025,0.000,0.080
48464,10985,439.36,sample_26_10985.wav,sample_26_10985.jpg,Laser-off,Laser-off,,26,0.003729,0.013605,...,0.000000,0.000000,0.0,0.000,0.000,0.000,-0.076,0.025,0.000,0.080


### Split data into Features X and Label Y

In [51]:
# Candidate audio features, in the column order used to build the model input.
# Families that follow a regular "<name>_mean" / "<name>_std" pattern are
# generated instead of being spelled out; the resulting order is unchanged.
audio_features = [
    # time-domain / loudness descriptors
    'rms_energy',
    'amplitude_envelope_mean',
    'amplitude_envelope_std',
    'zero_crossing_rate',
    'dynamic_complexity',
    'loudness',
    'loudness_vickers',
    # spectral centroid and complexity
    'spectral_centroid_mean',
    'spectral_centroid_std',
    'spectral_complexity_mean',
    'spectral_complexity_std',
    # spectral contrast, then spectral valley, bands 0-5
    *[f'spectral_contrast_{band}_{stat}' for band in range(6) for stat in ('mean', 'std')],
    *[f'spectral_valley_{band}_{stat}' for band in range(6) for stat in ('mean', 'std')],
    # remaining spectral statistics
    *[
        f'spectral_{name}_{stat}'
        for name in (
            'decrease', 'energy', 'energy_band_ratio', 'flatness', 'flux',
            'rolloff', 'strong_peak', 'variance', 'skewness', 'kurtosis',
            'crest_factor',
        )
        for stat in ('mean', 'std')
    ],
    # MFCC coefficients 0-12
    *[f'mfcc_{coef}_{stat}' for coef in range(13) for stat in ('mean', 'std')],
]

# Audio features that are dropped before modelling.
# NOTE(review): the "_090" suffix presumably refers to a 0.90 correlation
# threshold from an earlier notebook -- confirm.
redundant_audio_features_090 = [
    'spectral_decrease_mean',
    'spectral_decrease_std',
    'amplitude_envelope_std',
    'amplitude_envelope_mean',
    'rms_energy',
    'spectral_energy_mean',
    'spectral_energy_std',
    'spectral_skewness_mean',
    'spectral_energy_band_ratio_mean',
    'zero_crossing_rate',
    'spectral_valley_1_mean',
    'mfcc_0_mean',
    'spectral_strong_peak_std',
    'dynamic_complexity',
    'loudness',
]

# Melt pool visual features that serve as regression targets
target_visual_features = [
    'max_contour_area',
    'ellipse_width',
    'contour_length',
    'rectangle_width',
]

# Model input: the audio feature columns minus the redundant ones
X = (
    df_multimodal[audio_features]
    .drop(columns=redundant_audio_features_090)
    .to_numpy()
)
# Targets: one column per entry of target_visual_features
Y = df_multimodal[target_visual_features].to_numpy()

# One 1-D target vector per visual feature (columns of Y, in list order)
Y_max_contour, Y_ellipse_width, Y_contour_length, Y_rectangle_width = Y.T

# Display the shape of every target vector to confirm the split
tuple(
    target.shape
    for target in (Y_max_contour, Y_ellipse_width, Y_contour_length, Y_rectangle_width)
)

((61994,), (61994,), (61994,), (61994,))

In [52]:
# Feature matrix shape on the first line, target matrix shape on the second
print(X.shape, Y.shape, sep="\n")

(61994, 68)
(61994, 4)


## Split the dataset into *train*, *validation*, and *test*
- Ratio: 0.8, 0.1, 0.1

In [53]:
# Define the split ratio (train / validation / test)
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1

# Share of the remaining (train + validation) rows that becomes validation
val_fraction_of_temp = val_ratio / (train_ratio + val_ratio)

# Step 1: hold out the test set.
# X and all four targets go through ONE call, so every array is partitioned
# with the same row selection by construction instead of relying on repeated
# calls with a matching random_state. With the same random_state and sample
# count this is expected to reproduce the previous partition.
(X_temp, X_test,
 Y_max_contour_temp, Y_max_contour_test,
 Y_ellipse_width_temp, Y_ellipse_width_test,
 Y_contour_length_temp, Y_contour_length_test,
 Y_rectangle_width_temp, Y_rectangle_width_test) = train_test_split(
    X, Y_max_contour, Y_ellipse_width, Y_contour_length, Y_rectangle_width,
    test_size=test_ratio, random_state=42)

# Step 2: divide the remainder into training and validation sets.
(X_train, X_val,
 Y_max_contour_train, Y_max_contour_val,
 Y_ellipse_width_train, Y_ellipse_width_val,
 Y_contour_length_train, Y_contour_length_val,
 Y_rectangle_width_train, Y_rectangle_width_val) = train_test_split(
    X_temp, Y_max_contour_temp, Y_ellipse_width_temp,
    Y_contour_length_temp, Y_rectangle_width_temp,
    test_size=val_fraction_of_temp, random_state=42)

# Sanity checks: no row lost or duplicated, targets aligned with features
assert len(X_train) + len(X_val) + len(X_test) == len(X)
assert len(Y_max_contour_train) == len(X_train)
assert len(Y_max_contour_val) == len(X_val)
assert len(Y_max_contour_test) == len(X_test)

# Show the shape of each split to confirm the process
(X_train.shape, X_val.shape, X_test.shape), (Y_max_contour_train.shape, Y_max_contour_val.shape, Y_max_contour_test.shape)

(((49594, 68), (6200, 68), (6200, 68)), ((49594,), (6200,), (6200,)))

In [54]:
# Train / validation / test sizes of the contour-length target
tuple(part.shape for part in (Y_contour_length_train, Y_contour_length_val, Y_contour_length_test))

((49594,), (6200,), (6200,))

In [55]:
# Train / validation / test sizes of the rectangle-width target
tuple(part.shape for part in (Y_rectangle_width_train, Y_rectangle_width_val, Y_rectangle_width_test))

((49594,), (6200,), (6200,))

In [56]:
# Train / validation / test sizes of the ellipse-width target
tuple(part.shape for part in (Y_ellipse_width_train, Y_ellipse_width_val, Y_ellipse_width_test))

((49594,), (6200,), (6200,))

# 1. Max Contour Modelling

### Hyperparameter tuning (Grid Search)

Select a `20%` random subset of the training data for hyperparameter tuning

In [57]:
# Select a 20% random subset of the training data for hyperparameter tuning.
# The indices come from a generator seeded right here rather than from the
# global NumPy RNG, so the subset no longer depends on which cells ran before
# this one and re-running the cell yields the same rows. RandomState(42)
# follows the legacy np.random.seed(42) convention used in this notebook.
assert X_train.shape[0] == Y_max_contour_train.shape[0], "features/target misaligned"
subset_size = int(0.2 * X_train.shape[0])
subset_rng = np.random.RandomState(42)
random_indices = subset_rng.choice(X_train.shape[0], subset_size, replace=False)

# Subset for X and Y_max_contour (same rows in both)
X_train_subset = X_train[random_indices, :]
Y_max_contour_train_subset = Y_max_contour_train[random_indices]

X_train_subset.shape, Y_max_contour_train_subset.shape

((9918, 68), (9918,))

In [None]:
# Hyperparameter grids, keyed by model name. Each grid is handed to
# GridSearchCV together with the estimator registered under the same name.
# Parameter names must match what that estimator accepts:
#   * plain estimators use their own parameter names;
#   * estimators wrapped by make_pipeline use the "<step>__<param>" form,
#     where the step name is the lower-cased class name.
param_grids = {
    'RandomForestRegressor': {
        'n_estimators': [100, 200, 400],
        'max_depth': [None, 30, 100, 500],
        'min_samples_split': [2, 5, 10, 20],
        'min_samples_leaf': [1, 4, 10]
    },
    'DecisionTreeRegressor': {
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10]
    },
    'Ridge': {
        'alpha': [0.01,  0.1, 1, 10, 100],
        'solver': ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
    },
    # SVR is searched as a bare estimator, so its parameters carry no prefix.
    # (The earlier 'estimator__' prefix only resolves when SVR sits inside a
    # wrapper exposing an `estimator` attribute; on a bare SVR it is rejected
    # as an invalid parameter.)
    'SVR': {
        'C': [0.1, 1],
        'epsilon': [0.01, 0.1],
        'kernel': ['linear', 'rbf']
    },
    'KNeighborsRegressor': {
        'n_neighbors': [3, 5, 7],
        'weights': ['uniform', 'distance'],
        'metric': ['euclidean', 'manhattan']
    },
    # The MLP is searched inside a make_pipeline(StandardScaler(), MLPRegressor)
    # pipeline, hence the 'mlpregressor__' step prefix.
    'MLPRegressor': {
        'mlpregressor__hidden_layer_sizes': [(16,64,256), (32,64,256), (100, 100, 100)],
        'mlpregressor__activation': ['relu', 'tanh'],
        'mlpregressor__solver': ['adam'],
        'mlpregressor__learning_rate_init': [0.001, 0.01],
        'mlpregressor__max_iter': [10000],
    },
    'GaussianProcessRegressor': {
        'normalize_y': [True, False],
        'alpha': [1e-10, 1e-9]
    },
    'GradientBoostingRegressor': {
        'n_estimators': [50, 100],
        'learning_rate': [0.01, 0.1],
        'max_depth': [3, 5]
    },
    'XGBRegressor': {
        'n_estimators': [50, 100],
        'learning_rate': [0.01, 0.1],
        'max_depth': [3, 5]
    },
    'LGBMRegressor': {
        'n_estimators': [50, 100, 150],
        'learning_rate': [0.01, 0.1],
        'max_depth': [3, 5]
    }
}

# Candidate regressors, keyed by the name used to look up their grid in
# `param_grids`. Insertion order is the order in which they are tuned.
# Only the MLP is wrapped in a pipeline with feature standardisation.
models_to_train = dict(
    Ridge=Ridge(),
    KNeighborsRegressor=KNeighborsRegressor(),
    RandomForestRegressor=RandomForestRegressor(random_state=42),
    DecisionTreeRegressor=DecisionTreeRegressor(random_state=42),
    MLPRegressor=make_pipeline(StandardScaler(), MLPRegressor(random_state=42)),
    SVR=SVR(),
    GaussianProcessRegressor=GaussianProcessRegressor(random_state=42),
    GradientBoostingRegressor=GradientBoostingRegressor(random_state=42),
    XGBRegressor=XGBRegressor(random_state=42),
    LGBMRegressor=LGBMRegressor(random_state=42),
)

# 5-fold cross-validation with shuffling; the fixed seed keeps folds stable
kf = KFold(5, shuffle=True, random_state=42)

# Best hyperparameters and best mean CV score found for each model
best_params, best_scores = {}, {}

# Exhaustive grid search per model on the tuning subset. No `scoring` is
# passed, so GridSearchCV falls back to each estimator's own default score.
for model_name, model in models_to_train.items():
    print(f"\nPerforming GridSearchCV for {model_name}...")

    grid_search = GridSearchCV(
        model,
        param_grids[model_name],
        cv=kf,
        n_jobs=-1,
        verbose=3,
    )
    grid_search.fit(X_train_subset, Y_max_contour_train_subset)

    best_params[model_name], best_scores[model_name] = (
        grid_search.best_params_,
        grid_search.best_score_,
    )

best_params, best_scores



Performing GridSearchCV for RandomForestRegressor...
Fitting 5 folds for each of 144 candidates, totalling 720 fits
[CV 2/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=0.262 total time=  14.3s
[CV 5/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=150;, score=0.248 total time=  21.0s
[CV 3/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=150;, score=0.255 total time=  18.7s
[CV 5/5] END max_depth=None, min_samples_leaf=1, min_samples_split=10, n_estimators=100;, score=0.239 total time=  10.4s
[CV 3/5] END max_depth=None, min_samples_leaf=1, min_samples_split=20, n_estimators=50;, score=0.262 total time=   5.6s
[CV 2/5] END max_depth=None, min_samples_leaf=1, min_samples_split=20, n_estimators=100;, score=0.257 total time=   9.0s
[CV 5/5] END max_depth=None, min_samples_leaf=1, min_samples_split=20, n_estimators=150;, score=0.243 total time=  13.8s
[CV 3/5] END max_depth=None, min_samples