# Machine Learning for Additive Manufacturing: Melt Pool Classification

Load in relevant modules


Python package dependencies: `mendeleev`, `matplotlib`, `pandas`, `numpy`, `pylab`, `pprint`, `sklearn`, `scipy`, `os`



In [None]:
import pandas as pd
import numpy as np
import os
import pandas as pd
import numpy as np
import os
from pylab import * # For adjusting frame width only
from meltpoolnet.ml.classification_ml import classify_learn
from meltpoolnet.utils.utils import select_subset, select_process
from meltpoolnet.utils.utils import select_parameters, extract_features

# Show the current working directory; the dataset paths used below are built relative to it
os.getcwd()

# Load in data, perform basic analysis

In [None]:
# Load the CSV data (downloaded from Google Sheets) from the `datasets`
# directory located two levels above the working directory.
dataset_dir = os.path.join(os.path.abspath(''), '../../', 'datasets')
csv = pd.read_csv(os.path.join(dataset_dir, 'meltpoolgeometry.csv'))
# Independent copy of the geometry table; avoids parsing the same file twice.
csv1 = csv.copy()
csv2 = pd.read_csv(os.path.join(dataset_dir, 'meltpoolclassification.csv'))
# The regression data is the geometry table itself (same object as csv1).
regressioncsv = csv1
# DataFrame.append was removed in pandas 2.0. pd.concat stacks the rows of both
# tables in the same order and, like append, keeps each table's original index.
class_csv = pd.concat([csv1, csv2])
# Number of geometry rows recorded for each material, most frequent first.
materials = csv['Material'].value_counts()


In [None]:
# Print a header followed by every column name of the loaded table, one per line
report_lines = ['Keys Present in CSV:']
report_lines.extend(str(column_name) for column_name in csv.keys())
print('\n'.join(report_lines))

In [None]:
def print_field_composition(csv_input, field):
    """Print how often each distinct value occurs in one column.

    The column name is printed first, followed by one line per distinct
    value, ordered from the most to the least frequent.

    Args:
        csv_input: Table-like object; ``csv_input[field]`` must yield the
            column's values.
        field: Name of the column to summarise.
    """
    print(field)
    values, tallies = np.unique(csv_input[field], return_counts=True)
    # argsort is ascending, so reverse it to list the largest count first
    order = np.argsort(tallies)[::-1]
    for value, tally in zip(values[order], tallies[order]):
        print(f'{tally} entries present in {field} {value}')
# Summarise how many rows the loaded table holds for each material
print_field_composition(csv, field='Material')


In [None]:
# Report, for every column, the percentage of rows that hold a non-null value
row_total = len(csv)
for parameter in csv.columns:
    # int() keeps the arithmetic in plain Python integers
    filled = int(csv[parameter].notna().sum())
    print('{:.1f} % samples have '.format(100*filled/row_total) + parameter + ' value')

# Machine Learning

## Helper functions for selecting data

In [None]:
# Load the CSV data (downloaded from Google Sheets) from the `datasets`
# directory located two levels above the working directory.
print("Current directory: ", os.getcwd())
dataset_dir = os.path.join(os.path.abspath(''), '../../', 'datasets')
csv = pd.read_csv(os.path.join(dataset_dir, 'meltpoolgeometry.csv'))
# Independent copy of the geometry table; avoids parsing the same file twice.
csv1 = csv.copy()
csv2 = pd.read_csv(os.path.join(dataset_dir, 'meltpoolclassification.csv'))
# The regression data is the geometry table itself (same object as csv1).
regressioncsv = csv1
# DataFrame.append was removed in pandas 2.0. pd.concat stacks the rows of both
# tables in the same order and, like append, keeps each table's original index.
class_csv = pd.concat([csv1, csv2])
# Number of geometry rows recorded for each material, most frequent first.
materials = csv['Material'].value_counts()

In [None]:
# Examine distribution of features: percentage of rows with a non-null value per column
for col in csv.columns:
    values = csv[col]
    num_present = int(values.notna().sum())
    total = len(values)
    parameter = col
    # The counts were previously computed and then discarded; report them so
    # the cell actually shows the distribution it sets out to examine.
    print('{:.1f} % samples have '.format(100*num_present/total) + parameter + ' value')

In [None]:
# Example use case: choose the label column and the input parameters, pick the
# 'SLM' entries of the 'Sub-process' column with select_subset, then let
# select_parameters turn that selection into a feature array X and labels y.
label_col = 'meltpool shape'
parameter_list = ['Power', 'Velocity', 'layer thickness']
new_csv = select_subset(class_csv, 'Sub-process', 'SLM')
X, y = select_parameters(new_csv, parameter_list, label_col)


## Classification task 

This task predicts the melt pool shape from features of the build process. The possible classes are `spatter`, `balling`, `keyhole`, `desirable`, and `LOF` (lack of fusion); each names a type of defect, except `desirable`, which indicates that there is no defect. The following algorithms are used to generate predictions, and their accuracies are compared.


- Random Forests 'RF'


- Gaussian Process Classification 'GPC'


- Support Vector Classification 'SVC'


- Logistic Regression 'Logistic Regression'


- Gradient Boosted Random Forest 'GB'



- Neural Network 'NN'

The base ML case considers process and thermodynamic parameters.

In [None]:
# Collect every column whose name contains a '%' sign
# (presumably the composition columns -- confirm against the dataset)
atomic_features = [column for column in csv.columns if '%' in column]
atomic_features

### Demonstration with baseline parameters

In [None]:
%matplotlib inline
# Define parameters that models will be trained on and value to be predicted
parameter_list = ['Power', 'Velocity', 'density', 'Cp', 'k' , 'beam D', 'melting T', 'layer thickness', 'absorption coefficient', 'absorption coefficient 2']
parameter_list.extend(atomic_features)
label_col = 'meltpool shape'

# Form dataset array from dataframe, given parameter_list, label_col
# function select_parameters() does this as well

data_list = []
label_list = []
new_csv = select_subset(class_csv, 'Sub-process', 'SLM')

test = 0
for i in range(len(new_csv)):
    success, features, label = extract_features(new_csv.iloc[i], parameter_list, label_col)
    if success < 0:
        continue
    else:
        test += 1
        data_list.append(features)
        label_list.append(label)

        
X= np.array(np.squeeze(data_list))
labels = np.unique(label_list)

# Define labels from csv file
label_id = np.arange(len(labels))
class_labels = np.zeros(len(label_list))
for idx, sample in enumerate(label_list):
    for l_id, lbl in enumerate(labels):
        if lbl == sample:
            class_labels[idx] = l_id
y = np.array(class_labels)

 
class_labels = np.array(class_labels,dtype = 'int')

print(X.shape, y.shape)
fit_models, train_accuracy, train_accuracy_std, test_accuracy, test_accuracy_std = classify_learn(data = X.reshape(-1, len(X[0])), 
                                                                                                    labels = y, 
                                                                                                    label_names = labels,
                                                                                                    plot = False, 
                                                                                                    parameters = parameter_list, 
                                                                                                    parameter_list  = parameter_list,
                                                                                                    title = 'L-PBF', 
                                                                                                    prefix = 'classification', 
                                                                                                    model_name = 'RF')

### Demonstration with thermodynamic parameters, composition, and one hot encoded material features

In [None]:
%matplotlib inline
# Define parameters that models will be trained on and value to be predicted
parameter_list = ['Power', 'Velocity', 'density', 'Cp', 'k' , 'beam D', 'melting T', 'layer thickness', 'absorption coefficient']
parameter_list.extend(atomic_features)
label_col = 'meltpool shape'


#Form dataset array from dataframe, given parameter_list, label_col
# function select_parameters() does this as well
data_list = []
label_list = []
materials_present = []

new_csv = select_process(class_csv, 'PBF')
test = 0
for i in range(len(new_csv)):
    success, features, label = extract_features(new_csv.iloc[i], parameter_list, label_col)
    if success < 0:
      #  print("CONTINUE")
        continue
    else:
        test += 1
        data_list.append(features)
        label_list.append(label)
        materials_present.append(new_csv['Material'].iloc[i])

print('length in PBF data: ', len(new_csv))
print('Used data points: ', test)
X= np.array(np.squeeze(data_list))
labels = np.unique(label_list)

# Define labels from csv file
label_id = np.arange(len(labels))
class_labels = np.zeros(len(label_list))
for idx, sample in enumerate(label_list):
    for l_id, lbl in enumerate(labels):
        if lbl == sample:
            class_labels[idx] = l_id
y = np.array(class_labels)

# Define One Hot Encoding of materials
materials_ohe = [np.zeros(len(materials_present)) for key in materials.keys()]

materials_ohe = np.array(materials_ohe).T

print(len(materials_ohe))
material_pos = np.arange(len(materials.keys()))
for idx in range(len(materials_present)):
    for j_idx in range(len(material_pos)):
        if materials_present[idx] == materials.keys()[j_idx]:
            materials_ohe[idx,j_idx] = 1 

 
class_labels = np.array(class_labels,dtype = 'int')


fit_models_wtohe, \
    train_accuracy_wtohe, \
        train_accuracy_std_wtohe, \
            test_accuracy_wtohe, \
                test_accuracy_std_wtohe = classify_learn(X.reshape(-1, len(X[0])), 
                                                        y, 
                                                        labels, 
                                                        parameters = parameter_list, 
                                                        parameter_list = parameter_list,
                                                        title = 'PBF, PT_wt_OHE', 
                                                        plot = False, 
                                                        prefix = 'classification', 
                                                        model_name = 'NN')
