# Machine Learning for Additive Manufacturing: Melt Pool Classification

Load in relevant modules


Python package dependencies: `mendeleev`, `matplotlib`, `pandas`, `numpy`, `pylab`, `pprint`, `sklearn`, `scipy`, `os`



In [7]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os
from pprint import pprint
from pylab import gca # For adjusting frame width only
from sklearn.preprocessing import StandardScaler
from meltpoolnet.utils.plotting_utils import frame_tick
os.getcwd()

'/media/cmu/DATA1/francis/Previous Projects/AdditiveNet/CNN_AM_Project/AdditiveNet'

# Load in data, perform basic analysis

In [4]:
# Load the melt pool tables. Both CSV files are exported from Google Sheets
# and are read from the current working directory.
print("Current directory: ", os.getcwd())
csv = pd.read_csv('meltpoolgeometry.csv')
# Separate copy of the geometry table (avoids parsing the same file twice).
csv1 = csv.copy()
csv2 = pd.read_csv('meltpoolclassification.csv')
regressioncsv =  csv1
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat stacks the rows the same way (original row indices are kept).
class_csv = pd.concat([csv1, csv2])
# Number of rows per material in the geometry table, most frequent first.
materials = csv['Material'].value_counts()


Current directory:  /media/cmu/DATA1/francis/Previous Projects/AdditiveNet/CNN_AM_Project/AdditiveNet


In [5]:
# List every column header of the loaded table, one per line.
print('Keys Present in CSV:')
for column_name in csv.columns:
    print(column_name)

Keys Present in CSV:
Material
Process
Sub-process
Power
Velocity
Hatch spacing
depth of meltpool
width of melt pool
length of melt pool
d/l
d/w
l/w
E (J/mm)
E (J/mm3)
layer thickness
beam D
absorption coefficient
absorption coefficient 2
density
Cp
k
melting T
minimal absorptivity
meltpool shape
spatter
Y (wt%)
Zn (wt%)
Mg (wt%)
Si (wt%)
Al (wt%)
Sn (wt%)
Zr (wt%)
W (wt%)
Ti (wt%)
V (wt%)
Co (wt%)
Cu (wt%)
Ta (wt%)
Nb (wt%)
Ni  (wt.%)
Cr  (wt.%)
Fe (wt.%)
Mn (wt%)
Mo (wt.%)
D10
D50
D90
paper ID
paper
porosity
relative density
comment
Unnamed: 52


In [6]:
def print_field_composition(csv_input, field):
    """Print how often each distinct value of ``field`` occurs, most frequent first.

    Parameters
    ----------
    csv_input : pandas.DataFrame or mapping
        Table to summarise; ``csv_input[field]`` must yield the column values.
    field : str
        Name of the column to tally.

    Notes
    -----
    Missing entries (NaN / None) are dropped before counting. Without this,
    ``np.unique`` raises ``TypeError`` on a text column that contains NaN,
    because it has to sort strings against floats.
    """
    print(field)
    # Wrap in a Series so plain lists are accepted as well as DataFrame columns.
    present = pd.Series(csv_input[field]).dropna().to_numpy()
    unique, counts = np.unique(present, return_counts=True)
    # argsort is ascending; reverse it to list the largest group first.
    order = np.argsort(counts)[::-1]
    for value, count in zip(unique[order], counts[order]):
        print(f'{count} entries present in {field} {value}')
# Tally the rows per material in the table loaded from meltpoolgeometry.csv.
print_field_composition(csv, 'Material')


[1mMaterial[0m
639 entries present in Material SS316L
452 entries present in Material Ti-6Al-4V
147 entries present in Material IN718
103 entries present in Material SS17-4PH
97 entries present in Material IN625
50 entries present in Material IN738LC
41 entries present in Material Hastelloy X
35 entries present in Material Cu10Sn
29 entries present in Material AlSi10Mg
25 entries present in Material Al-2.5Fe
24 entries present in Material Al-C-Co-Fe-Mn-Ni
24 entries present in Material Tungsten
20 entries present in Material Ti-49Al-2Cr-2Nb
19 entries present in Material HCP Cu
18 entries present in Material Invar36
16 entries present in Material SS304
12 entries present in Material WE43
12 entries present in Material MS1-
9 entries present in Material CMSX-4
8 entries present in Material TiC/Inconel 718
6 entries present in Material SS304L
6 entries present in Material Ti6242
5 entries present in Material K403 superalloy
4 entries present in Material Ti-45Al
3 entries present in Mat

In [9]:
# Report, for every column, the percentage of rows that hold a non-null value.
for column_name in csv.columns:
    column = csv[column_name]
    filled = int(column.notna().sum())
    share = 100 * filled / len(column)
    print('{:.1f} % samples have '.format(share) + column_name + ' value')

100.0 % samples have Material value
100.0 % samples have Process value
100.0 % samples have Sub-process value
100.0 % samples have Power value
100.0 % samples have Velocity value
23.7 % samples have Hatch spacing value
80.5 % samples have depth of meltpool value
63.6 % samples have width of melt pool value
17.5 % samples have length of melt pool value
64.2 % samples have d/l value
49.7 % samples have d/w value
35.1 % samples have l/w value
94.0 % samples have E (J/mm) value
20.4 % samples have E (J/mm3) value
79.7 % samples have layer thickness value
81.9 % samples have beam D value
59.5 % samples have absorption coefficient value
63.1 % samples have absorption coefficient 2 value
97.3 % samples have density value
97.3 % samples have Cp value
97.1 % samples have k value
100.0 % samples have melting T value
72.5 % samples have minimal absorptivity value
43.9 % samples have meltpool shape value
0.5 % samples have spatter value
100.0 % samples have Y (wt%) value
100.0 % samples have Zn (w

# Machine Learning

## Helper functions for selecting data

In [10]:
from meltpoolnet.ml.classification_ml import classify_learn
from meltpoolnet.utils.utils import select_subset, select_material, select_process
from meltpoolnet.utils.utils import select_parameters, extract_features

In [12]:
# Load the melt pool tables. Both CSV files are exported from Google Sheets
# and are read from the current working directory.
# NOTE(review): this repeats the load cell near the top of the notebook.
print("Current directory: ", os.getcwd())
csv = pd.read_csv('meltpoolgeometry.csv')
# Separate copy of the geometry table (avoids parsing the same file twice).
csv1 = csv.copy()
csv2 = pd.read_csv('meltpoolclassification.csv')
regressioncsv =  csv1
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat stacks the rows the same way (original row indices are kept).
class_csv = pd.concat([csv1, csv2])
# Number of rows per material in the geometry table, most frequent first.
materials = csv['Material'].value_counts()

Current directory:  /media/cmu/DATA1/francis/Previous Projects/AdditiveNet/CNN_AM_Project/AdditiveNet


In [13]:
# Examine distribution of features
# NOTE(review): nothing is printed or stored here; the cell only leaves the
# values for the last column in `col`, `values`, `num_present`, `total` and
# `parameter`. Confirm whether it is still needed.
for col in csv.columns:
    values = csv[col]
    total = len(values)
    num_present = int(values.notna().sum())
    parameter = col

In [14]:
# Example use case
# Take the 'SLM' entries of the 'Sub-process' column from the combined table
# (presumably an equality filter on that column — confirm in select_subset).
new_csv = select_subset(class_csv, 'Sub-process','SLM')
# Columns handed to select_parameters as model inputs.
parameter_list = ['Power', 'Velocity', 'layer thickness']
# Column holding the class to predict.
label_col = 'meltpool shape'
# X, y are whatever select_parameters returns for these inputs; presumably a
# feature array and matching labels — TODO confirm shapes against the helper.
X, y = select_parameters(new_csv, parameter_list, label_col)


## Classification task 

This task predicts the melt pool shape from features of the build process. The possible classes are `spatter`, `balling`, `keyhole`, `desirable`, and `LOF` (lack of fusion). Each class names a type of defect, except `desirable`, which indicates that no defect is present. The following algorithms are used to generate predictions, and their accuracies are compared.


- Random Forests 'RF'


- Gaussian Process Classification 'GPC'


- Support Vector Classification 'SVC'


- Logistic Regression 'Logistic Regression'


- Gradient Boosted Random Forest 'GB'



- Neural Network 'NN'

The base ML case considers process and thermodynamic parameters.

In [16]:
# Composition columns are the ones whose header contains a percent sign,
# e.g. 'Al (wt%)' or 'Fe (wt.%)'.
atomic_features = [column_name for column_name in csv.columns if '%' in column_name]
atomic_features

['Y (wt%)',
 'Zn (wt%)',
 'Mg (wt%)',
 'Si (wt%)',
 'Al (wt%)',
 'Sn (wt%)',
 'Zr (wt%)',
 'W (wt%)',
 'Ti (wt%)',
 'V (wt%)',
 'Co (wt%)',
 'Cu (wt%)',
 'Ta (wt%)',
 'Nb (wt%)',
 'Ni  (wt.%)',
 'Cr  (wt.%)',
 'Fe (wt.%)',
 'Mn (wt%)',
 'Mo (wt.%)']

### Demonstration with baseline parameters

In [23]:
%matplotlib inline
# Define parameters that models will be trained on and value to be predicted
parameter_list = ['Power', 'Velocity', 'density', 'Cp', 'k' , 'beam D', 'melting T', 'layer thickness', 'absorption coefficient', 'absorption coefficient 2']
parameter_list.extend(atomic_features)
label_col = 'meltpool shape'

# Form dataset array from dataframe, given parameter_list, label_col
# function select_parameters() does this as well

data_list = []
label_list = []
new_csv = select_subset(class_csv, 'Sub-process', 'SLM')

test = 0
for i in range(len(new_csv)):
    success, features, label = extract_features(new_csv.iloc[i], parameter_list, label_col)
    if success < 0:
        continue
    else:
        test += 1
        data_list.append(features)
        label_list.append(label)

        
X= np.array(np.squeeze(data_list))
labels = np.unique(label_list)

# Define labels from csv file
label_id = np.arange(len(labels))
class_labels = np.zeros(len(label_list))
for idx, sample in enumerate(label_list):
    for l_id, lbl in enumerate(labels):
        if lbl == sample:
            class_labels[idx] = l_id
y = np.array(class_labels)

 
class_labels = np.array(class_labels,dtype = 'int')

print(X.shape, y.shape)
fit_models, train_accuracy, train_accuracy_std, test_accuracy, test_accuracy_std = classify_learn(data = X.reshape(-1, len(X[0])), 
                                                                                                    labels = y, 
                                                                                                    label_names = labels,
                                                                                                    plot = False, 
                                                                                                    parameters = parameter_list, 
                                                                                                    parameter_list  = parameter_list,
                                                                                                    title = 'L-PBF', 
                                                                                                    prefix = 'classification', 
                                                                                                    model_name = 'RF')

(252, 29) (252,)
RF Train Accuracy: 1.00000 ± 0.00000, Test Accuracy: 0.85678 ± 0.05490


### Demonstration with thermodynamic parameters, composition, and one hot encoded material features

In [28]:
%matplotlib inline
# Define parameters that models will be trained on and value to be predicted
parameter_list = ['Power', 'Velocity', 'density', 'Cp', 'k' , 'beam D', 'melting T', 'layer thickness', 'absorption coefficient']
parameter_list.extend(atomic_features)
label_col = 'meltpool shape'


#Form dataset array from dataframe, given parameter_list, label_col
# function select_parameters() does this as well
data_list = []
label_list = []
materials_present = []

new_csv = select_process(class_csv, 'PBF')
test = 0
for i in range(len(new_csv)):
    success, features, label = extract_features(new_csv.iloc[i], parameter_list, label_col)
    if success < 0:
      #  print("CONTINUE")
        continue
    else:
        test += 1
        data_list.append(features)
        label_list.append(label)
        materials_present.append(new_csv['Material'].iloc[i])

print('length in PBF data: ', len(new_csv))
print('Used data points: ', test)
X= np.array(np.squeeze(data_list))
labels = np.unique(label_list)

# Define labels from csv file
label_id = np.arange(len(labels))
class_labels = np.zeros(len(label_list))
for idx, sample in enumerate(label_list):
    for l_id, lbl in enumerate(labels):
        if lbl == sample:
            class_labels[idx] = l_id
y = np.array(class_labels)

# Define One Hot Encoding of materials
materials_ohe = [np.zeros(len(materials_present)) for key in materials.keys()]

materials_ohe = np.array(materials_ohe).T

print(len(materials_ohe))
material_pos = np.arange(len(materials.keys()))
for idx in range(len(materials_present)):
    for j_idx in range(len(material_pos)):
        if materials_present[idx] == materials.keys()[j_idx]:
            materials_ohe[idx,j_idx] = 1 

 
class_labels = np.array(class_labels,dtype = 'int')


fit_models_wtohe, \
    train_accuracy_wtohe, \
        train_accuracy_std_wtohe, \
            test_accuracy_wtohe, \
                test_accuracy_std_wtohe = classify_learn(X.reshape(-1, len(X[0])), 
                                                        y, 
                                                        labels, 
                                                        parameters = parameter_list, 
                                                        parameter_list = parameter_list,
                                                        title = 'PBF, PT_wt_OHE', 
                                                        plot = False, 
                                                        prefix = 'classification', 
                                                        model_name = 'NN')


length in PBF data:  2178
Used data points:  358
358
NN Train Accuracy: 0.96996 ± 0.01376, Test Accuracy: 0.81549 ± 0.04534
