# Machine Learning for Additive Manufacturing: Melt Pool Classification

Load in relevant modules


Python package dependencies: `mendeleev`, `matplotlib`, `pandas`, `numpy`, `pylab`, `pprint`, `sklearn`, `scipy`, `os`



In [None]:
import pandas as pd
import numpy as np
import os
os.getcwd()
from meltpoolnet.ml.regression_ml import learn
from meltpoolnet.utils.utils import select_subset
from meltpoolnet.utils.utils import select_parameters, extract_features

# Load in data, perform basic analysis

In [None]:
# Load the CSV data. The files are downloaded from Google Sheets and stored in
# the `datasets` directory two levels above the current working directory.
dataset_dir = os.path.join(os.path.abspath(''), '../../', 'datasets')
geometry_path = os.path.join(dataset_dir, 'meltpoolgeometry.csv')
classification_path = os.path.join(dataset_dir, 'meltpoolclassification.csv')

# The geometry table is read twice so that `csv` (used for exploration) and
# `csv1` (used for modelling) remain independent DataFrame objects.
csv = pd.read_csv(geometry_path)
csv1 = pd.read_csv(geometry_path)
csv2 = pd.read_csv(classification_path)

# The regression task uses the geometry table only.
regressioncsv = csv1
# The classification task uses the rows of both tables stacked together.
# DataFrame.append was removed in pandas 2.0; pd.concat with its defaults
# gives the same result (original row labels kept, columns not sorted).
class_csv = pd.concat([csv1, csv2])
# Number of rows per material, most frequent first.
materials = csv['Material'].value_counts()


In [None]:
# List every column label of the geometry table, one per line.
print('Keys Present in CSV:')
for column_name in csv:  # iterating a DataFrame yields its column labels
    print(column_name)

In [None]:
def print_field_composition(csv_input, field):
    """Print how often each distinct value occurs in one field.

    The field name is printed first, followed by one line per distinct
    value, ordered from the largest count to the smallest.

    Args:
        csv_input: Object indexable by ``field`` whose result can be passed
            to ``np.unique`` (the notebook passes a pandas DataFrame).
        field: Name of the field (column) to summarise.
    """
    print(field)
    distinct, tallies = np.unique(csv_input[field], return_counts=True)
    # argsort is ascending, so reverse it to list the largest count first.
    order = np.argsort(tallies)[::-1]
    for value, tally in zip(distinct[order], tallies[order]):
        print(f'{tally} entries present in {field} {value}')
# Show how many entries each material contributes to the geometry table.
print_field_composition(csv_input=csv, field='Material')


In [None]:
# Report, for every column, the percentage of rows that hold a non-null value.
for col in csv.columns:
    values = csv[col]
    total = len(values)
    # Count the entries pandas does not consider missing.
    num_present = sum(1 for value in values if not pd.isnull(value))
    parameter = col
    coverage = 100 * num_present / total
    print('{:.1f} % samples have '.format(coverage) + parameter + ' value')

# Machine Learning

## Helper functions for selecting data

In [None]:
# Load the CSV data (downloaded from Google Sheets) from the current working
# directory; the directory is printed first so a missing file is easy to trace.
print("Current directory: ", os.getcwd())
# The geometry table is read twice so that `csv` and `csv1` remain independent
# DataFrame objects.
csv = pd.read_csv('meltpoolgeometry.csv')
csv1 = pd.read_csv('meltpoolgeometry.csv')
csv2 = pd.read_csv('meltpoolclassification.csv')

# The regression task uses the geometry table only.
regressioncsv = csv1
# The classification task uses the rows of both tables stacked together.
# DataFrame.append was removed in pandas 2.0; pd.concat with its defaults
# gives the same result (original row labels kept, columns not sorted).
class_csv = pd.concat([csv1, csv2])
# Number of rows per material, most frequent first.
materials = csv['Material'].value_counts()

In [None]:
# Count the non-null entries of every column. Nothing is printed here; after
# the loop the variables hold the figures for the last column only.
for col in csv.columns:
    values = csv[col]
    total = len(values)
    num_present = sum(1 for value in values if not pd.isnull(value))
    parameter = col

In [None]:
# Example use case: build a feature matrix X and label vector y for the
# melt pool shape from three process parameters.
parameter_list = ['Power', 'Velocity', 'layer thickness']
label_col = 'meltpool shape'
# Presumably keeps only the rows whose 'Sub-process' is 'SLM' -- confirm
# against meltpoolnet.utils.utils.select_subset.
new_csv = select_subset(class_csv, 'Sub-process', 'SLM')
X, y = select_parameters(new_csv, parameter_list, label_col)


## Regression Task

Predicts the melt pool depth based on features of the build process, iterating over the algorithms:


- Random Forests 'RF'


- Gaussian Process Regression 'GPR'


- Support Vector Regression 'SVR'


- Ridge Linear Regression 'Ridge'


- Lasso Linear Regression  'Lasso'


- Gradient Boosted Random Forest 'GB'



- Neural Network 'NN'

The baseline ML case considers process and thermodynamic parameters only; later cells add one-hot-encoded material labels and elemental features.


### Demonstration with baseline parameters

In [None]:

# Baseline feature set and regression target (melt pool depth).
parameter_list = [
    'Power', 'Velocity', 'density', 'Cp', 'k', 'beam D', 'melting T',
    'layer thickness', 'absorption coefficient',
]
label_col = 'depth of meltpool'

# Rows coming from these paper IDs are left out of the dataset.
excluded_paper_ids = [34]

# Form the dataset arrays from the dataframe, given parameter_list and
# label_col. The function select_parameters() does this as well.
new_csv = select_subset(regressioncsv, 'Sub-process', 'SLM')

data_list = []
label_list = []
materials_present = []

for i in range(len(new_csv)):
    success, features, label = extract_features(new_csv.iloc[i], parameter_list, label_col)
    # A negative success flag marks a row to skip.
    if success < 0 or new_csv['paper ID'].iloc[i] in excluded_paper_ids:
        continue
    data_list.append(features)
    label_list.append(label)
    materials_present.append(new_csv['Material'].iloc[i])

# One-hot encode the material of every kept row: one row per sample, one
# column per entry of `materials`. This is built once after the loop instead
# of being rebuilt on every iteration, which gives the same final matrix
# without the quadratic cost and also defines it when no row is kept.
material_names = list(materials.keys())
material_pos = np.arange(len(material_names))
column_of = {name: position for position, name in enumerate(material_names)}
materials_ohe = np.zeros((len(materials_present), len(material_names)))
for sample_idx, material in enumerate(materials_present):
    material_idx = column_of.get(material)
    # Materials absent from `materials` keep an all-zero row.
    if material_idx is not None:
        materials_ohe[sample_idx, material_idx] = 1

X = np.array(np.squeeze(data_list))
y = np.array(label_list)
print(len(X), 'length of X')

# Reshape so X always has one column per parameter, then fit the model
# selected by model_name ('RF').
fit_models, train_accuracy, train_accuracy_std, test_accuracy, test_accuracy_std = learn(
    X.reshape(-1, len(parameter_list)),
    y,
    parameters=parameter_list,
    plot=True,
    title='LPBF',
    model_name='RF',
)
