# Machine Learning for Additive Manufacturing

Load the relevant modules.


Python package dependencies: `mendeleev`, `matplotlib`, `pandas`, `numpy`, `pylab`, `pprint`, `sklearn`, `scipy`, `os`



In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os
from pprint import pprint
from meltpoolnet.utils.plotting_utils import frame_tick
%matplotlib inline
os.getcwd()

# Load in data, perform basic analysis

In [None]:
# Load the CSV data (downloaded from Google Sheets and stored in the datasets
# directory, two levels above the working directory).
dataset_dir = os.path.join(os.path.abspath(''), '../../', 'datasets')
csv = pd.read_csv(os.path.join(dataset_dir, 'meltpoolgeometry.csv'))
# Independent copy of the geometry table, so that changes made to `csv` in
# later cells do not leak into the frames built from `csv1`.
csv1 = csv.copy()
csv2 = pd.read_csv(os.path.join(dataset_dir, 'meltpoolclassification.csv'))
regressioncsv = csv1
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the
# same way and keeps each frame's original index labels.
class_csv = pd.concat([csv1, csv2])

In [None]:
# Column views used by later cells; they are taken before this cell's filter.
all_depths = csv['depth of meltpool']
all_velocity = csv['Velocity']
all_power = csv['Power']
print(len(csv))
# Keep rows whose velocity and power are both below 3000. The masks are
# combined with & instead of chaining csv[mask1][mask2]: chaining applies a
# mask built on the unfiltered frame to the already filtered one, which makes
# pandas warn and reindex the mask.
csv = csv[(all_velocity < 3000) & (all_power < 3000)]
print(len(csv))

In [None]:
# Column views used by the plots below; they are taken before this cell's filter.
all_depths = csv['depth of meltpool']
all_velocity = csv['Velocity']
all_power = csv['Power']
print(len(csv))
# Keep rows whose velocity and power are both below 3000. The masks are
# combined with & instead of chaining csv[mask1][mask2]: chaining applies a
# mask built on the unfiltered frame to the already filtered one, which makes
# pandas warn and reindex the mask.
csv = csv[(all_velocity < 3000) & (all_power < 3000)]
print(len(csv))

In [None]:
# Scatter plot of melt pool depth against laser power.
# `dpi` is reused by the figures in the following cells.
dpi = 300
fig = plt.figure(figsize=(4, 3), dpi=dpi)
plt.scatter(all_power, all_depths, edgecolors='k')

plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
frame_tick()
plt.xlabel('Laser Power (W)')
plt.ylabel(r'Melt Pool Depths ($\mu m$)')
plt.title('Entire Dataset')

In [None]:
# Scatter plot of melt pool depth against laser velocity.
fig = plt.figure(figsize=(4, 3), dpi=dpi)
plt.scatter(all_velocity, all_depths, edgecolors='k')

plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
frame_tick()
plt.xlabel('Laser Velocity ($mm/s$)')
plt.ylabel(r'Melt Pool Depths ($\mu m$)')
plt.title('Entire Dataset')

In [None]:
# Plot power vs. velocity, colored by melt pool depth.
# Note: this view is misleading because it mixes different processes.

fig = plt.figure(figsize=(4, 3), dpi=dpi)
plt.scatter(
    all_power,
    all_velocity,
    c=all_depths,
    cmap='jet',
    edgecolors='k',
    vmin=0,  # color scale is clamped to the range 0..500
    vmax=500,
)
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
frame_tick()
plt.xlabel('Power ($W$)')
plt.ylabel('Velocity ($mm/s$)')
cbar = plt.colorbar()
cbar.ax.set_ylabel(r'Melt Depth ($\mu m$)', rotation=270, labelpad=10)
plt.title('Overall Dataset Distribution')




In [None]:

# Histogram of laser power (50 bins).
fig = plt.figure(figsize=[4, 3], dpi=dpi)
plt.hist(all_power, edgecolor='k', bins=50)
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
frame_tick()
plt.xlabel(r'Power ($W$)')
# Axis label spelling corrected ("Occurence" -> "Occurrence").
plt.ylabel(r'Occurrence')


In [None]:
# Histogram of laser velocity (30 bins).
fig = plt.figure(figsize=[4, 3], dpi=dpi)
plt.hist(all_velocity, edgecolor='k', bins=30)
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
frame_tick()
plt.xlabel(r'Velocity ($mm/s$)')
# Axis label spelling corrected ("Occurence" -> "Occurrence").
plt.ylabel(r'Occurrence')

In [None]:
# Examine features present in the dataset

# Each distinct material paired with the number of rows that carry it.
unique, counts = np.unique(csv['Material'], return_counts=True)
material_counts = set(zip(unique, counts))
pprint(material_counts)

# List every column name of the table, one per line.
print("Features: ")
for column_name in csv.columns:
    print(column_name)

In [None]:
# Bar chart of the number of samples per material.
fig = plt.figure(dpi=300)
materials = csv['Material'].value_counts()
# Bar positions 0..k-1 and the matching material names used as tick labels.
y_pos = np.arange(len(materials.keys()))
y_pos_str = np.array(list(materials.keys()), dtype='object')
print(y_pos)
plt.xticks(y_pos, y_pos_str, fontsize=7)
# NOTE(review): these fixed limits show only bar positions 0 through 5, so
# any further materials fall outside the visible range; confirm this is intended.
plt.xlim(-0.5, 5.5)
plt.bar(y_pos, materials, edgecolor='k')

# Axis label spelling corrected ("Occurence" -> "Occurrence").
plt.ylabel("Occurrence")
plt.xlabel("Material")
frame_tick()


In [None]:
# Count the rows per material; the cell output is the index of material names
# (Series.keys() is an alias for the index).
materials = csv['Material'].value_counts()
materials.index

In [None]:
# Print each bar position next to the material name it stands for.
for position, material_name in zip(y_pos, materials.keys()):
    print(position, material_name)

In [None]:
# Examine data categories

# For each labelled column: print its name, then the distinct values paired
# with how many rows carry each value.
for column in ('Process', 'Material', 'paper ID'):
    print(column)
    unique, counts = np.unique(csv[column], return_counts=True)
    pprint(set(zip(unique, counts)))

# The melt pool shape column is converted to strings before counting
# (presumably so missing entries form their own category -- confirm).
shape = np.array(csv['meltpool shape'], dtype='str')
unique, counts = np.unique(shape, return_counts=True)
pprint(set(zip(unique, counts)))


In [None]:
# Bar chart of the number of samples per melt pool shape.
fig = plt.figure(dpi=300)
defects = csv['meltpool shape'].value_counts()
# Bar positions 0..k-1, labelled with the shape names.
defects_pos = np.arange(len(defects.keys()))
plt.bar(defects_pos, defects, edgecolor='k')
plt.xticks(defects_pos, defects.keys(), fontsize=8)
# Axis label spelling corrected ("Occurence" -> "Occurrence").
plt.ylabel("Occurrence")
plt.xlabel("Melt pool shape")
frame_tick()


In [None]:
# Examine distribution of features: for every column, report the percentage
# of rows that hold a non-null value.
for col in csv.columns:
    values = csv[col]
    # Series.count() counts the non-null entries; int() keeps plain-int arithmetic.
    num_present = int(values.count())
    total = len(values)
    parameter = col
    percent_text = '{:.1f} % samples has '.format(100 * num_present / total)
    print(percent_text + parameter + ' value')

In [None]:
# Collect the names of all columns that contain a '%' character.
atomic_features = [col for col in csv.columns if '%' in col]
atomic_features

In [None]:
# One-hot encode the material of every row: one row per sample, one column per
# entry of `materials`, with 1.0 where the row's material equals that entry.
material_pos = np.arange(len(materials.keys()))
row_materials = csv['Material'].to_numpy(dtype=object)
material_names = materials.keys().to_numpy(dtype=object)
# A single broadcast comparison (rows x names) replaces the per-cell .iloc
# double loop; the result is converted to float to match the np.zeros-based table.
materials_ohe = (row_materials[:, None] == material_names[None, :]).astype(float)
print(len(materials_ohe))
# Column index of the matching material for each row.
print(np.argmax(materials_ohe, axis=1))


In [None]:
# Show the column labels of the table (DataFrame.keys() returns the columns).
csv.columns

# Machine Learning

## Helper functions for selecting data

In [None]:
from utils import extract_features

In [None]:
# select subset of data containing material, process, etc
# select_subset generalizes this for any feature
def select_material(csv, material):
    '''
    Return the rows of csv whose 'Material' column equals material.

    Arguments:
    csv: data to be filtered (Pandas DataFrame)
    material: value to match in the 'Material' column

    Returns:
    Pandas DataFrame holding only the matching rows
    '''
    return select_subset(csv, 'Material', material)


def select_process(csv, process):
    '''
    Return the rows of csv whose 'Process' column equals process.

    Arguments:
    csv: data to be filtered (Pandas DataFrame)
    process: value to match in the 'Process' column

    Returns:
    Pandas DataFrame holding only the matching rows
    '''
    return select_subset(csv, 'Process', process)


def select_subset(csv, feature, value):
    '''
    Return the rows of csv whose feature column equals value.

    Arguments:
    csv: data to be filtered (Pandas DataFrame)
    feature: string naming the column to compare
    value: value the column must equal for a row to be kept

    Returns:
    Pandas DataFrame holding only the matching rows (original index kept)
    '''
    matches = csv[feature] == value
    return csv.loc[matches]

In [None]:
def select_parameters(new_csv, parameter_list, label_col):
    '''
    Build feature and label arrays from the rows of new_csv that
    extract_features accepts.

    Every row is passed to extract_features together with parameter_list and
    label_col; rows for which it reports a negative status are left out.

    Arguments:
    new_csv: data csv to be processed (Pandas DataFrame)
    parameter_list: list of strings corresponding to feature names
    label_col: string corresponding to feature name

    Returns:
    X: numpy array of the kept feature entries after np.squeeze (intended
       layout: n samples x m features)
    y: numpy array of the kept labels, one entry per kept row
    '''
    kept_features = []
    kept_labels = []
    for row_idx in range(len(new_csv)):
        status, row_features, row_label = extract_features(
            new_csv.iloc[row_idx], parameter_list, label_col
        )

        # A negative status marks a row to be skipped.
        if status < 0:
            continue
        kept_features.append(row_features)
        kept_labels.append(row_label)

    # np.squeeze drops every length-1 axis of the stacked features.
    # NOTE(review): that also removes the sample axis when exactly one row is
    # kept, and the feature axis when there is a single feature -- confirm
    # callers handle those shapes.
    X = np.array(np.squeeze(kept_features))
    y = np.array(kept_labels)

    return X, y

In [None]:

# Build the inputs for classification: take the rows whose 'Sub-process' is
# 'SLM', use three process parameters as features and the melt pool shape as label.
new_csv = select_subset(class_csv, feature='Sub-process', value='SLM')
parameter_list = [
    'Power',
    'Velocity',
    'layer thickness',
]
label_col = 'meltpool shape'
X, y = select_parameters(
    new_csv,
    parameter_list=parameter_list,
    label_col=label_col,
)
