# Applying Machine Learning for Ball Milling Parameters

In [67]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import seaborn as sns

In [68]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

def linear_regression(particle_data, features, target, test_size=0.2, random_state=None):
    '''Fit one linear regression per characteristic and tabulate its R² score.

    Naming note (kept for backward compatibility): ``target`` holds the
    predictor column(s) used as the model input X, while every entry of
    ``features`` is used in turn as the response y.

    Parameters
    ----------
    particle_data : pandas.DataFrame
        Table containing both the predictor and the response columns.
    features : iterable of str
        Response columns; one model is fitted per column.
    target : str or list of str
        Predictor column name(s). A single name is wrapped in a list so that
        X is always two-dimensional.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``train_test_split``. Pass an integer to get the same
        split, and therefore the same R² values, on every call.

    Returns
    -------
    pandas.DataFrame
        One row per fitted feature with the columns 'Feature', 'Average',
        'Standard Deviation' and 'R2'. The columns are present even when no
        feature could be fitted.
    '''
    result_columns = ['Feature', 'Average', 'Standard Deviation', 'R2']

    # A bare column name would select a 1-D Series, which the scaler rejects.
    predictor_cols = [target] if isinstance(target, str) else target
    X_all = particle_data[predictor_cols]
    # Loop-invariant: rows in which every predictor value is present.
    predictors_present = X_all.notna().all(axis=1)

    result_list = []
    for feature in features:
        # Keep only rows where the predictors and this response are all
        # present; the estimator cannot be fitted on NaN values.
        complete = predictors_present & particle_data[feature].notna()
        if not complete.any():
            # Nothing to fit (e.g. the column is entirely NaN): skip it.
            continue

        X = X_all.loc[complete]
        y = particle_data.loc[complete, [feature]]

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        # Fit the scaler on the training rows only so that no information
        # from the held-out rows leaks into the model.
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        model = LinearRegression()
        model.fit(X_train, y_train)

        # R² on the held-out rows.
        r2 = r2_score(y_test, model.predict(X_test))

        # Summary statistics use every available value of the column
        # (pandas skips NaN), not only the rows used for the fit.
        result_list.append({
            'Feature': feature,
            'Average': particle_data[feature].mean(),
            'Standard Deviation': particle_data[feature].std(),
            'R2': r2,
        })

    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(result_list, columns=result_columns)

In [69]:
# Read the merged element-composition table and keep only the identifier
# columns and the per-element (wt%) columns used in the cells below.
particle_data = pd.read_csv(
    '/Users/sarah/Documents/areospace research/material recycling project/Ti64_Composition_Tools/Ti_Powder_All_Lots/Powder Results Summary/Merged_Element_Composition_Tables.csv'
).loc[
    :,
    [
        'Lot', 'GroupID', 'Rev',
        'Aluminum (wt%)', 'Carbon (wt%)', 'Chromium (wt%)', 'Hydrogen (wt%)',
        'Iron (wt%)', 'Nickel (wt%)', 'Nitrogen (wt%)', 'Oxygen (wt%)',
        'Silicon (wt%)', 'Titanium (wt%)', 'Vanadium (wt%)', 'Yttrium (wt%)',
    ],
]
# Disabled row filters, kept for reference:
#   particle_data = particle_data.loc[(particle_data['Feret'] >= 40) & (particle_data['Feret'] <= 100)]  # was placed before the column selection
#   particle_data = particle_data.loc[(particle_data['Rev'] >= 3)]
particle_data

Unnamed: 0,Lot,GroupID,Rev,Aluminum (wt%),Carbon (wt%),Chromium (wt%),Hydrogen (wt%),Iron (wt%),Nickel (wt%),Nitrogen (wt%),Oxygen (wt%),Silicon (wt%),Titanium (wt%),Vanadium (wt%),Yttrium (wt%)
0,P1442,2,5,6.41,0.007,0.012,0.0018,0.19,0.010,0.010,0.158,0.010,89.2607,3.95,0.0005
1,P1442,3,3,6.51,0.010,0.014,0.0015,0.20,0.011,0.008,0.148,0.014,89.1380,3.97,0.0005
2,P1442,4,3,6.44,0.010,0.015,0.0017,0.24,0.011,0.005,0.142,0.013,89.2158,3.93,0.0005
3,P1442,5,1,6.50,0.011,0.013,0.0013,0.17,0.010,0.011,0.149,0.013,89.1142,4.03,0.0005
4,P1442,6,1,6.48,0.008,0.013,0.0012,0.19,0.010,0.014,0.162,0.013,89.0613,4.07,0.0005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137,P1440,5,2,6.45,0.009,0.013,0.0011,0.20,0.011,0.019,0.158,0.013,89.2294,3.92,0.0005
138,P1440,3,5,6.37,0.007,0.011,0.0020,0.19,0.010,0.005,0.174,0.011,89.3505,3.89,0.0005
139,P1440,3,4,6.42,0.008,0.013,0.0019,3.91,0.010,0.012,0.188,0.010,89.2666,0.18,0.0005
140,P1440,5,3,6.36,0.008,0.013,0.0013,0.17,0.010,0.011,0.169,0.010,89.3272,3.94,0.0005


In [70]:
# Distinct values of the 'Lot' column, in order of first appearance.
particle_data.loc[:, 'Lot'].unique()

array(['P1442', 'P1444', 'P222', 'P2002', 'P1440', 'P2328', 'P0525'],
      dtype=object)

In [71]:
# Predictor column(s): passed as the function's `target` argument (model input X)
run_times = ['Rev']

# Response columns: passed as `features`; one regression is fitted per column
features = ['Oxygen (wt%)','Vanadium (wt%)','Iron (wt%)', 'Aluminum (wt%)']

#particle_data = particle_data.loc[(particle_data['Lot']=='P1442')]

# Seed NumPy's global RNG: the unseeded train/test split inside
# linear_regression draws from it, so without this the R² values change on
# every run of the cell.
np.random.seed(42)

# Keyword arguments make explicit which list plays which role
r2_table = linear_regression(particle_data, features=features, target=run_times)

# Display the table of averages, standard deviations and R² scores
r2_table

Unnamed: 0,Feature,Average,Standard Deviation,R2
0,Oxygen (wt%),0.149083,0.034829,0.01861
1,Vanadium (wt%),3.798345,0.827783,0.050344
2,Iron (wt%),0.376338,0.817778,0.061802
3,Aluminum (wt%),6.405211,0.063419,-0.06264


In [72]:
# Load the prepared ALM Ortho powder workbook and display it.
df = pd.read_excel(
    io='/Users/sarah/Documents/areospace research/material recycling project/PyImageJ-Particle-Analysis/ALM Ortho Powders/ALM Ortho Powder Data Prepared for Professor Kordijazi.xlsx'
)
df

Unnamed: 0,Lot,GroupID,Reuse Cycles,Area,X,Y,Perim.,BX,BY,Width,...,MinFeret,AR,Round,Solidity,Aluminum (wt%),Iron (wt%),Oxygen (wt%),Vanadium (wt%),Unnamed: 29,Unnamed: 30
0,P2002,6.0,1.0,2741.358617,612.083774,462.138346,201.783144,582.758505,432.672616,58.696598,...,57.084611,1.054284,0.953426,0.961663,6.43,0.2,0.153,3.97,,
1,P2328,2.0,2.0,3483.014884,623.213855,457.64302,228.862552,590.063396,424.432745,66.282249,...,63.684635,1.098306,0.934873,0.960718,6.44,0.23,0.111,4.01,,
2,P1444,2.0,8.0,1106.34,716.5886,430.54194,137.7801,696.9,411.4,39.74,...,35.82642,1.55576,0.8638,0.9238,6.42,0.2,0.152,4.14,,
3,P1444,1.0,10.0,2616.907526,620.511046,471.900587,195.344651,591.995217,443.235013,57.081314,...,55.659853,1.051948,0.95594,0.963416,6.42,0.22,0.184,4.14,,
4,P1444,1.0,8.0,2850.982368,615.476783,463.960029,203.790224,585.727109,434.076337,59.531958,...,58.213274,1.043691,0.961093,0.965017,6.42,0.195,0.199667,4.01,,
5,P2002,3.0,1.0,2969.685697,621.823334,462.726148,209.68123,591.354205,432.053407,60.919583,...,59.350943,1.066824,0.954335,0.963608,6.41,0.19,0.116,3.95,,
6,P2002,11.0,1.0,3091.652126,626.123556,451.828864,214.029604,594.935305,420.595933,62.381516,...,59.996035,1.074866,0.938501,0.961358,6.43,0.19,0.157,3.92,,s
7,P1442,1.0,8.0,2904.701955,625.550119,474.865208,206.68465,595.462576,444.576534,60.215442,...,59.08664,1.040637,0.964261,0.965607,6.43,0.18,0.2315,4.05,,
8,P2001,1.0,1.0,3240.073354,640.030793,460.667568,220.45638,608.066458,428.624451,63.99185,...,61.478033,1.088094,0.937236,0.963088,0.0,0.0,0.0,0.0,,
9,P1440,2.0,7.0,2832.380692,613.834266,464.889542,203.391612,584.167147,435.061095,59.34121,...,58.014638,1.047195,0.958503,0.96458,6.34,0.17,0.209667,3.98,,
