# Applying Linear Regression for Reuse Powders

In [None]:
# import packages and libraries, and set the plot font to Times New Roman
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl


# render all figure text in Times New Roman and draw minus signs with a
# plain hyphen instead of the Unicode minus glyph
mpl.rcParams.update({
    'font.family': 'Times New Roman',
    'axes.unicode_minus': False,
})

In [94]:
def linear_regression(particle_data, features, targets, test_size=0.3, random_state=None):
    """Fit one linear regression per target column and summarise each fit.

    For every column named in ``targets`` a separate ``LinearRegression`` is
    trained on a random subset of the rows and scored (R2) on the held-out
    rows.

    Parameters:
        particle_data (pd.DataFrame): table holding the feature and target
            columns.
        features (list[str]): column name(s) used as predictors.
        targets (list[str]): column names to predict, one model per column.
        test_size (float): fraction of rows held out for scoring
            (default 0.3).
        random_state (int | None): seed forwarded to ``train_test_split``.
            With the default ``None`` every call draws a different split, so
            the reported metrics change from run to run; pass an integer for
            reproducible results.

    Returns:
        pd.DataFrame: one row per fitted target with the columns ``target``,
        ``Average`` and ``Standard Deviation`` (over all non-missing values
        of the target column), ``R2`` (on the held-out rows), ``Intercept``
        and ``Slope``. ``Slope`` is a single number when one feature is given
        and a list with one coefficient per feature otherwise. Targets that
        are entirely missing, or that leave fewer than two complete rows, are
        skipped and do not appear in the table.
    """
    result_list = []

    # iterate through dependent features
    for target in targets:
        # skip column if entirely empty
        if particle_data[target].isna().all():
            continue

        # LinearRegression rejects NaN, so fit only on rows where every
        # feature and the target are present
        complete_rows = particle_data.dropna(subset=[*features, target])

        # at least two rows are needed to form both a train and a test split
        if len(complete_rows) < 2:
            continue

        X = complete_rows[features]
        y = complete_rows[[target]]

        # split and train data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        # linear regression and predict
        model = LinearRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # calculate R2 value on the held-out rows
        r2 = r2_score(y_test, y_pred)

        # y is a one-column frame, so coef_ has shape (1, n_features) and
        # intercept_ has shape (1,); take the single target's row
        coefficients = model.coef_[0]
        if len(features) == 1:
            slope = coefficients[0]
        else:
            # keep every coefficient instead of only the first one
            slope = coefficients.tolist()

        # append result to list
        result_list.append({
            'target': target,
            'Average': particle_data[target].mean(),
            'Standard Deviation': particle_data[target].std(),
            'R2': r2,
            'Intercept': model.intercept_[0],
            'Slope': slope,
        })

    # convert result to dataframe
    return pd.DataFrame(result_list)

### Linear Regression with ALM Ortho Powders (Elemental Data)

In [99]:
# load the merged elemental composition table and list its columns
alm_ortho_csv_path = '/Users/sarah/Documents/Roux co-op/material recycling project/PyImageJ-Particle-Analysis/ALM Ortho Microscopy Powders/Merged_Element_Composition_Tables.csv'
alm_ortho_particle_data = pd.read_csv(alm_ortho_csv_path)
alm_ortho_particle_data.columns

Index(['Lot', 'GroupID', 'Rev', 'Aluminum (wt%)', 'Carbon (wt%)',
       'Chromium (wt%)', 'Hydrogen (wt%)', 'Iron (wt%)', 'Nickel (wt%)',
       'Nitrogen (wt%)', 'Oxygen (wt%)', 'Silicon (wt%)', 'Titanium (wt%)',
       'Vanadium (wt%)', 'Yttrium (wt%)'],
      dtype='object')

In [None]:
# Clean the ALM Ortho data in one pass:
#   1. keep only the rows of lot P1444
#   2. drop every row that has a missing value in any column
#   3. select the identifier and element (wt%) columns
alm_ortho_particle_data_cleaned = (
    alm_ortho_particle_data
    .loc[alm_ortho_particle_data['Lot'] == 'P1444']
    .dropna()
    .loc[:, [
        'Lot', 'GroupID', 'Rev',
        'Aluminum (wt%)', 'Carbon (wt%)', 'Chromium (wt%)',
        'Hydrogen (wt%)', 'Iron (wt%)', 'Nickel (wt%)',
        'Nitrogen (wt%)', 'Oxygen (wt%)', 'Silicon (wt%)',
        'Titanium (wt%)', 'Vanadium (wt%)', 'Yttrium (wt%)',
    ]]
)

Index(['Lot', 'GroupID', 'Rev', 'Aluminum (wt%)', 'Carbon (wt%)',
       'Chromium (wt%)', 'Hydrogen (wt%)', 'Iron (wt%)', 'Nickel (wt%)',
       'Nitrogen (wt%)', 'Oxygen (wt%)', 'Silicon (wt%)', 'Titanium (wt%)',
       'Vanadium (wt%)', 'Yttrium (wt%)'],
      dtype='object')

In [None]:
# predictor column and the element columns to regress against it
features = ['Rev']
target = ['Oxygen (wt%)', 'Vanadium (wt%)',  'Iron (wt%)', 'Aluminum (wt%)']

# fit one model per target and keep the metrics rounded to 3 decimals
r2_table = linear_regression(
    alm_ortho_particle_data_cleaned,
    features=features,
    targets=target,
).round(3)

# display the resulting table
r2_table

Unnamed: 0,target,Average,Standard Deviation,R2,Intercept,Slope
0,Oxygen (wt%),0.123,0.032,0.936,0.08,0.011
1,Vanadium (wt%),4.007,0.054,-0.048,3.974,0.009
2,Iron (wt%),0.19,0.01,-0.193,0.183,0.002
3,Aluminum (wt%),6.395,0.066,-0.081,6.382,0.004


In [None]:
# keep only rows whose ABD diameter lies between 40 and 110 (both bounds included)
# NOTE(review): 'Diameter (ABD) (µm)' is not among the columns kept by the
# cleaning cell for the elemental table — confirm this filter is meant for
# this dataframe, otherwise the lookup raises KeyError
alm_ortho_particle_data_cleaned = alm_ortho_particle_data_cleaned.loc[
    alm_ortho_particle_data_cleaned['Diameter (ABD) (µm)'].between(40, 110)
]