In [1]:
from math import sqrt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler

# Load the data
data = pd.read_excel("C:/Users/orkun/OneDrive/Masaüstü/Ens 492/Code/Last versions/Simdatawithoutspindlevar.xlsx")
data = data.sample(n=35000, random_state=42)# Sampling random 35000 simulations for increasing the speed of ML training process 



# Input and target creation
X = data.drop(columns=['Spindle_Speed','Feed_Rate','Axial_Depth','Radial_Depth'])  # Setting F values as input
y = data[['Feed_Rate','Axial_Depth','Radial_Depth']]  # Setting parameters as outputs

# Normalization of the data
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_normalized = scaler_X.fit_transform(X)
y_normalized = scaler_y.fit_transform(y)

# Splitting data for train, test (80%, 20%)
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y_normalized, test_size=0.2, random_state=42)

print(X.shape)
print(y.shape)
print(X_train.shape)
print(X_test.shape)


(35000, 6)
(35000, 3)
(28000, 6)
(7000, 6)


In [2]:
# Gradient Boosting Regressor and GridSearch parameters
base_model = GradientBoostingRegressor()
model = MultiOutputRegressor(base_model)
param_grid = {
    'estimator__n_estimators': [10 ,50,75],
    'estimator__max_features': [ 0.25, 0.5, 1.0],
    'estimator__max_depth': [20, 50, ]
}

# GridSearchCV to find the best hyperparameters
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, verbose=2, return_train_score=True)
grid_search.fit(X_train, y_train)

# Predictions and RMSE for each hyperparameter combination
results = grid_search.cv_results_
mean_test_scores = results['mean_test_score']
params = results['params']

for mean_score, param in zip(mean_test_scores, params):
    # Train the model with the given parameters
    model.set_params(**param)
    model.fit(X_train, y_train)

    # Predictions for the training set
    y_pred_train = model.predict(X_train)

    # Calculate normalized RMSE
    train_mse_normalized = mean_squared_error(y_train, y_pred_train, multioutput='raw_values')
    train_rmse_normalized = np.sqrt(train_mse_normalized)

    # Inverse transform to original scale
    y_pred_train_original = scaler_y.inverse_transform(y_pred_train)
    y_train_original = scaler_y.inverse_transform(y_train)

    # Calculate RMSE on original scale
    train_mse_original = mean_squared_error(y_train_original, y_pred_train_original, multioutput='raw_values')
    train_rmse_original = np.sqrt(train_mse_original)

    print(f"Parameters: {param}")
    print(f"Normalized Training set RMSE (Spindle_Speed, Feed_Rate, Axial_Depth, Radial_Depth): {train_rmse_normalized}")
    print(f"Original Training set RMSE (Spindle_Speed, Feed_Rate, Axial_Depth, Radial_Depth): {train_rmse_original}")
    print('-' * 80)

# Examine the best parameters and model
best_params = grid_search.best_params_
print("Best parameters:", best_params)


best_model = grid_search.best_estimator_


Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV] END estimator__max_depth=20, estimator__max_features=0.25, estimator__n_estimators=10; total time=   1.4s
[CV] END estimator__max_depth=20, estimator__max_features=0.25, estimator__n_estimators=10; total time=   1.3s
[CV] END estimator__max_depth=20, estimator__max_features=0.25, estimator__n_estimators=10; total time=   1.6s
[CV] END estimator__max_depth=20, estimator__max_features=0.25, estimator__n_estimators=50; total time=   7.9s
[CV] END estimator__max_depth=20, estimator__max_features=0.25, estimator__n_estimators=50; total time=   9.1s
[CV] END estimator__max_depth=20, estimator__max_features=0.25, estimator__n_estimators=50; total time=   8.2s
[CV] END estimator__max_depth=20, estimator__max_features=0.25, estimator__n_estimators=75; total time=  12.3s
[CV] END estimator__max_depth=20, estimator__max_features=0.25, estimator__n_estimators=75; total time=  11.0s
[CV] END estimator__max_depth=20, estimator__max_fe

In [3]:
# Training set predictions
y_pred = grid_search.predict(X_test)

# Inverse transform the scaled predictions and true values
y_pred_unscaled = scaler_y.inverse_transform(y_pred)
y_test_unscaled = scaler_y.inverse_transform(y_test)

# RMSE Calculation
rmse_feed_rate = np.sqrt(mean_squared_error(y_test_unscaled[:, 0], y_pred_unscaled[:, 0]))
rmse_axial_depth = np.sqrt(mean_squared_error(y_test_unscaled[:, 1], y_pred_unscaled[:, 1]))
rmse_radial_depth = np.sqrt(mean_squared_error(y_test_unscaled[:, 2], y_pred_unscaled[:, 2]))

print(f"Best Parameters: {grid_search.best_params_}")
print(f"RMSE for Feed Rate: {rmse_feed_rate}")
print(f"RMSE for Axial Depth: {rmse_axial_depth}")
print(f"RMSE for Radial Depth: {rmse_radial_depth}")

Best Parameters: {'estimator__max_depth': 50, 'estimator__max_features': 0.5, 'estimator__n_estimators': 75}
RMSE for Feed Rate: 0.0024286724141981933
RMSE for Axial Depth: 0.13058662514965666
RMSE for Radial Depth: 0.04561199063815095


In [4]:


# Adding error margin
error_margin = 0.05
y_pred_lower = y_pred_unscaled * (1 - error_margin)
y_pred_upper = y_pred_unscaled * (1 + error_margin)

# Function to determine given values are valid or not
def is_within_error_margin(y_true, y_pred_lower, y_pred_upper):
    return np.all((y_true >= y_pred_lower) & (y_true <= y_pred_upper), axis=1)

# Accuracy rate calculation
correct_classifications = is_within_error_margin(y_test_unscaled, y_pred_lower, y_pred_upper)
accuracy = np.mean(correct_classifications) * 100

print(f"Correct classification rate(%5): {accuracy:.2f}%")


Correct classification rate(%5): 82.17%


In [5]:

#Adding error margin
error_margin = 0.1
y_pred_lower = y_pred_unscaled * (1 - error_margin)
y_pred_upper = y_pred_unscaled * (1 + error_margin)

# Function to determine given values are valid or not
def is_within_error_margin(y_true, y_pred_lower, y_pred_upper):
    return np.all((y_true >= y_pred_lower) & (y_true <= y_pred_upper), axis=1)

# Accuracy rate calculation
correct_classifications = is_within_error_margin(y_test_unscaled, y_pred_lower, y_pred_upper)
accuracy = np.mean(correct_classifications) * 100

print(f"Correct classification rate(%10): {accuracy:.2f}%")


Correct classification rate(%10): 96.13%
