In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Load the data
data = pd.read_csv('aluminum_wire_rod_synthetic_10000.csv')

# Prepare the features (X) and target variable (y)
X = data[['Casting_Temperature_C', 'Rolling_Speed_m_min', 'Cooling_Rate_C_s', 'Elongation_%', 'Conductivity_%_IACS']]
y = data['UTS_MPa']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the metrics
print(f"Mean Squared Error: {mse:.4f}")
print(f"Root Mean Squared Error: {rmse:.4f}")
print(f"Mean Absolute Error: {mae:.4f}")
print(f"R-squared Score: {r2:.4f}")

# Print feature coefficients
for feature, coef in zip(X.columns, model.coef_):
    print(f"{feature}: {coef:.4f}")

print(f"Intercept: {model.intercept_:.4f}")

Mean Squared Error: 24.9817
Root Mean Squared Error: 4.9982
Mean Absolute Error: 4.0207
R-squared Score: 0.7683
Casting_Temperature_C: 0.4964
Rolling_Speed_m_min: -0.3012
Cooling_Rate_C_s: 0.0176
Elongation_%: 0.0521
Conductivity_%_IACS: 0.0209
Intercept: 196.8656


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Load the data
data = pd.read_csv('aluminum_wire_rod_synthetic_10000.csv')

# Prepare the features (X) and target variables (y)
X = data[['Casting_Temperature_C', 'Rolling_Speed_m_min', 'Cooling_Rate_C_s']]
y = data[['UTS_MPa', 'Elongation_%', 'Conductivity_%_IACS']]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the multi-output regression model
model = MultiOutputRegressor(LinearRegression())
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate metrics for each output
for i, col in enumerate(y.columns):
    mse = mean_squared_error(y_test.iloc[:, i], y_pred[:, i])
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test.iloc[:, i], y_pred[:, i])
    r2 = r2_score(y_test.iloc[:, i], y_pred[:, i])
    
    print(f"\nMetrics for {col}:")
    print(f"Mean Squared Error: {mse:.4f}")
    print(f"Root Mean Squared Error: {rmse:.4f}")
    print(f"Mean Absolute Error: {mae:.4f}")
    print(f"R-squared Score: {r2:.4f}")

# Print feature coefficients for each output
for i, col in enumerate(y.columns):
    print(f"\nCoefficients for {col}:")
    for feature, coef in zip(X.columns, model.estimators_[i].coef_):
        print(f"{feature}: {coef:.4f}")
    print(f"Intercept: {model.estimators_[i].intercept_:.4f}")


Metrics for UTS_MPa:
Mean Squared Error: 24.9853
Root Mean Squared Error: 4.9985
Mean Absolute Error: 4.0213
R-squared Score: 0.7683

Metrics for Elongation_%:
Mean Squared Error: 1.0399
Root Mean Squared Error: 1.0198
Mean Absolute Error: 0.8157
R-squared Score: 0.6712

Metrics for Conductivity_%_IACS:
Mean Squared Error: 0.2467
Root Mean Squared Error: 0.4967
Mean Absolute Error: 0.3908
R-squared Score: 0.1262

Coefficients for UTS_MPa:
Casting_Temperature_C: 0.5016
Rolling_Speed_m_min: -0.3013
Cooling_Rate_C_s: 0.0155
Intercept: 198.5263

Coefficients for Elongation_%:
Casting_Temperature_C: 0.0999
Rolling_Speed_m_min: 0.0009
Cooling_Rate_C_s: -0.0490
Intercept: 9.8972

Coefficients for Conductivity_%_IACS:
Casting_Temperature_C: 0.0000
Rolling_Speed_m_min: -0.0097
Cooling_Rate_C_s: 0.0215
Intercept: 54.8915


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import xgboost as xgb

# Load the data
data = pd.read_csv('aluminum_wire_rod_synthetic_10000.csv')

# Prepare the features (X) and target variables (y)
X = data[['Casting_Temperature_C', 'Rolling_Speed_m_min', 'Cooling_Rate_C_s']]
y = data[['UTS_MPa', 'Elongation_%', 'Conductivity_%_IACS']]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the XGBoost multi-output regression model
xgb_model = xgb.XGBRegressor(n_estimators=100, random_state=42)
model = MultiOutputRegressor(xgb_model)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate metrics for each output
for i, col in enumerate(y.columns):
    mse = mean_squared_error(y_test.iloc[:, i], y_pred[:, i])
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test.iloc[:, i], y_pred[:, i])
    r2 = r2_score(y_test.iloc[:, i], y_pred[:, i])
    
    print(f"\nMetrics for {col}:")
    print(f"Mean Squared Error: {mse:.4f}")
    print(f"Root Mean Squared Error: {rmse:.4f}")
    print(f"Mean Absolute Error: {mae:.4f}")
    print(f"R-squared Score: {r2:.4f}")

# Print feature importances for each output
for i, col in enumerate(y.columns):
    print(f"\nFeature importances for {col}:")
    importances = model.estimators_[i].feature_importances_
    for feature, importance in zip(X.columns, importances):
        print(f"{feature}: {importance:.4f}")


Metrics for UTS_MPa:
Mean Squared Error: 28.5735
Root Mean Squared Error: 5.3454
Mean Absolute Error: 4.2861
R-squared Score: 0.7350

Metrics for Elongation_%:
Mean Squared Error: 1.1890
Root Mean Squared Error: 1.0904
Mean Absolute Error: 0.8674
R-squared Score: 0.6241

Metrics for Conductivity_%_IACS:
Mean Squared Error: 0.2815
Root Mean Squared Error: 0.5306
Mean Absolute Error: 0.4176
R-squared Score: 0.0029

Feature importances for UTS_MPa:
Casting_Temperature_C: 0.6049
Rolling_Speed_m_min: 0.3445
Cooling_Rate_C_s: 0.0506

Feature importances for Elongation_%:
Casting_Temperature_C: 0.8425
Rolling_Speed_m_min: 0.0699
Cooling_Rate_C_s: 0.0876

Feature importances for Conductivity_%_IACS:
Casting_Temperature_C: 0.2421
Rolling_Speed_m_min: 0.4294
Cooling_Rate_C_s: 0.3285


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
import xgboost as xgb

# Load and prepare the data
data = pd.read_csv('aluminum_wire_rod_synthetic_10000.csv')
X = data[['Casting_Temperature_C', 'Rolling_Speed_m_min', 'Cooling_Rate_C_s']]
y = data[['UTS_MPa', 'Elongation_%', 'Conductivity_%_IACS']]

# Feature engineering
X['Temp_Speed_Interaction'] = X['Casting_Temperature_C'] * X['Rolling_Speed_m_min']
X['Temp_Cooling_Interaction'] = X['Casting_Temperature_C'] * X['Cooling_Rate_C_s']

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define XGBoost model
xgb_model = xgb.XGBRegressor(random_state=42)

# Perform GridSearchCV for hyperparameter tuning
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
multi_output_model = MultiOutputRegressor(grid_search)

# Fit the model
multi_output_model.fit(X_train, y_train)

# Make predictions
y_pred = multi_output_model.predict(X_test)

# Calculate and print metrics
for i, col in enumerate(y.columns):
    mse = mean_squared_error(y_test.iloc[:, i], y_pred[:, i])
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test.iloc[:, i], y_pred[:, i])
    r2 = r2_score(y_test.iloc[:, i], y_pred[:, i])
    
    print(f"\nMetrics for {col}:")
    print(f"Mean Squared Error: {mse:.4f}")
    print(f"Root Mean Squared Error: {rmse:.4f}")
    print(f"Mean Absolute Error: {mae:.4f}")
    print(f"R-squared Score: {r2:.4f}")

# Print best parameters
for i, col in enumerate(y.columns):
    print(f"\nBest parameters for {col}:")
    print(multi_output_model.estimators_[i].best_params_)

# Feature importance
feature_names = X.columns
for i, col in enumerate(y.columns):
    print(f"\nFeature importances for {col}:")
    importances = multi_output_model.estimators_[i].best_estimator_.feature_importances_
    for feature, importance in zip(feature_names, importances):
        print(f"{feature}: {importance:.4f}")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Temp_Speed_Interaction'] = X['Casting_Temperature_C'] * X['Rolling_Speed_m_min']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Temp_Cooling_Interaction'] = X['Casting_Temperature_C'] * X['Cooling_Rate_C_s']


Fitting 5 folds for each of 48 candidates, totalling 240 fits
Fitting 5 folds for each of 48 candidates, totalling 240 fits
Fitting 5 folds for each of 48 candidates, totalling 240 fits

Metrics for UTS_MPa:
Mean Squared Error: 25.3511
Root Mean Squared Error: 5.0350
Mean Absolute Error: 4.0496
R-squared Score: 0.7649

Metrics for Elongation_%:
Mean Squared Error: 1.0500
Root Mean Squared Error: 1.0247
Mean Absolute Error: 0.8180
R-squared Score: 0.6680

Metrics for Conductivity_%_IACS:
Mean Squared Error: 0.2477
Root Mean Squared Error: 0.4977
Mean Absolute Error: 0.3917
R-squared Score: 0.1225

Best parameters for UTS_MPa:
{'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}

Best parameters for Elongation_%:
{'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}

Best parameters for Conductivity_%_IACS:
{'colsample_bytree': 0.8, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 200

In [None]:
import joblib

# Assuming 'multi_output_model' is your trained model
# and 'scaler' is your StandardScaler object

# Save the model
joblib.dump(multi_output_model, 'aluminum_wire_model.joblib')

# Save the scaler
joblib.dump(scaler, 'scaler.joblib')

print("Model and scaler saved successfully.")