# In [None]:
import pandas as pd
import numpy as np
from itertools import product
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, ElasticNet, Ridge
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import PolynomialFeatures, PowerTransformer
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.feature_selection import SelectFromModel
import matplotlib.pyplot as plt
import joblib

# Data loading and preprocessing
# Reads the dataset, applies a standardized Yeo-Johnson power transform to
# every column except the 'UTS' target, and produces three partitions:
#   * X_train / y_train      - 80 % of the rows
#   * X_1_train / y_1_train  - one half of the remaining 20 %
#   * X_2_test / y_2_test    - the other half of the remaining 20 %
data_path = 'sum.csv'
data = pd.read_csv(data_path)

features = data.drop('UTS', axis=1)

# Pick the train / hold-out rows BEFORE fitting the transformer. Splitting the
# row positions with the same test_size and random_state selects the same rows
# that splitting X and y directly would select.
train_rows, holdout_rows = train_test_split(
    np.arange(len(features)), test_size=0.2, random_state=42
)

# Fit the power transform on the training rows only, so that the lambdas and
# the standardization statistics are not estimated from hold-out rows (which
# would leak hold-out information into the training features). The fitted
# transform is then applied to every row.
pt = PowerTransformer(method='yeo-johnson', standardize=True)
pt.fit(features.iloc[train_rows])
transformed_features = pt.transform(features)
transformed_df = pd.DataFrame(transformed_features, columns=features.columns)

# Index-aligned assignment: transformed_df has a fresh 0..n-1 index, so this
# assumes `data` also carries the default index produced by read_csv.
transformed_df['UTS'] = data['UTS']

X = transformed_df.drop('UTS', axis=1)
y = transformed_df['UTS']
X_train, X_test = X.iloc[train_rows], X.iloc[holdout_rows]
y_train, y_test = y.iloc[train_rows], y.iloc[holdout_rows]
X_1_train, X_2_test, y_1_train, y_2_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

# Feature engineering
# Expand the features into polynomial terms up to degree 2. The expansion is
# fitted on the base-model training split and then applied to all three splits.
poly = PolynomialFeatures(degree=2)
poly.fit(X_train)
X_train_poly, X_1_train_poly, X_2_test_poly = (
    poly.transform(part) for part in (X_train, X_1_train, X_2_test)
)

# Keep the polynomial terms whose AdaBoost feature importance is at or above
# the median importance. The selector is likewise fitted on the training split
# only and then applied to every split.
ada = AdaBoostRegressor(random_state=42)
selector = SelectFromModel(estimator=ada, threshold='median')
selector.fit(X_train_poly, y_train)
X_train_selected, X_1_train_selected, X_2_test_selected = (
    selector.transform(part)
    for part in (X_train_poly, X_1_train_poly, X_2_test_poly)
)

# Model training and ensemble
# Stacking: every base regressor is fitted on the selected training features.
# Their predictions on the X_1_train split are the input features of an
# AdaBoost meta-learner, which is then scored and plotted on the X_2_test
# split - data that neither the base models nor the meta-learner were fitted on.
models = [
    RandomForestRegressor(n_estimators=100, random_state=42),
    GradientBoostingRegressor(n_estimators=100, random_state=42),
    LinearRegression(),
    SVR(),
    KNeighborsRegressor(),
    DecisionTreeRegressor(random_state=42),
    ElasticNet(random_state=42),
    Ridge(random_state=42),
]

for model in models:
    model.fit(X_train_selected, y_train)

# In-sample base-model predictions; collected per model but not consumed below.
ensemble_train_predictions = [
    fitted.predict(X_train_selected) for fitted in models
]

# One column per base model: these matrices are the meta-learner's features.
ensemble_1_train_predictions = np.column_stack(
    [fitted.predict(X_1_train_selected) for fitted in models]
)
ensemble_2_test_predictions = np.column_stack(
    [fitted.predict(X_2_test_selected) for fitted in models]
)

final_model = AdaBoostRegressor(n_estimators=100, random_state=42)
final_model.fit(ensemble_1_train_predictions, y_1_train)

# Score on the held-out split. Predicting on the rows the meta-learner was
# just fitted on would report an in-sample fit instead of a generalization
# estimate.
final_predictions = final_model.predict(ensemble_2_test_predictions)

final_r2_score = r2_score(y_2_test, final_predictions)
final_mse = mean_squared_error(y_2_test, final_predictions)

print(f"Final R² score: {final_r2_score}")
print(f"Final MSE: {final_mse}")

# Define the path
# Raw string, so every doubled backslash is kept literally. The directory must
# already exist: savefig does not create missing directories.
save_path = r'D:\\XiaoHao_Research\\Machine_Learning_Heat_Resistant_Aluminum_Alloy\\Paper\\Machine_Learning_Figures\\Fitting_Plots\\Model_Usage\\'

# Font settings shared by the title, axis labels, tick labels and annotations.
label_font = dict(fontsize=36, fontname="Times New Roman", fontweight='bold')

# Plot fitting graph (held-out true values against meta-learner predictions)
plt.figure(figsize=(15, 10))
ax = plt.gca()
plt.scatter(y_2_test, final_predictions, color='blue')
plt.title("UTS", **label_font)
plt.xlabel("True", **label_font)
plt.ylabel("Predict", **label_font)
plt.xticks(**label_font)
plt.yticks(**label_font)
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_linewidth(2)

# Plot the red dashed line where true value = predicted value
true_range = [y_2_test.min(), y_2_test.max()]
plt.plot(true_range, true_range, 'r--', lw=2)

# Annotations in axes coordinates, stacked from the top-left corner:
# R², MSE, then a hand-made legend entry explaining the red dashed line.
annotations = [
    (0.05, 0.95, f'R² = {final_r2_score:.2f}', 'black'),
    (0.05, 0.9, f'MSE = {final_mse:.2f}', 'black'),
    (0.05, 0.85, '-----', 'red'),
    (0.12, 0.85, ': True value = Predict value', 'black'),
]
for x_pos, y_pos, text, text_color in annotations:
    plt.text(x_pos, y_pos, text, color=text_color,
             transform=ax.transAxes, verticalalignment='top', **label_font)

# Save the image to the specified path
plt.savefig(f'{save_path}UTS.png', dpi=300, bbox_inches='tight')
plt.close()  # Close the plot to free memory
