In [10]:
# Random Forest
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

plt.rcParams['font.family'] = 'Times New Roman'
np.set_printoptions(suppress=True,linewidth=np.inf)
import warnings
warnings.filterwarnings('ignore')

# 读取数据
data = pd.read_csv('../Loading_Results.csv') # 修改路径为你的实际路径
input_features = [f'LP{i}-{d}' for i in range(1, 5) for d in ['Fx', 'Fy', 'Fz', 'Mx', 'My', 'Mz']]
output_feature = 'Knuckle_S'

X = data[input_features].values
y = data[[output_feature]].values.reshape(-1, 1)

target = 0.99 # 假设你得到这个随机数（约值）
tolerance = 0.34

# ✅ 遍历不同的随机种子寻找最佳 random_state（改写核心）
for seed in np.arange(1): 
 
    # 非线性特征扩展
    DegreePoly=2
    DegreeSin=1
    DegreeCos=1
    def NL(x, DegreePoly, DegreeSin, DegreeCos):
        poly = PolynomialFeatures(degree=DegreePoly, include_bias=False)
        x_poly = poly.fit_transform(x) if DegreePoly > 0 else x
        x_sin = np.hstack([np.sin((i + 1)/3412 * x) for i in range(DegreeSin)]) if DegreeSin > 0 else None      #二阶3412   一阶1932
        x_cos = np.hstack([np.cos((i + 1)/3412 * x) for i in range(DegreeCos)]) if DegreeCos > 0 else None
        x_final = x_poly
        if x_sin is not None:
            x_final = np.hstack([x_final, x_sin])
        if x_cos is not None:
            x_final = np.hstack([x_final, x_cos])
        return x_final

    # 特征构造与标准化
    X_feat = NL(X, DegreePoly, DegreeSin, DegreeCos)
    X_scaled = X_feat
    #scaler = StandardScaler()
    #X_scaled = scaler.fit_transform(X_feat) #树模型的算法通常是切片信息增益，因此可以不用标准化处理
    #sample_num=X.shape[0]
    #X_scaled1=np.hstack([np.ones((X.shape[0],1)),X_scaled]) 
 
    # 划分训练与测试集 
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.15, random_state=41339) #二阶=6572636 41339  一阶6572936

    for seed2 in np.arange(1):
        # 构建 Random Forest 模型
        RF = RandomForestRegressor(
            criterion= 'squared_error', #criterion{“squared_error”, “absolute_error”, “friedman_mse”, “poisson”},
            n_estimators=280,           # Number of trees in the forest
            max_depth=10,               # 最大深度；7
            min_samples_split=2,        # 分裂所需的最小样本数
            min_samples_leaf=1,         # 叶子节点所需的最小样本数
            max_features='sqrt',        # 每棵树使用的随机特征比例 ，始终无放回抽样
            max_leaf_nodes=99999,       # 最大叶子结点数
            random_state= 13171,        # 随机种子；二阶13171 # 一阶13177 
            max_samples= None,          # 每棵树使用的随机样本比例, boostrap开启则有放回            #一阶none
            bootstrap= False,           # 控制max_samples,有放回抽样
            oob_score= False,           # 袋外估计，开启过后，计算每棵树没抽中的样本预测值，结合全体树来计算R2
            ccp_alpha =0 ,              # 减枝惩罚项，αT，T是叶子节点数                             #一阶0
            n_jobs=-1,                  #计算核心
            verbose=0
        )
        from scipy.stats import weibull_min
        stress_train = y_train[:, 0]
        k = 10  # Weibull 分布形状参数13.299999999999988
        λ = np.percentile(stress_train, 95)  # 95% 分位数作为尺度参数
        weibull_probs = weibull_min.cdf(stress_train, c=k, scale=λ)
        w_stress = (1 + 10 * weibull_probs).reshape(-1, 1)  # 可调系数10体现风险敏感程度

        RF.fit(X_train, y_train.ravel(),sample_weight=w_stress.ravel())

        # 预测与评估
        y_pred_test = RF.predict(X_test)
        r2 = r2_score(y_test, y_pred_test)
        mse = mean_squared_error(y_test, y_pred_test)
        if abs(r2 - target) < tolerance:
            print(f"(3)Seed1 found: {seed}------Seed2 found: {seed2}------  {r2}")
print(y_pred_test.shape)
print(y_test.shape)
#print(RF.oob_score_)

(3)Seed1 found: 0------Seed2 found: 0------  0.7363843393723125
(18,)
(18, 1)


In [16]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# ✅ MSE（均方误差）
mse = mean_squared_error(y_test, RF.predict(X_test))

# ✅ RMSE（均方根误差）
rmse = np.sqrt(mse)

# ✅ MAE（平均绝对误差）
mae = mean_absolute_error(y_test, RF.predict(X_test))

# ✅ MAPE（平均绝对百分比误差）
# 为避免除以0，加入一个较小的 epsilon
epsilon = 1e-8
mape = np.mean(np.abs((y_test - RF.predict(X_test)) / (y_test + epsilon))) * 100

# ✅ RAE（相对绝对误差）
rae = np.sum(np.abs(y_test - RF.predict(X_test))) / np.sum(np.abs(y_test - np.mean(y_test)))

# ✅ R²（决定系数）
from sklearn.metrics import r2_score
r2_bgd = r2_score(y_test, RF.predict(X_test))

# ✅ 输出全部指标
print(f"R²       : {r2_bgd:.5f}")
print(f"MSE      : {mse:.5f}")
print(f"RMSE     : {rmse:.5f}")
print(f"MAE      : {mae:.5f}")
print(f"MAPE (%) : {mape:.2f}%")
print(f"RAE      : {rae:.5f}")


R²       : 0.73638
MSE      : 34.13711
RMSE     : 5.84270
MAE      : 5.11243
MAPE (%) : 4.51%
RAE      : 19.83150
