In [1]:
from sklearn.datasets import fetch_openml
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
import numpy as np
from tabpfn import TabPFNRegressor  
import pandas as pd
from pandas import read_csv
import math
import torch

In [2]:
def mape(y_true, y_pred, epsilon=1e-8):
    """
    Compute the mean absolute percentage error (MAPE).

    Args:
        y_true: Ground-truth values. Either a tensor or any array-like that
            ``torch.tensor`` can convert (list, NumPy array, ...).
        y_pred: Predicted values, same shape as ``y_true``; tensor or array-like.
        epsilon: Small positive constant added to ``|y_true|`` so that the
            denominator is never zero (default 1e-8).

    Returns:
        A 0-dim tensor holding the MAPE expressed in percent.
    """
    # Existing tensors are used as-is (no copy, autograd graph preserved);
    # anything else is converted so callers may pass lists / NumPy arrays.
    if not isinstance(y_true, torch.Tensor):
        y_true = torch.tensor(y_true)
    if not isinstance(y_pred, torch.Tensor):
        y_pred = torch.tensor(y_pred)

    # Make sure both operands live on the same device.
    y_pred = y_pred.to(y_true.device)

    # Use |y_true| + epsilon rather than y_true + epsilon: the signed form is
    # exactly zero at y_true == -epsilon and shrinks the denominator for every
    # negative target. For non-negative targets both forms give the same value.
    ape = torch.abs(y_true - y_pred) / (torch.abs(y_true) + epsilon)

    # Average and convert the fraction to a percentage.
    return torch.mean(ape) * 100


In [3]:
def safe_mape(y_true, y_pred, epsilon=1e-8, threshold=0.01):
    """
    MAPE variant that skips targets at or near zero.

    Points with ``|y_true| <= threshold`` are excluded before averaging, so
    tiny targets cannot blow up the percentage error.

    Args:
        y_true: Ground-truth values; a tensor or any array-like that
            ``torch.tensor`` can convert.
        y_pred: Predicted values, same shape as ``y_true``; tensor or array-like.
        epsilon: Small positive constant added to ``|y_true|`` in the
            denominator to rule out division by zero.
        threshold: Targets whose absolute value does not exceed this are
            ignored (default 0.01).

    Returns:
        A 0-dim tensor with the MAPE in percent, or NaN when no target
        passes the threshold.
    """
    # Only convert non-tensors: torch.tensor(<tensor>) emits a copy-construct
    # UserWarning and detaches the input from the autograd graph.
    if not isinstance(y_true, torch.Tensor):
        y_true = torch.tensor(y_true)
    if not isinstance(y_pred, torch.Tensor):
        y_pred = torch.tensor(y_pred)

    # Make sure both operands live on the same device.
    y_pred = y_pred.to(y_true.device)

    # Keep only the points whose target is clearly away from zero.
    mask = torch.abs(y_true) > threshold

    # Nothing left to average over -> NaN.
    if not mask.any():
        return torch.tensor(float('nan'), device=y_true.device)

    kept_true = y_true[mask]
    kept_pred = y_pred[mask]

    # |y_true| + epsilon keeps the denominator strictly positive for negative
    # targets as well; for positive targets it equals the signed form.
    ape = torch.abs(kept_true - kept_pred) / (torch.abs(kept_true) + epsilon)

    return torch.mean(ape) * 100

In [7]:
# Load the training workbook into a DataFrame, confirm its type, and preview the first rows.
dataset = pd.read_excel(
    io=r'D:\Data\NHRI\小论文4\41、数据集-按工况取测试集\训练集_V2.xlsx',
)
print(dataset.__class__)
dataset.head(n=5)

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,门宽,门高,门厚,门后压力,上倾,下倾,对称倾角,开度,倾角正弦值,水头,底缘单宽净动水荷载
0,5.4,5.2,0.95,4,1,0,0,0.1,0.300706,17.2,75.776301
1,5.4,5.2,0.95,4,1,0,0,0.1,0.300706,20.0,94.489334
2,5.4,5.2,0.95,4,1,0,0,0.1,0.300706,22.5,106.259588
3,5.4,5.2,0.95,4,1,0,0,0.1,0.300706,25.0,122.277321
4,5.4,5.2,0.95,4,1,0,0,0.1,0.300706,28.2,134.698419


In [8]:
# Turn the DataFrame into a float32 NumPy matrix, then show its type,
# its first five rows and its overall shape.
values = np.array(dataset.values.astype('float32'))
print(type(values))
print(values[:5])
print(values.shape)

<class 'numpy.ndarray'>
[[5.4000001e+00 5.1999998e+00 9.4999999e-01 4.0000000e+00 1.0000000e+00
  0.0000000e+00 0.0000000e+00 1.0000000e-01 3.0070579e-01 1.7200001e+01
  7.5776299e+01]
 [5.4000001e+00 5.1999998e+00 9.4999999e-01 4.0000000e+00 1.0000000e+00
  0.0000000e+00 0.0000000e+00 1.0000000e-01 3.0070579e-01 2.0000000e+01
  9.4489334e+01]
 [5.4000001e+00 5.1999998e+00 9.4999999e-01 4.0000000e+00 1.0000000e+00
  0.0000000e+00 0.0000000e+00 1.0000000e-01 3.0070579e-01 2.2500000e+01
  1.0625959e+02]
 [5.4000001e+00 5.1999998e+00 9.4999999e-01 4.0000000e+00 1.0000000e+00
  0.0000000e+00 0.0000000e+00 1.0000000e-01 3.0070579e-01 2.5000000e+01
  1.2227732e+02]
 [5.4000001e+00 5.1999998e+00 9.4999999e-01 4.0000000e+00 1.0000000e+00
  0.0000000e+00 0.0000000e+00 1.0000000e-01 3.0070579e-01 2.8200001e+01
  1.3469843e+02]]
(600, 11)


In [9]:
# The test sets were already split off into their own files, so everything
# read here is training data and no train/test split is needed.
# First ten columns are the features; the last column is the target.
X_train, y_train = values[:, :10], values[:, -1]
print(X_train.shape, y_train.shape, sep="\n")

print(type(X_train), type(y_train), sep="\n")

print("X_train.shape: ", X_train.shape)
print("y_train.shape: ", y_train.shape)

(600, 10)
(600,)
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
X_train.shape:  (600, 10)
y_train.shape:  (600,)


In [10]:
# Pick the compute device: use CUDA when a GPU is available, otherwise fall back to CPU.
import torch

if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

Using device: cuda


In [11]:
# Initialize the regressor on the device chosen earlier ('cuda' when available, else 'cpu')
regressor = TabPFNRegressor(device = device)  
# Fit on the full training matrix (X_train: feature columns, y_train: target column).
regressor.fit(X_train, y_train)

In [12]:
# Name of the output (target) column; the cells below use it to separate the
# ground-truth values from the feature columns of each loaded CSV.
label = '底缘单宽净动水荷载'

## 看训练集上的结果 ----------------------------------


In [23]:
# Re-read the training set from CSV and predict on it to inspect the in-sample fit.
train_data = pd.read_csv(
    r'D:\Data\NHRI\小论文4\41、数据集-按工况取测试集\训练集_V2.csv',
    encoding='utf-8',
)
train_y = train_data.loc[:, label]
# Feature frame = every column except the target; built once and reused.
train_features = train_data.drop(label, axis=1)
print(train_features.head())
pred_y = regressor.predict(train_features)  # predictions on the training rows
# One prediction per line.
for pred in pred_y:
    print(pred)

    门宽   门高    门厚  门后压力  上倾  下倾  对称倾角   开度  倾角正弦值    水头
0  5.4  5.2  0.95     4   1   0     0  0.1  0.301  17.2
1  5.4  5.2  0.95     4   1   0     0  0.1  0.301  20.0
2  5.4  5.2  0.95     4   1   0     0  0.1  0.301  22.5
3  5.4  5.2  0.95     4   1   0     0  0.1  0.301  25.0
4  5.4  5.2  0.95     4   1   0     0  0.1  0.301  28.2




70.46085
86.32597
98.870575
111.023895
126.9432
137.73523
123.41077
140.99063
166.04071
184.50963
207.46625
228.0651
156.09738
183.92685
214.64114
240.5662
268.30923
298.80267
169.00214
192.93718
230.3678
255.1227
289.14093
316.66913
160.81897
184.66583
214.44292
244.40549
273.2304
304.94116
153.74675
173.23909
201.70013
227.147
257.49857
283.68906
119.13219
136.97217
159.77582
177.31442
195.8896
216.04675
90.78804
100.317215
116.3647
130.41136
147.70348
163.15344
33.29557
51.716812
58.976566
68.49431
76.36012
83.28137
60.668545
88.17636
98.82919
113.21278
126.18065
140.61426
85.82185
125.60828
142.35197
167.08467
186.05002
205.22523
99.23615
143.87775
172.30058
190.38147
214.41298
230.838
96.97705
139.01291
161.66388
183.86826
202.02145
223.1853
89.65534
127.44656
139.61649
168.78949
183.98248
207.40024
65.50892
91.81183
105.479996
120.74194
134.58536
149.9576
40.884884
59.244694
68.55766
79.27255
85.94499
97.21257
18.12185
23.104574
34.688286
41.38786
44.62018
50.80474
30.431652
37.2

In [18]:
# Score the training-set predictions: MAE, MSE (and its root), thresholded MAPE in percent, and R².
mae, mse, r2 = (
    mean_absolute_error(train_y, pred_y),
    mean_squared_error(train_y, pred_y),
    r2_score(train_y, pred_y),
)
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", math.sqrt(mse))
print("Mean Absolute Percentage Error (MAPE):", safe_mape(train_y, pred_y).item(), "%")
print("R² Score:", r2)

Mean Absolute Error (MAE): 2.9178824373944603
Mean Squared Error (MSE): 16.384939254813887
Root Mean Squared Error (RMSE): 4.04783142618537
Mean Absolute Percentage Error (MAPE): 3.479157317170229 %
R² Score: 0.9969899927639194


## 用所有工况的测试集数据检验 ----------------------------------


In [24]:
# Load the complete test set (all operating conditions) and predict on it.
test_data = pd.read_csv(
    r'D:\Data\NHRI\小论文4\41、数据集-按工况取测试集\完整测试集_V2.csv',
    encoding='utf-8',
)
y_test = test_data.loc[:, label]
# Features are every column except the target.
y_pred = regressor.predict(test_data.drop(label, axis=1))
# One prediction per line.
for pred in y_pred:
    print(pred)



55.889812
95.112114
123.14603
131.94043
126.303894
117.89406
92.53994
64.7514
44.125004
77.70128
108.87137
124.14522
121.71996
111.99253
84.26103
52.85545
30.944103
45.69625
67.32392
78.92622
69.57611
61.610283
42.51114
25.293833
121.18977
139.6491
147.00967
135.8605
164.58246
146.23782
115.67635
107.45054
142.55582
163.37964
170.76474
155.6236
187.75491
171.80054
132.88597
125.48918
173.50742
192.21338
214.09692
193.27261
211.56573
198.96434
171.91182
154.12799
217.64738
242.85841
251.94069
259.94525
251.09062
234.9028
195.7959
160.33768
187.83977
268.4186
296.13687
303.75244
286.66913
270.52673
205.87057
163.49812
146.64389
181.09636
200.01904
189.73512
213.70918
196.0549
155.13083
125.64634


In [25]:
# Score the predictions: MAE, MSE (and its root), thresholded MAPE in percent, and R².
mae, mse, r2 = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", math.sqrt(mse))
print("Mean Absolute Percentage Error (MAPE):", safe_mape(y_test, y_pred).item(), "%")
print("R² Score:", r2)

Mean Absolute Error (MAE): 3.754896462546453
Mean Squared Error (MSE): 23.78436860770614
Root Mean Squared Error (RMSE): 4.876922042406064
Mean Absolute Percentage Error (MAPE): 3.0146172673851015 %
R² Score: 0.994647950595786


## 用第一工况的数据检验 ----------------------------------


In [29]:
# Scenario 1 test file (per its name: upward tilt, 17.5 degrees, first head level):
# load it and predict on its feature columns.
test_data = pd.read_csv(
    r'D:\Data\NHRI\小论文4\41、数据集-按工况取测试集\测试集1-上倾_17.5度_第一水头.csv',
    encoding='utf-8',
)
y_test = test_data.loc[:, label]
y_pred = regressor.predict(test_data.drop(label, axis=1))
# One prediction per line.
for pred in y_pred:
    print(pred)



55.872322
95.13737
123.10248
131.92386
126.30313
117.89893
92.546814
64.72936


In [30]:
# Score the predictions: MAE, MSE (and its root), thresholded MAPE in percent, and R².
mae, mse, r2 = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", math.sqrt(mse))
print("Mean Absolute Percentage Error (MAPE):", safe_mape(y_test, y_pred).item(), "%")
print("R² Score:", r2)

Mean Absolute Error (MAE): 7.421216629028324
Mean Squared Error (MSE): 60.60943222217388
Root Mean Squared Error (RMSE): 7.785205984569315
Mean Absolute Percentage Error (MAPE): 7.281335563522717 %
R² Score: 0.9189280890644616


## 用第二工况的数据检验 ----------------------------------


In [31]:
# Scenario 2 test file (per its name: upward tilt, 30 degrees, second head level):
# load it and predict on its feature columns.
test_data = pd.read_csv(
    r'D:\Data\NHRI\小论文4\41、数据集-按工况取测试集\测试集2-上倾_30度_第二水头.csv',
    encoding='utf-8',
)
y_test = test_data.loc[:, label]
y_pred = regressor.predict(test_data.drop(label, axis=1))
# One prediction per line.
for pred in y_pred:
    print(pred)



44.131165
77.70567
108.86435
124.144394
121.72655
111.99283
84.252014
52.830524


In [32]:
# Score the predictions: MAE, MSE (and its root), thresholded MAPE in percent, and R².
mae, mse, r2 = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", math.sqrt(mse))
print("Mean Absolute Percentage Error (MAPE):", safe_mape(y_test, y_pred).item(), "%")
print("R² Score:", r2)

Mean Absolute Error (MAE): 1.7980004463195787
Mean Squared Error (MSE): 6.6414547865304385
Root Mean Squared Error (RMSE): 2.5771020132176448
Mean Absolute Percentage Error (MAPE): 2.678769620893247 %
R² Score: 0.9922774705739787


## 用第三工况的数据检验 ----------------------------------


In [33]:
# Scenario 3 test file (per its name: upward tilt, 45 degrees, third head level):
# load it and predict on its feature columns.
test_data = pd.read_csv(
    r'D:\Data\NHRI\小论文4\41、数据集-按工况取测试集\测试集3-上倾_45度_第三水头.csv',
    encoding='utf-8',
)
y_test = test_data.loc[:, label]
y_pred = regressor.predict(test_data.drop(label, axis=1))
# One prediction per line.
for pred in y_pred:
    print(pred)



30.949425
45.718147
67.32422
78.89418
69.57648
61.616882
42.534096
25.29292


In [34]:
# Score the predictions: MAE, MSE (and its root), thresholded MAPE in percent, and R².
mae, mse, r2 = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", math.sqrt(mse))
print("Mean Absolute Percentage Error (MAPE):", safe_mape(y_test, y_pred).item(), "%")
print("R² Score:", r2)

Mean Absolute Error (MAE): 2.34503804397583
Mean Squared Error (MSE): 10.177574051823491
Root Mean Squared Error (RMSE): 3.1902310342392903
Mean Absolute Percentage Error (MAPE): 4.962866633477472 %
R² Score: 0.9658388492901796


## 用第四工况的数据检验 ----------------------------------


In [35]:
# Scenario 4 test file (per its name: downward tilt, 30 degrees, second head level):
# load it and predict on its feature columns.
test_data = pd.read_csv(
    r'D:\Data\NHRI\小论文4\41、数据集-按工况取测试集\测试集4-下倾_30度_第二水头.csv',
    encoding='utf-8',
)
y_test = test_data.loc[:, label]
y_pred = regressor.predict(test_data.drop(label, axis=1))
# One prediction per line.
for pred in y_pred:
    print(pred)



121.18161
139.6681
147.00868
135.88504
164.59546
146.22125
115.67438
107.440056


In [36]:
# Score the predictions: MAE, MSE (and its root), thresholded MAPE in percent, and R².
mae, mse, r2 = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", math.sqrt(mse))
print("Mean Absolute Percentage Error (MAPE):", safe_mape(y_test, y_pred).item(), "%")
print("R² Score:", r2)

Mean Absolute Error (MAE): 2.0358620681762662
Mean Squared Error (MSE): 7.216874134221222
Root Mean Squared Error (RMSE): 2.6864240421462173
Mean Absolute Percentage Error (MAPE): 1.5059192935116519 %
R² Score: 0.9776112625603562


## 用第五工况的数据检验 ----------------------------------


In [37]:
# Scenario 5 test file (per its name: downward tilt, 45 degrees, third head level):
# load it and predict on its feature columns.
test_data = pd.read_csv(
    r'D:\Data\NHRI\小论文4\41、数据集-按工况取测试集\测试集5-下倾_45度_第三水头.csv',
    encoding='utf-8',
)
y_test = test_data.loc[:, label]
y_pred = regressor.predict(test_data.drop(label, axis=1))
# One prediction per line.
for pred in y_pred:
    print(pred)



142.5575
163.3949
170.75436
155.61926
187.76039
171.80894
132.87108
125.506714


In [38]:
# Score the predictions: MAE, MSE (and its root), thresholded MAPE in percent, and R².
mae, mse, r2 = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", math.sqrt(mse))
print("Mean Absolute Percentage Error (MAPE):", safe_mape(y_test, y_pred).item(), "%")
print("R² Score:", r2)

Mean Absolute Error (MAE): 2.343149459838868
Mean Squared Error (MSE): 6.840193144764137
Root Mean Squared Error (RMSE): 2.6153762912369105
Mean Absolute Percentage Error (MAPE): 1.4786096392491435 %
R² Score: 0.9800526601725187


## 用第六工况的数据检验 ----------------------------------


In [39]:
# Scenario 6 test file (per its name: downward tilt, 60 degrees, fourth head level):
# load it and predict on its feature columns.
test_data = pd.read_csv(
    r'D:\Data\NHRI\小论文4\41、数据集-按工况取测试集\测试集6-下倾_60度_第四水头.csv',
    encoding='utf-8',
)
y_test = test_data.loc[:, label]
y_pred = regressor.predict(test_data.drop(label, axis=1))
# One prediction per line.
for pred in y_pred:
    print(pred)



173.48453
192.22202
214.10779
193.24738
211.57251
198.9682
171.90536
154.11914


In [40]:
# Score the predictions: MAE, MSE (and its root), thresholded MAPE in percent, and R².
mae, mse, r2 = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", math.sqrt(mse))
print("Mean Absolute Percentage Error (MAPE):", safe_mape(y_test, y_pred).item(), "%")
print("R² Score:", r2)

Mean Absolute Error (MAE): 3.018722442626956
Mean Squared Error (MSE): 16.369987338456472
Root Mean Squared Error (RMSE): 4.0459841001240315
Mean Absolute Percentage Error (MAPE): 1.6065842653867866 %
R² Score: 0.9568435168417324


## 用第七工况的数据检验 ----------------------------------


In [41]:
# Scenario 7 test file (per its name: symmetric, 17.5 degrees, fifth head level):
# load it and predict on its feature columns.
test_data = pd.read_csv(
    r'D:\Data\NHRI\小论文4\41、数据集-按工况取测试集\测试集7-对称_17.5度_第五水头.csv',
    encoding='utf-8',
)
y_test = test_data.loc[:, label]
y_pred = regressor.predict(test_data.drop(label, axis=1))
# One prediction per line.
for pred in y_pred:
    print(pred)



217.61667
242.83887
251.92981
259.9477
251.0954
234.90244
195.79514
160.35992


In [42]:
# Score the predictions: MAE, MSE (and its root), thresholded MAPE in percent, and R².
mae, mse, r2 = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", math.sqrt(mse))
print("Mean Absolute Percentage Error (MAPE):", safe_mape(y_test, y_pred).item(), "%")
print("R² Score:", r2)

Mean Absolute Error (MAE): 3.2833235778808536
Mean Squared Error (MSE): 16.30939963226899
Root Mean Squared Error (RMSE): 4.038489771222528
Mean Absolute Percentage Error (MAPE): 1.576794305017181 %
R² Score: 0.9838254664990417


## 用第八工况的数据检验 ----------------------------------


In [43]:
# Scenario 8 test file (per its name: symmetric, 30 degrees, sixth head level):
# load it and predict on its feature columns.
test_data = pd.read_csv(
    r'D:\Data\NHRI\小论文4\41、数据集-按工况取测试集\测试集8-对称_30度_第六水头.csv',
    encoding='utf-8',
)
y_test = test_data.loc[:, label]
y_pred = regressor.predict(test_data.drop(label, axis=1))
# One prediction per line.
for pred in y_pred:
    print(pred)



187.82959
268.45245
296.13416
303.7511
286.66534
270.52017
205.88663
163.53313


In [44]:
# Score the predictions: MAE, MSE (and its root), thresholded MAPE in percent, and R².
mae, mse, r2 = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", math.sqrt(mse))
print("Mean Absolute Percentage Error (MAPE):", safe_mape(y_test, y_pred).item(), "%")
print("R² Score:", r2)

Mean Absolute Error (MAE): 4.914839828491203
Mean Squared Error (MSE): 37.238256567759805
Root Mean Squared Error (RMSE): 6.102315672575437
Mean Absolute Percentage Error (MAPE): 2.265560090788561 %
R² Score: 0.9867636468640787


## 用第九工况的数据检验 ----------------------------------


In [45]:
# Scenario 9 test file (per its name: symmetric, 60 degrees, seventh head level):
# load it and predict on its feature columns.
test_data = pd.read_csv(
    r'D:\Data\NHRI\小论文4\41、数据集-按工况取测试集\测试集9-对称_60度_第七水头.csv',
    encoding='utf-8',
)
y_test = test_data.loc[:, label]
y_pred = regressor.predict(test_data.drop(label, axis=1))
# One prediction per line.
for pred in y_pred:
    print(pred)



146.7127
181.0859
200.03247
189.73111
213.65123
196.03465
155.13522
125.60849


In [46]:
# Score the predictions: MAE, MSE (and its root), thresholded MAPE in percent, and R².
mae, mse, r2 = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", math.sqrt(mse))
print("Mean Absolute Percentage Error (MAPE):", safe_mape(y_test, y_pred).item(), "%")
print("R² Score:", r2)

Mean Absolute Error (MAE): 6.615991119384768
Mean Squared Error (MSE): 52.40821304134552
Root Mean Squared Error (RMSE): 7.23935170034897
Mean Absolute Percentage Error (MAPE): 3.7576145470875857 %
R² Score: 0.9100746549868715
