In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import os
import matplotlib.pyplot as plt


# 读取数据集
data = pd.read_csv("/kaggle/input/svmaed/data.csv")
# 提取相关特征列和目标列
features = data[['Total_Mean_Ground_Time', 'Mean_GROUND_Efficiency', 'Mean_FLIGHT_Efficiency']]
target = data['Total_Mean_Delay']

# 数据归一化
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
features_scaled = scaler_x.fit_transform(features)
target_scaled = scaler_y.fit_transform(target.values.reshape(-1, 1))

# 构建序列数据，假设时间步长为14，可按需调整
time_steps = 14
X = []
y = []
for i in range(len(features_scaled) - time_steps):
    X.append(features_scaled[i:i + time_steps])
    y.append(target_scaled[i + time_steps])
X = np.array(X)
y = np.array(y)

# 定义5折交叉验证
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
mse_scores = []
mae_scores = []
mape_scores = []
rmse_scores = []  # 新增用于存储RMSE的列表

# 用于存储每次折叠训练过程中的损失值，方便后续绘制损失曲线（SVM没有像神经网络那样的训练损失概念，这里可记录预测误差等信息用于分析）
all_prediction_errors = []

for train_index, test_index in kfold.split(X):
    # 划分训练集和测试集
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # 构建SVM回归模型，这里使用径向基函数（RBF）核，你可根据实际情况选择其他核函数
    model = SVR(kernel='rbf')

    # 训练模型
    model.fit(X_train.reshape(X_train.shape[0], -1), y_train.ravel())

    # 预测
    y_pred = model.predict(X_test.reshape(X_test.shape[0], -1))
    # 逆归一化还原真实值
    y_pred_original = scaler_y.inverse_transform(y_pred.reshape(-1, 1))
    y_test_original = scaler_y.inverse_transform(y_test)

    # 计算均方误差并记录
    mse = mean_squared_error(y_test_original, y_pred_original)
    mse_scores.append(mse)

    # 计算平均绝对误差并记录
    mae = mean_absolute_error(y_test_original, y_pred_original)
    mae_scores.append(mae)

    # 计算平均绝对百分比误差并记录，注意需要处理分母为0的情况
    diff = np.abs(y_test_original - y_pred_original)
    divide = np.where(y_test_original == 0, 1e-10, y_test_original)
    mape = np.mean(diff / divide) * 100
    mape_scores.append(mape)

    # 计算均方根误差并记录
    rmse = np.sqrt(mse)  # 新增计算RMSE的代码，即对均方误差开平方
    rmse_scores.append(rmse)

    # 记录预测误差（这里简单用差值表示，可根据需要调整分析方式）
    prediction_error = np.abs(y_test_original - y_pred_original)
    all_prediction_errors.append(prediction_error)

# 输出每次折叠的均方误差、平均绝对误差、平均绝对百分比误差和均方根误差
for i, (mse, mae, mape, rmse) in enumerate(zip(mse_scores, mae_scores, mape_scores, rmse_scores)):
    print(f"第{i + 1}折 - 均方误差: {mse}, 平均绝对误差: {mae}, 平均绝对百分比误差: {mape}, 均方根误差: {rmse}")
print("平均均方误差:", np.mean(mse_scores))
print("平均平均绝对误差:", np.mean(mae_scores))
print("平均平均绝对百分比误差:", np.mean(mape_scores))
print("平均均方根误差:", np.mean(rmse_scores))  # 新增输出平均均方根误差的代码

第1折 - 均方误差: 445.47373422370987, 平均绝对误差: 17.459649289225798, 平均绝对百分比误差: 12.693992973814941, 均方根误差: 21.10624870088737
第2折 - 均方误差: 652.66850237804, 平均绝对误差: 18.333848556697, 平均绝对百分比误差: -60.484360994300935, 均方根误差: 25.54737760276072
第3折 - 均方误差: 374.924349603628, 平均绝对误差: 16.021000062993014, 平均绝对百分比误差: 126.55712580598019, 均方根误差: 19.362963347680747
第4折 - 均方误差: 492.79051711621537, 平均绝对误差: 17.86306366570067, 平均绝对百分比误差: -40.22058773664734, 均方根误差: 22.198885492659656
第5折 - 均方误差: 506.63327617067813, 平均绝对误差: 17.9780787118332, 平均绝对百分比误差: 50.06192933465472, 均方根误差: 22.50851563676908
平均均方误差: 494.4980758984543
平均平均绝对误差: 17.531128057289937
平均平均绝对百分比误差: 17.721619876700316
平均均方根误差: 22.144798156151516
