In [2]:
# -*- coding: utf-8 -*-
import os
import math
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy.linalg as la

# ==================== 数据预处理函数 ====================

def preprocess_data(data, time_len, rate, seq_len, pre_len):
    data = np.mat(data)
    train_size = int(time_len * rate)
    train_data = data[:train_size]
    test_data = data[train_size:time_len]
    
    trainX, trainY, testX, testY = [], [], [], []
    for i in range(len(train_data) - seq_len - pre_len):
        a = train_data[i: i + seq_len + pre_len]
        trainX.append(a[:seq_len])
        trainY.append(a[seq_len: seq_len + pre_len])
    for i in range(len(test_data) - seq_len - pre_len):
        b = test_data[i: i + seq_len + pre_len]
        testX.append(b[:seq_len])
        testY.append(b[seq_len: seq_len + pre_len])
    return trainX, trainY, testX, testY
    
# ==================== 评估函数定义 ====================

def evaluation1(a, b):
    rmse = math.sqrt(mean_squared_error(a, b))
    mae = mean_absolute_error(a, b)
    return rmse, mae

def evaluation2(a, b):
    F_norm = la.norm(a - b, 'fro') / la.norm(a, 'fro')
    r2 = 1 - ((a - b) ** 2).sum() / ((a - a.mean()) ** 2).sum()
    var = 1 - np.var(a - b) / np.var(a)
    return 1 - F_norm, r2, var

# ==================== 数据加载与归一化 ====================

data_path = 'data/feature_matrix_X.csv'
data = pd.read_csv(data_path)
data = data.values

# 最大值归一化
max_values = np.max(data, axis=0)
max_values[max_values == 0] = 1  # 防止除零
data = data / max_values



# ==================== 参数设置 ====================

time_len = data.shape[0]
num_nodes = data.shape[1]
train_rate = 0.8
seq_len = 12
pre_len = 1

trainX, trainY, testX, testY = preprocess_data(data, time_len, train_rate, seq_len, pre_len)

# ==================== 训练 SVR 模型 ====================

result = []
for i in range(num_nodes):
    a = np.mat(data)[:, i]
    a_X, a_Y, t_X, t_Y = preprocess_data(a, time_len, train_rate, seq_len, pre_len)

    a_X = np.reshape(np.array(a_X), [-1, seq_len])
    a_Y = np.reshape(np.array(a_Y), [-1, pre_len])
    a_Y = np.mean(a_Y, axis=1)  # SVR 目标只能是一维向量

    t_X = np.reshape(np.array(t_X), [-1, seq_len])
    t_Y = np.reshape(np.array(t_Y), [-1, pre_len])

    svr_model = SVR(kernel='linear')
    svr_model.fit(a_X, a_Y)
    predictions = svr_model.predict(t_X)

    # 对预测结果重复 pre_len 次以对齐维度
    predictions = np.array(predictions).reshape(-1, 1).repeat(pre_len, axis=1)
    result.append(predictions)

# ==================== 整合预测结果 ====================

result1 = np.array(result)               # shape: [num_nodes, samples, pre_len]
result1 = np.reshape(result1, [num_nodes, -1]).T  # shape: [samples, num_nodes]
testY1 = np.array(testY)
testY1 = np.reshape(testY1, [-1, num_nodes])      # shape: [samples, num_nodes]

# ==================== 反归一化处理 ====================

result1 = result1 * max_values           # 每列乘对应最大值
testY1 = testY1 * max_values



# ==================== 评估模型性能 ====================

rmse, mae = evaluation1(result1, testY1)
acc, r2, expl_var = evaluation2(result1, testY1)


# ==================== 输出与保存结果 ====================

print('SVR_rmse: %r' % rmse,
      'SVR_mae: %r' % mae,
      'SVR_acc: %r' % acc,
      'SVR_r2: %r' % r2,
      'SVR_var: %r' % expl_var)

# 保存目录
output_dir = os.path.join('out', 'SVR', f'precipitationSVR_{seq_len}_seq{pre_len}_pre')
os.makedirs(output_dir, exist_ok=True)

# 保存预测值与真实值
pd.DataFrame(result1).to_csv(os.path.join(output_dir, 'test_prediction.csv'), index=False)
pd.DataFrame(testY1).to_csv(os.path.join(output_dir, 'test_true.csv'), index=False)

# 保存评估指标（带列名）
evaluation = {
    'RMSE': [rmse],
    'MAE': [mae],
    'Accuracy': [acc],
    'R2': [r2],
    'Explained Variance': [expl_var]
}
eval_df = pd.DataFrame(evaluation)
eval_df.to_csv(os.path.join(output_dir, 'evalution.csv'), index=False)


SVR_rmse: 2.1181131433997002 SVR_mae: 1.5262932901955009 SVR_acc: 0.7394348605371424 SVR_r2: 0.41034697738703974 SVR_var: 0.4111269450421863
