In [1]:
import pandas as pd
import os 
from statsmodels.tsa.statespace.sarimax import SARIMAX
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from tqdm import trange
import numpy as np
import pmdarima as pm

import warnings

# 忽略所有警告
warnings.simplefilter("ignore")

In [2]:
# Directory that holds the input CSV files; the result files are written here too.
data_path = os.path.join("VMD_Data", 'has_seasonality')

# Number of trailing rows of every series that are held out as the test set.
test_size = 1163
# Forecast horizon: how many steps are predicted after each refit.
pre_len = 25

# Input files are the entries whose name splits into exactly two parts on "_".
# Only regular files are kept, because directory names such as
# ".ipynb_checkpoints" also split into two parts and cannot be read as CSV.
# Sorting makes the processing order reproducible (os.listdir order is arbitrary).
file_name = sorted(
    p for p in os.listdir(data_path)
    if len(p.split('_')) == 2 and os.path.isfile(os.path.join(data_path, p))
)

In [3]:
# For every input file: pick SARIMA orders on the training part with auto_arima,
# then walk through the test part in chunks of `pre_len` steps, refitting on all
# rows before the chunk and forecasting the chunk. Predictions and true values
# are written next to the input as "<name>_sarima.csv".
for file in file_name:
    print("==================={}=================".format(file))
    path = os.path.join(data_path, file)

    # Load the data.
    df = pd.read_csv(path)

    # Use the date column as the index (parsed to real datetimes first).
    df['date'] = pd.to_datetime(df['date'])
    df.set_index('date', inplace=True)

    # Split into training and test sets: the last `test_size` rows are the test set.
    train_size = len(df) - test_size
    if train_size <= 0:
        # A non-positive train_size would silently produce a wrong split
        # (negative slice bounds) instead of failing.
        raise ValueError(
            "{} has {} rows, which is not more than test_size={}".format(
                file, len(df), test_size
            )
        )
    train, test = df[0:train_size], df[train_size:]

    # Let auto_arima choose the SARIMA orders (weekly seasonality, m=7).
    auto_model = pm.auto_arima(train, seasonal=True, m=7, stepwise=True, suppress_warnings=True, trace=True)

    # Re-use the selected specification for the rolling refits below.
    order = auto_model.order
    seasonal_order = auto_model.seasonal_order
    # auto_arima may select a model with an intercept; SARIMAX defaults to no
    # trend term, so carry the intercept over explicitly. Otherwise the model
    # that is refitted is not the one that was selected.
    trend = getattr(auto_model, 'trend', None)
    if trend is None and getattr(auto_model, 'with_intercept', False):
        trend = 'c'

    all_pred = []

    # Note: the loop advances by pre_len positions per iteration.
    for i in trange(train_size, len(df), pre_len):
        # Steps left until the end of the series; the last chunk may be shorter.
        remaining_steps = len(df) - i
        steps_to_forecast = min(pre_len, remaining_steps)

        # Refit on every row before position i, so the model always sees the
        # most recent observations (including test rows already passed).
        sarima_model = SARIMAX(df[:i], order=order, seasonal_order=seasonal_order, trend=trend)
        sarima_fit = sarima_model.fit(disp=False)

        # Forecast the next chunk and collect the point predictions.
        sarima_forecast = sarima_fit.get_forecast(steps=steps_to_forecast)
        sarima_pred = sarima_forecast.predicted_mean
        all_pred.extend(sarima_pred)

    # Truncate so the prediction vector is never longer than the test set.
    pred_val = np.array(all_pred)[:test_size]

    # Flatten the true test values into a 1-D array.
    true_val = test.values.flatten()
    # Both vectors must now have the same length.
    assert len(pred_val) == len(true_val), "Length of predictions and true values do not match."

    # Save the result; splitext strips only the final extension, so file names
    # that contain additional dots keep their full stem.
    pd.DataFrame({
        "pred": pred_val,
        "true": true_val,
    }).to_csv(os.path.join(data_path, os.path.splitext(file)[0] + '_sarima.csv'))

print("预测完成并保存到CSV文件。")

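       #   # When the loop nears the end of the time series, the boundary case needs handling
       #  if i + pre_len > len(df):
       #      steps_to_end = len(df) - i
       #      sarima_pred = sarima_pred[:steps_to_end]
       #  all_pred.append(sarima_pred)
       # # Flatten the predictions and convert them into a single array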
    # pred_val = np.array(all_pred).flatten()
    # true_val = test.values.reshape(-1)
    # print(len(pred_val))
    # print(len(true_val))
#     pd.DataFrame.from_dict(
#         {
#             "pred":pred_val,
#             "true":true_val,
#         }
#     ).to_csv(os.path.join(data_path, file.split('.')[0] + '_sarima.csv'))
# print("预测完成并保存到CSV文件。")

Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,0,1)[7] intercept   : AIC=-46391.850, Time=10.15 sec
 ARIMA(0,0,0)(0,0,0)[7] intercept   : AIC=9781.508, Time=0.26 sec
 ARIMA(1,0,0)(1,0,0)[7] intercept   : AIC=inf, Time=1.90 sec
 ARIMA(0,0,1)(0,0,1)[7] intercept   : AIC=inf, Time=2.67 sec
 ARIMA(0,0,0)(0,0,0)[7]             : AIC=9782.996, Time=0.11 sec
 ARIMA(2,0,2)(0,0,1)[7] intercept   : AIC=-47038.006, Time=3.27 sec
 ARIMA(2,0,2)(0,0,0)[7] intercept   : AIC=-47040.006, Time=1.49 sec
 ARIMA(2,0,2)(1,0,0)[7] intercept   : AIC=-44543.745, Time=2.54 sec
 ARIMA(1,0,2)(0,0,0)[7] intercept   : AIC=inf, Time=3.83 sec
 ARIMA(2,0,1)(0,0,0)[7] intercept   : AIC=-47027.990, Time=2.66 sec
 ARIMA(3,0,2)(0,0,0)[7] intercept   : AIC=-47205.643, Time=1.73 sec
 ARIMA(3,0,2)(1,0,0)[7] intercept   : AIC=-44677.172, Time=8.19 sec
 ARIMA(3,0,2)(0,0,1)[7] intercept   : AIC=-47203.647, Time=3.61 sec
 ARIMA(3,0,2)(1,0,1)[7] intercept   : AIC=-46616.698, Time=13.08 sec
 ARIMA(3,0,1)(0,0,0)[7] inte

100%|██████████| 47/47 [01:03<00:00,  1.35s/it]


Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,0,1)[7] intercept   : AIC=-44120.138, Time=2.45 sec
 ARIMA(0,0,0)(0,0,0)[7] intercept   : AIC=4958.649, Time=0.28 sec
 ARIMA(1,0,0)(1,0,0)[7] intercept   : AIC=inf, Time=2.39 sec
 ARIMA(0,0,1)(0,0,1)[7] intercept   : AIC=inf, Time=4.41 sec
 ARIMA(0,0,0)(0,0,0)[7]             : AIC=4956.962, Time=0.18 sec
 ARIMA(2,0,2)(0,0,1)[7] intercept   : AIC=-45738.217, Time=6.31 sec
 ARIMA(2,0,2)(0,0,0)[7] intercept   : AIC=-45740.217, Time=1.70 sec
 ARIMA(2,0,2)(1,0,0)[7] intercept   : AIC=-44121.238, Time=3.44 sec
 ARIMA(1,0,2)(0,0,0)[7] intercept   : AIC=inf, Time=3.47 sec
 ARIMA(2,0,1)(0,0,0)[7] intercept   : AIC=-45649.542, Time=0.71 sec
 ARIMA(3,0,2)(0,0,0)[7] intercept   : AIC=-45934.647, Time=3.60 sec
 ARIMA(3,0,2)(1,0,0)[7] intercept   : AIC=-44186.708, Time=5.04 sec
 ARIMA(3,0,2)(0,0,1)[7] intercept   : AIC=-45932.647, Time=3.71 sec
 ARIMA(3,0,2)(1,0,1)[7] intercept   : AIC=-45861.417, Time=9.87 sec
 ARIMA(3,0,1)(0,0,0)[7] interc

100%|██████████| 47/47 [01:13<00:00,  1.57s/it]


Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,1,1)[7] intercept   : AIC=-43008.277, Time=11.69 sec
 ARIMA(0,0,0)(0,1,0)[7] intercept   : AIC=-8195.518, Time=0.53 sec
 ARIMA(1,0,0)(1,1,0)[7] intercept   : AIC=inf, Time=5.38 sec
 ARIMA(0,0,1)(0,1,1)[7] intercept   : AIC=inf, Time=5.84 sec
 ARIMA(0,0,0)(0,1,0)[7]             : AIC=-8197.518, Time=0.27 sec
 ARIMA(2,0,2)(0,1,1)[7] intercept   : AIC=-42490.394, Time=4.95 sec
 ARIMA(2,0,2)(1,1,0)[7] intercept   : AIC=-42999.764, Time=8.94 sec
 ARIMA(2,0,2)(2,1,1)[7] intercept   : AIC=-42414.104, Time=17.37 sec
 ARIMA(2,0,2)(1,1,2)[7] intercept   : AIC=-43083.543, Time=5.78 sec
 ARIMA(2,0,2)(0,1,2)[7] intercept   : AIC=-42488.415, Time=20.59 sec
 ARIMA(2,0,2)(2,1,2)[7] intercept   : AIC=-42543.651, Time=19.48 sec
 ARIMA(1,0,2)(1,1,2)[7] intercept   : AIC=-39068.943, Time=21.70 sec
 ARIMA(2,0,1)(1,1,2)[7] intercept   : AIC=-42950.717, Time=12.43 sec
 ARIMA(3,0,2)(1,1,2)[7] intercept   : AIC=inf, Time=nan sec
 ARIMA(2,0,3)(1,1,2)[7]

100%|██████████| 47/47 [22:13<00:00, 28.37s/it]


Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,0,1)[7] intercept   : AIC=-43384.244, Time=5.37 sec
 ARIMA(0,0,0)(0,0,0)[7] intercept   : AIC=1393.060, Time=0.77 sec
 ARIMA(1,0,0)(1,0,0)[7] intercept   : AIC=-15171.500, Time=3.51 sec
 ARIMA(0,0,1)(0,0,1)[7] intercept   : AIC=inf, Time=4.71 sec
 ARIMA(0,0,0)(0,0,0)[7]             : AIC=1391.229, Time=0.26 sec
 ARIMA(2,0,2)(0,0,1)[7] intercept   : AIC=-44114.439, Time=16.56 sec
 ARIMA(2,0,2)(0,0,0)[7] intercept   : AIC=-44118.585, Time=6.20 sec
 ARIMA(2,0,2)(1,0,0)[7] intercept   : AIC=-44077.683, Time=7.36 sec
 ARIMA(1,0,2)(0,0,0)[7] intercept   : AIC=inf, Time=9.76 sec
 ARIMA(2,0,1)(0,0,0)[7] intercept   : AIC=-43705.771, Time=6.05 sec
 ARIMA(3,0,2)(0,0,0)[7] intercept   : AIC=-44527.109, Time=10.00 sec
 ARIMA(3,0,2)(1,0,0)[7] intercept   : AIC=-44026.392, Time=9.18 sec
 ARIMA(3,0,2)(0,0,1)[7] intercept   : AIC=-44525.109, Time=14.18 sec
 ARIMA(3,0,2)(1,0,1)[7] intercept   : AIC=-44026.633, Time=9.60 sec
 ARIMA(3,0,1)(0,0,0)

100%|██████████| 47/47 [02:13<00:00,  2.84s/it]


Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,0,1)[7] intercept   : AIC=-41546.762, Time=7.02 sec
 ARIMA(0,0,0)(0,0,0)[7] intercept   : AIC=-4480.626, Time=0.68 sec
 ARIMA(1,0,0)(1,0,0)[7] intercept   : AIC=-17242.429, Time=4.54 sec
 ARIMA(0,0,1)(0,0,1)[7] intercept   : AIC=inf, Time=8.23 sec
 ARIMA(0,0,0)(0,0,0)[7]             : AIC=-4482.562, Time=0.25 sec
 ARIMA(2,0,2)(0,0,1)[7] intercept   : AIC=-41328.177, Time=2.80 sec
 ARIMA(2,0,2)(1,0,0)[7] intercept   : AIC=-41550.806, Time=7.74 sec
 ARIMA(2,0,2)(0,0,0)[7] intercept   : AIC=-41330.174, Time=3.27 sec
 ARIMA(2,0,2)(2,0,0)[7] intercept   : AIC=-41071.267, Time=25.24 sec
 ARIMA(2,0,2)(2,0,1)[7] intercept   : AIC=-41363.058, Time=24.56 sec
 ARIMA(1,0,2)(1,0,0)[7] intercept   : AIC=inf, Time=12.38 sec
 ARIMA(2,0,1)(1,0,0)[7] intercept   : AIC=-41683.530, Time=12.54 sec
 ARIMA(2,0,1)(0,0,0)[7] intercept   : AIC=-41232.058, Time=5.66 sec
 ARIMA(2,0,1)(2,0,0)[7] intercept   : AIC=-40707.049, Time=26.13 sec
 ARIMA(2,0,1)(1,

100%|██████████| 47/47 [02:19<00:00,  2.97s/it]


Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,1,1)[7] intercept   : AIC=-32045.865, Time=26.79 sec
 ARIMA(0,0,0)(0,1,0)[7] intercept   : AIC=-8190.154, Time=1.68 sec
 ARIMA(1,0,0)(1,1,0)[7] intercept   : AIC=-8635.882, Time=5.67 sec
 ARIMA(0,0,1)(0,1,1)[7] intercept   : AIC=inf, Time=9.73 sec
 ARIMA(0,0,0)(0,1,0)[7]             : AIC=-8192.153, Time=0.40 sec
 ARIMA(2,0,2)(0,1,1)[7] intercept   : AIC=-32054.959, Time=24.98 sec
 ARIMA(2,0,2)(0,1,0)[7] intercept   : AIC=-29607.459, Time=11.11 sec
 ARIMA(2,0,2)(0,1,2)[7] intercept   : AIC=-31093.401, Time=47.00 sec
 ARIMA(2,0,2)(1,1,0)[7] intercept   : AIC=-31297.969, Time=14.86 sec
 ARIMA(2,0,2)(1,1,2)[7] intercept   : AIC=-31574.235, Time=41.29 sec
 ARIMA(1,0,2)(0,1,1)[7] intercept   : AIC=inf, Time=25.59 sec
 ARIMA(2,0,1)(0,1,1)[7] intercept   : AIC=inf, Time=10.84 sec
 ARIMA(3,0,2)(0,1,1)[7] intercept   : AIC=-31051.675, Time=9.42 sec
 ARIMA(2,0,3)(0,1,1)[7] intercept   : AIC=-31461.780, Time=11.97 sec
 ARIMA(1,0,1)(0,1,1)

100%|██████████| 47/47 [19:59<00:00, 25.52s/it]

预测完成并保存到CSV文件。



