In [1]:
import pandas as pd
import numpy as np
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.stattools import adfuller

In [3]:


# Load the order history. The first CSV column becomes the index; it holds the
# series labels (they reappear as column names after the transpose below), so
# no date parsing is requested for it.
df_csv = pd.read_csv('df_past_order_with_cluster.csv', index_col=0)
df_csv = df_csv.drop(columns='SKU')
# Transpose so that each row is one time step and each column is one series.
df_csv = df_csv.T

# Split into training and test windows; .loc label slices include both ends.
# NOTE(review): assumes the transposed index holds sortable 'YYYY-MM-DD'
# labels -- confirm against the CSV header.
train = df_csv.loc['2016-06-01':'2016-06-18']
test = df_csv.loc['2016-06-19':'2016-06-23']

# One RMSE per forecast series, in column order.
rmse_scores = []

# Fit and evaluate one model per column.
for col in train.columns:
    y_train = train[col].to_numpy(dtype='float32').ravel()
    y_test = test[col].to_numpy(dtype='float32').ravel()

    # Augmented Dickey-Fuller stationarity test. The result is printed for
    # information only; it does not feed into the model configuration below.
    adf_result = adfuller(y_train)
    print(f'ADF Statistic: {adf_result[0]}')
    print(f'p-value: {adf_result[1]}')
    print(f'Critical Values: {adf_result[4]}')

    # Select a non-seasonal ARIMA(p, 1, q) order with pmdarima's stepwise
    # search. d is fixed at 1 (one round of differencing) rather than being
    # chosen from the ADF result. n_fits is not passed: it only applies to
    # random search, which is not used when stepwise=True.
    model = auto_arima(y_train,
                       m=1,
                       start_p=4,
                       start_q=1,
                       max_p=6,
                       max_q=6,
                       d=1,
                       seasonal=False,
                       stepwise=True,
                       error_action='ignore',
                       trace=True)

    # Forecast exactly as many steps as there are test observations.
    predictions = model.predict(n_periods=len(y_test))

    # Root-mean-squared error of the forecast against the held-out values.
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    rmse_scores.append(rmse)
    print(f'预测值 for {col}: {predictions}')
    print(f'真实值 for {col}: {y_test}')
    print(f'RMSE for {col}: {rmse}')
    print('-------------------------')

# Aggregate over the series that were actually evaluated instead of a
# hard-coded count. np.sum avoids depending on the builtin `sum`, which a
# stale kernel may still have rebound to a number.
total_rmse = float(np.sum(rmse_scores))
mean_rmse = total_rmse / len(rmse_scores) if rmse_scores else float('nan')
print(f'Total RMSE: {total_rmse}')
print(f'Mean RMSE: {mean_rmse}')

  df_csv = pd.read_csv('df_past_order_with_cluster.csv', index_col=0, parse_dates=True)


ADF Statistic: 1.961216329098574
p-value: 0.9986203859632253
Critical Values: {'1%': -4.331573, '5%': -3.23295, '10%': -2.7487}
Performing stepwise search to minimize aic
 ARIMA(4,1,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.15 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=198.600, Time=0.00 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=200.329, Time=0.02 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.02 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=196.631, Time=0.01 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.05 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 0.255 seconds
预测值 for shang-hai-shi: [293.79998779 293.79998779 293.79998779 293.79998779 293.79998779]
真实值 for shang-hai-shi: [366.8 359.4 232.5 238.9 270.3]
RMSE for shang-hai-shi: 58.23454066794625
-------------------------
ADF Statistic: -1.2259176514183592
p-value: 0.6623484226195778
Critical Values: {'1%': -4.331573, '5%': -3.23295, '10%': -2.7487}
Performing stepwise search to m