In [14]:
import pandas as pd
import numpy as np
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.stattools import adfuller

In [15]:


# 读取数据
df_csv = pd.read_csv('df_past_order_with_cluster_im.csv', index_col=0, parse_dates=True)
df_csv.drop('SKU', axis=1, inplace=True)
# 转置
df_csv = df_csv.T

# 将数据集划分为训练集和测试集
train = df_csv.loc['2016-06-01':'2016-06-18']
test = df_csv.loc['2016-06-19':'2016-06-23']  # 注意这里日期范围的调整
sum=0
# 遍历每一列进行预测
for col in train.columns:
    train_data = train[col]
    test_data = test[col]
    train_data_array = train_data.values.flatten()
    train_data_array = train_data_array.astype('float32')
    test_data_array = test_data.values.flatten()
    test_data_array = test_data_array.astype('float32') 
    
    # 检验数据是否平稳
    adf_result = adfuller(train_data_array)
    print(f'ADF Statistic: {adf_result[0]}')
    print(f'p-value: {adf_result[1]}')
    print(f'Critical Values: {adf_result[4]}')
    
    # 建立贝叶斯 ARIMA 模型
    model = auto_arima(train_data_array, 
                       m=1,  # 因为数据是日数据
                       start_p=1,
                       start_q=1,
                       max_p=6,
                       max_q=6,
                       d=1,  # 因为数据可能已经是一阶差分的
                       seasonal=False,  # 假设数据没有季节性
                       stepwise=True,
                       error_action='ignore',
                       n_fits=10,
                       trace=True)
    
    # 进行预测
    predictions = model.predict(n_periods=len(test_data_array))
    
    # 计算RMSE
    rmse = np.sqrt(mean_squared_error(test_data_array, predictions))
    sum+=rmse
    print(f'预测值 for {col}: {predictions}')
    print(f'真实值 for {col}: {test_data_array}')
    print(f'RMSE for {col}: {rmse}')
    print('-------------------------')
temp=sum/106
print({sum})
print({temp})

  df_csv = pd.read_csv('df_past_order_with_cluster_im.csv', index_col=0, parse_dates=True)


ADF Statistic: -3.8446077249963695
p-value: 0.002482512301067748
Critical Values: {'1%': -3.9644434814814815, '5%': -3.0849081481481484, '10%': -2.6818144444444445}
Performing stepwise search to minimize aic
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.27 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=202.405, Time=0.02 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=203.905, Time=0.04 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=203.739, Time=0.04 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=200.433, Time=0.01 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 0.424 seconds
预测值 for shang-hai-shi: [484. 484. 484. 484. 484.]
真实值 for shang-hai-shi: [559.  499.9 457.5 448.8 449. ]
RMSE for shang-hai-shi: 42.53022611104575
-------------------------
ADF Statistic: -0.9966860880472832
p-value: 0.7544710545893367
Critical Values: {'1%': -4.331573, '5%': -3.23295, '10%': -2.7487}
Performing stepwise search to minimize aic
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=95.942