In [38]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.vector_ar.var_model import VAR
from statsmodels.tsa.vector_ar.vecm import coint_johansen


In [43]:
# 读取文件
data = pd.read_excel('分类日销售量、日均利率、日利润.xlsx')

# 定义单位根检验函数
def adf_test(series):
    result = adfuller(series)
    p_value = result[1]
    return p_value

# 对销售量和利润分别进行单位根检验
sales_p_value = adf_test(data['日销量(千克)'])
profit_p_value = adf_test(data['日平均利润率'])

# 输出检验结果
print(f'销售量的单位根检验p-value: {sales_p_value}')
print(f'利润率的单位根检验p-value: {profit_p_value}')


销售量的单位根检验p-value: 0.2524035379310153
利润率的单位根检验p-value: 3.0437896702554437e-30


In [44]:
# 如果p-value大于显著性水平（例如0.05），则进行一次差分
if sales_p_value > 0.05:
    data['销售量差分'] = data['日销量(千克)'].diff()
else:
    data['销售量差分'] = data['日销量(千克)']

if profit_p_value > 0.05:
    data['利润率差分'] = data['日平均利润率'].diff()
else:
    data['利润率差分'] = data['日平均利润率']


In [45]:
# 提取差分后的数据
diff_data = data[['销售量差分', '利润率差分']].dropna()

# 进行Johansen检验
coint_result = coint_johansen(diff_data, det_order=0, k_ar_diff=1)

# 输出检验结果
print(f'协整检验的特征值：{coint_result.lr1}')
print(f'协整检验的临界值(5%显著性水平)：{coint_result.cvt[1]}')


协整检验的特征值：[381.89486636 140.44351843]
协整检验的临界值(5%显著性水平)：[2.7055 3.8415 6.6349]


In [46]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests


# 假设你的数据包含两列，一列是销售量，另一列是利润率
sales = data['日销量(千克)']
profit_rate = data['日平均利润率']

# 进行格兰杰因果检验
max_lag = 5  # 最大滞后阶数，你可以根据需求调整
test_result = grangercausalitytests(data[['日销量(千克)', '日平均利润率']], max_lag, verbose=True)

# 打印检验结果
for lag in range(1, max_lag + 1):
    print(f'滞后阶数 {lag}:')
    print('F检验结果:')
    print(test_result[lag][0])
    print('Chi-squared检验结果:')
    print(test_result[lag][1])
    print()



Granger Causality
number of lags (no zero) 1
ssr based F test:         F=0.1512  , p=0.6977  , df_denom=355, df_num=1
ssr based chi2 test:   chi2=0.1524  , p=0.6962  , df=1
likelihood ratio test: chi2=0.1524  , p=0.6962  , df=1
parameter F test:         F=0.1512  , p=0.6977  , df_denom=355, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=0.1980  , p=0.8204  , df_denom=352, df_num=2
ssr based chi2 test:   chi2=0.4017  , p=0.8180  , df=2
likelihood ratio test: chi2=0.4014  , p=0.8181  , df=2
parameter F test:         F=0.1980  , p=0.8204  , df_denom=352, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=0.1672  , p=0.9185  , df_denom=349, df_num=3
ssr based chi2 test:   chi2=0.5117  , p=0.9163  , df=3
likelihood ratio test: chi2=0.5113  , p=0.9164  , df=3
parameter F test:         F=0.1672  , p=0.9185  , df_denom=349, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=0.6975  , p=0.5941  

In [31]:
# 选择合适的滞后阶数
model = VAR(diff_data)
lags = model.select_order()

# 拟合VAR模型
var_model = model.fit(lags.selected_orders['aic'])

# 打印VAR模型的结果
print(var_model.summary())

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Sun, 10, Sep, 2023
Time:                     12:08:38
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                    10.3709
Nobs:                     341.000    HQIC:                   9.89768
Log likelihood:          -2531.84    FPE:                    14555.7
AIC:                      9.58429    Det(Omega_mle):         11972.0
--------------------------------------------------------------------
Results for equation 销售量差分
               coefficient       std. error           t-stat            prob
----------------------------------------------------------------------------
const            -1.898722         1.839754           -1.032           0.302
L1.销售量差分         -0.657278         0.055687          -11.803           0.000
L1.利润率差分         -0.086304         0.332972           -0.259           0.795

  self._init_dates(dates, freq)
