### 考虑 CRSP 价值加权指数月度收益率，时间从 1926-1 到 2008-12

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Parse the whitespace-delimited text file by hand; the first row is the header.
raw_data = []
with open("../ftsdata/m-ibm3dx2608.txt", "r", encoding="utf-8") as file:
    # Iterate the file lazily instead of materialising every line with readlines().
    for line in file:
        # str.split() with no separator splits on any run of whitespace
        # (spaces, tabs, the trailing newline) and never yields empty tokens.
        fields = line.split()
        if fields:  # skip blank lines so they cannot produce malformed rows
            raw_data.append(fields)
data = pd.DataFrame(raw_data[1:], columns=raw_data[0])

# The "date" column is parsed with the YYYYMMDD format and becomes the index.
data["date"] = pd.to_datetime(data["date"], format="%Y%m%d")
data.set_index("date", inplace=True)
# Every remaining column was read as text; convert them to numeric dtypes.
data = data.apply(pd.to_numeric)
data.head()

In [None]:
from statsmodels.tsa.ar_model import AutoReg


aic_list, bic_list = [], []
# Largest AR order considered in the search.
max_lag = 14
# Fit AR(p) for p = 1..max_lag and record each model's information criteria.
for i in range(1, max_lag + 1):
    # hold_back=max_lag excludes the same initial observations for every
    # order, so all candidates are estimated on a common sample and their
    # AIC/BIC values are directly comparable. Without it, each fit would
    # hold back only its own `i` observations.
    model = AutoReg(data["vwrtn"], lags=i, hold_back=max_lag)
    resm = model.fit()

    print(f"AIC = {resm.aic}, BIC = {resm.bic}")
    aic_list.append(resm.aic)
    bic_list.append(resm.bic)

# List position 0 corresponds to lag 1, hence the +1 to recover the order.
best_aic = np.argmin(aic_list) + 1
best_bic = np.argmin(bic_list) + 1

print(f"best_aic = {best_aic}, best_bic = {best_bic}")

In [None]:
# Line chart of the value-weighted return series, drawn through the Axes API.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(data["vwrtn"], color="green", label='Value Weighted Return')
ax.set_title('CRSP Value Weighted Index Monthly Return')
ax.set_ylabel('Value Weighted')
ax.legend()
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# Sample autocorrelation of the value-weighted return for lags 0..30.
fig, ax = plt.subplots(figsize=(8, 4))  # create the figure and its single axes together
plot_acf(data["vwrtn"], lags=30, ax=ax)
# Narrow the y-range so the small autocorrelations are readable.
ax.set_ylim(-0.25, 0.25)
ax.set_xlabel('Lag', fontsize=16)  # x-axis label
ax.set_title('ACF of Value Weighted Return')
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf

# Sample partial autocorrelation of the value-weighted return for lags 0..30.
fig, ax = plt.subplots(figsize=(8, 4))  # create the figure and its single axes together
plot_pacf(data["vwrtn"], lags=30, ax=ax)
# Narrow the y-range so the small partial autocorrelations are readable.
ax.set_ylim(-0.25, 0.25)
ax.set_xlabel('Lag', fontsize=16)  # x-axis label
ax.set_title('PACF of Value Weighted Return')
plt.show()

### 例子：美国的国民生产总值 (GNP) 经过季节调整后的季度增长率

In [None]:
import pandas as pd
import numpy as np

# Read the whitespace-delimited data file. The separator is a raw string so the
# regex escape \s reaches pandas intact and is not treated as an (invalid)
# Python string escape.
da = pd.read_csv("../ftsdata/q-gnp4710.txt", sep=r'\s+', dtype=float)
# Re-index the VALUE column on a quarter-end date range starting in 1947.
data = pd.DataFrame(da["VALUE"].values, index=pd.date_range(start="1947-01", periods=len(da), freq='QE'), columns=["value"])
data.head()

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# ACF of the first difference of log(value), i.e. the log growth rate, lags 0..30.
log_growth = np.diff(np.log(data["value"]))
fig, ax = plt.subplots(figsize=(8, 4))  # create the figure and its single axes together
plot_acf(log_growth, lags=30, ax=ax)
ax.set_ylim(-0.25, 1)
ax.set_xlabel('Lag', fontsize=16)  # x-axis label
# Blank out the default title that plot_acf puts on the axes.
ax.set_title('')
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf

# PACF of the first difference of log(value), i.e. the log growth rate, lags 0..30.
log_growth = np.diff(np.log(data["value"]))
fig, ax = plt.subplots(figsize=(8, 4))  # create the figure and its single axes together
plot_pacf(log_growth, lags=30, ax=ax)
ax.set_ylim(-0.25, 1)
ax.set_xlabel('Lag', fontsize=16)  # x-axis label
# Blank out the default title that plot_pacf puts on the axes.
ax.set_title('')
plt.show()