In [3]:
import pandas as pd
import statsmodels.api as sm

# Sample daily OHLC bars for the ETF and for SPY (illustrative data only).
data_etf = {
    'Date': ['2023-01-01', '2023-01-02', '2023-01-03'],
    'Open': [100, 101, 102],
    'High': [101, 102, 103],
    'Low': [99, 100, 101],
    'Close': [100, 101, 102]
}

data_spy = {
    'Date': ['2023-01-01', '2023-01-02', '2023-01-03'],
    'Open': [400, 402, 404],
    'High': [402, 404, 406],
    'Low': [398, 400, 402],
    'Close': [400, 402, 404]
}

# Load both series into DataFrames.
df_etf = pd.DataFrame(data_etf)
df_spy = pd.DataFrame(data_spy)

# Parse the dates and sort chronologically so that pct_change() below
# compares each close with the previous day's close.
df_etf['Date'] = pd.to_datetime(df_etf['Date'])
df_spy['Date'] = pd.to_datetime(df_spy['Date'])
df_etf = df_etf.sort_values(by='Date')
df_spy = df_spy.sort_values(by='Date')
print(type(df_etf))
print(df_etf.head())

# Inner-join the two close series on Date; only dates present in both frames
# survive, and the suffixes tell the two Close columns apart.
df = pd.merge(df_etf[['Date', 'Close']], df_spy[['Date', 'Close']], on='Date', suffixes=('_ETF', '_SPY'))

# Simple daily returns. fill_method=None disables pct_change's deprecated
# default of forward-filling missing closes, which would otherwise inject
# fabricated 0% returns into the regression; gaps stay NaN and are dropped.
df['ETF_Return'] = df['Close_ETF'].pct_change(fill_method=None)
df['SPY_Return'] = df['Close_SPY'].pct_change(fill_method=None)

# Drop rows containing a missing value (always the first row, which has no
# previous close, plus any rows affected by a missing close).
df = df.dropna()

# Regress ETF returns on SPY returns: ETF_Return = alpha + beta * SPY_Return.
# has_constant='add' forces the intercept column to be appended. With the
# default ('skip'), a constant non-zero SPY_Return column is treated as the
# intercept, no 'const' column is created, and params['const'] raises KeyError.
# NOTE: if SPY_Return really is constant the design is rank-deficient and the
# resulting alpha/beta are not meaningful.
X = sm.add_constant(df['SPY_Return'], has_constant='add')
y = df['ETF_Return']

# NOTE: the three sample rows leave only two return observations, so the
# two-parameter line passes exactly through both points; the printed values
# are illustrative, not statistically meaningful.
model = sm.OLS(y, X).fit()
alpha = model.params['const']  # intercept
beta = model.params['SPY_Return']  # slope on the SPY return

# Report the estimates.
print("Alpha:", alpha)
print("Beta:", beta)


<class 'pandas.core.frame.DataFrame'>
        Date  Open  High  Low  Close
0 2023-01-01   100   101   99    100
1 2023-01-02   101   102  100    101
2 2023-01-03   102   103  101    102
Alpha: -0.009900990099164337
Beta: 3.9801980198329967
