In [18]:
# 美股量化交易策略实现 - 完整版
# 使用真实美股数据，实现5种经典量化策略

# --- 环境设置 ---
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime
import warnings
import matplotlib.pyplot as plt
from matplotlib import rcParams
# --- Global settings ---
# Silence every Python warning for the rest of the session. Note that this also
# hides pandas deprecation and chained-assignment warnings.
warnings.filterwarnings('ignore')
# Font family matplotlib uses for sans-serif text.
rcParams['font.sans-serif'] = ['Misans']

# --- Configuration ---
TICKERS = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN',
    'NVDA', 'TSLA', 'META', 'NFLX',
]
START_DATE, END_DATE = '2023-01-01', '2025-08-30'

# --- Download and preprocessing ---
print("正在下载数据...")
raw_data = yf.download(TICKERS, start=START_DATE, end=END_DATE, progress=False)

# One frame per field (close, volume, high, low); each is copied so later
# edits never write back into raw_data.
prices, volumes, highs, lows = (
    raw_data[field].copy() for field in ('Close', 'Volume', 'High', 'Low')
)

# Daily simple returns. dropna() removes every row that contains a NaN,
# which includes the first row produced by pct_change().
returns = prices.pct_change().dropna()

print("数据准备完成！")
print(f"数据范围: {prices.index[0].strftime('%Y-%m-%d')} 至 {prices.index[-1].strftime('%Y-%m-%d')}")
print(f"股票池: {', '.join(TICKERS)}")
print(f"交易天数: {len(prices)}")

正在下载数据...
数据准备完成！
数据范围: 2023-01-03 至 2025-08-29
股票池: AAPL, MSFT, GOOGL, AMZN, NVDA, TSLA, META, NFLX
交易天数: 667


In [19]:


# ====================
# 策略1: 动量策略 (Momentum Strategy)
# ====================
separator = "=" * 60
print("\n" + separator)
print("策略1: 动量策略 - 追涨杀跌")
print(separator)

# Momentum score: mean daily return over the trailing `lookback` sessions.
lookback = 20
momentum = returns.rolling(window=lookback).mean()


# 构建投资组合
def build_momentum_portfolio(row):
    """Turn one row of momentum scores into long/short portfolio weights.

    The three highest-scoring tickers get +1/3 each and the three
    lowest-scoring tickers get -1/3 each; everything else gets 0.

    Args:
        row: pd.Series of momentum scores indexed by ticker; may contain NaN.

    Returns:
        pd.Series of float weights with the same index as ``row``. All zeros
        when more than half of the scores are missing, or when fewer than two
        valid scores remain.
    """
    # Start from float zeros: writing 1/3 into an integer Series relies on a
    # silent upcast that pandas >= 2.1 deprecates.
    portfolio = pd.Series(0.0, index=row.index)

    if row.isna().sum() > len(row) / 2:
        return portfolio

    # Rank only tickers that have a score. sort_values() places NaN last, so
    # ranking the raw row would push tickers without data into the short leg.
    ranked = row.dropna().sort_values(ascending=False)

    # Shrink the legs when few scores are valid so that the long and short
    # selections can never overlap.
    leg_size = min(3, len(ranked) // 2)
    if leg_size == 0:
        return portfolio

    portfolio.loc[ranked.index[:leg_size]] = 1 / 3    # strongest momentum: long
    portfolio.loc[ranked.index[-leg_size:]] = -1 / 3  # weakest momentum: short
    return portfolio


# 生成持仓权重
# Target weights: one portfolio per row of momentum scores.
momentum_weights = momentum.apply(build_momentum_portfolio, axis=1)

# Weights decided on one row are applied to the next row's returns.
lagged_momentum_weights = momentum_weights.shift(1)
momentum_returns = lagged_momentum_weights.mul(returns).sum(axis=1)
momentum_cum = momentum_returns.add(1).cumprod()

# Performance metrics, annualised with 252 periods per year.
mean_daily_return = momentum_returns.mean()
annual_return = mean_daily_return * 252
sharpe_ratio = mean_daily_return / momentum_returns.std() * np.sqrt(252)
momentum_drawdown = momentum_cum.div(momentum_cum.cummax()).sub(1)
max_dd = momentum_drawdown.min()

print(f"年化收益率: {annual_return:.2%}")
print(f"夏普比率: {sharpe_ratio:.2f}")
print(f"最大回撤: {max_dd:.2%}")
print(f"累计收益: {(momentum_cum.iloc[-1] - 1):.2%}")


策略1: 动量策略 - 追涨杀跌
年化收益率: 5.89%
夏普比率: 0.23
最大回撤: -33.23%
累计收益: 7.01%


In [20]:
# ====================
# 策略2: 均值回归策略 (Mean Reversion with Bollinger Bands)
# ====================
separator = "=" * 60
print("\n" + separator)
print("策略2: 均值回归策略 - 布林带交易")
print(separator)

# Bollinger band parameters: window length and band width in standard deviations.
bb_period = 20
bb_std = 2

# Rolling mean / std of the close for every ticker at once.
rolling_close = prices.rolling(window=bb_period)
ma = rolling_close.mean()
std = rolling_close.std()
upper_band = ma + bb_std * std
lower_band = ma - bb_std * std

# Signal: +1 where the close is below the lower band, -1 where it is above the
# upper band, 0 otherwise (comparisons against NaN bands evaluate to False).
below_lower = prices < lower_band
above_upper = prices > upper_band
mr_signals = below_lower.astype('int64') - above_upper.astype('int64')

# Equal weight across the tickers that carry a signal on that row; rows with no
# signal divide 0 by 0 and are reset to 0 by fillna.
active_signal_count = mr_signals.abs().sum(axis=1)
mr_positions = mr_signals.div(active_signal_count, axis=0).fillna(0)

# Positions from one row earn the next row's returns.
mr_returns = mr_positions.shift(1).mul(returns).sum(axis=1)
mr_cum = mr_returns.add(1).cumprod()

# Performance metrics, annualised with 252 periods per year.
mr_annual = mr_returns.mean() * 252
mr_sharpe = mr_returns.mean() / mr_returns.std() * np.sqrt(252)
mr_drawdown = mr_cum.div(mr_cum.cummax()).sub(1)
mr_maxdd = mr_drawdown.min()

print(f"年化收益率: {mr_annual:.2%}")
print(f"夏普比率: {mr_sharpe:.2f}")
print(f"最大回撤: {mr_maxdd:.2%}")
print(f"累计收益: {(mr_cum.iloc[-1] - 1):.2%}")


策略2: 均值回归策略 - 布林带交易
年化收益率: 0.62%
夏普比率: 0.02
最大回撤: -30.84%
累计收益: -8.98%


In [21]:
# ====================
# 策略3: 事件驱动策略 (Volume Spike Event Strategy)
# ====================
print("\n" + "=" * 60)
print("策略3: 事件驱动策略 - 成交量异常")
print("=" * 60)

# Volume relative to its own trailing average. The rolling window includes the
# current row, so a spike is measured against an average it is part of.
vol_lookback = 20
avg_volume = volumes.rolling(window=vol_lookback).mean()
volume_ratio = volumes / avg_volume

# Event flag: volume above `volume_spike_threshold` times the trailing average.
# Rows where the ratio is NaN compare False and become 0.
volume_spike_threshold = 2.0
event_signals = (volume_ratio > volume_spike_threshold).astype(int)

# Holding rule: after an event on row i, hold the ticker on rows i+1 .. i+holding_days.
# Equivalently, the position on row t is 1 when any of the previous `holding_days`
# rows carried an event, which is a rolling max over the signal shifted by one row.
#
# This replaces a nested Python loop that wrote through
# `event_positions[col].iloc[...] = 1`. That chained assignment is a silent no-op
# under pandas Copy-on-Write, and the loop bound `len - holding_days` ignored
# events in the final rows even though they should still open positions on the
# rows that remain.
holding_days = 5
event_positions = (
    event_signals.shift(1)
    .rolling(window=holding_days, min_periods=1)
    .max()
    .reindex(index=prices.index, columns=prices.columns)
    .fillna(0)
    .astype(int)
)

# Equal weight across the tickers held on each row; rows with no holdings
# divide 0 by 0 and are reset to 0 by fillna.
event_weights = event_positions.div(event_positions.sum(axis=1), axis=0).fillna(0)
# No extra shift here: event_positions is already lagged by one row.
event_returns = (event_weights * returns).sum(axis=1)
event_cum = (1 + event_returns).cumprod()

# Performance metrics, annualised with 252 periods per year.
event_annual = event_returns.mean() * 252
event_sharpe = event_returns.mean() / event_returns.std() * np.sqrt(252) if event_returns.std() > 0 else 0
event_maxdd = (event_cum / event_cum.cummax() - 1).min()

print(f"年化收益率: {event_annual:.2%}")
print(f"夏普比率: {event_sharpe:.2f}")
print(f"最大回撤: {event_maxdd:.2%}")
print(f"累计收益: {(event_cum.iloc[-1] - 1):.2%}")


策略3: 事件驱动策略 - 成交量异常
年化收益率: 39.01%
夏普比率: 1.32
最大回撤: -17.50%
累计收益: 150.85%


In [22]:
# event_sharpe

In [23]:
# ====================
# 策略4: 多因子选股策略 (Multi-Factor Strategy)
# ====================
separator = "=" * 60
print("\n" + separator)
print("策略4: 多因子选股策略")
print(separator)

# Factor 1 - momentum: mean daily return over the trailing 20 rows.
factor_momentum = returns.rolling(window=20).mean()

# Factor 2 - low volatility: negated 20-row standard deviation of returns,
# so calmer tickers score higher.
factor_volatility = -returns.rolling(window=20).std()

# Factor 3 - labelled "value": where the close sits inside its 20-row
# high/low range (0 = at the low, 1 = at the high), negated so that a lower
# relative price scores higher.
range_low = lows.rolling(20).min()
range_high = highs.rolling(20).max()
price_position = (prices - range_low) / (range_high - range_low)
factor_value = -price_position


# 标准化因子
def z_score(df):
    """Standardise each row of ``df`` across its columns (cross-sectional z-score).

    Args:
        df: pd.DataFrame whose rows are standardised independently.

    Returns:
        pd.DataFrame of the same shape: (value - row mean) / row std, using the
        pandas default sample standard deviation. Rows whose std is exactly 0
        (all columns equal) yield 0 instead of NaN/inf.
    """
    row_std = df.std(axis=1)
    # A zero spread would divide 0 by 0; dividing by 1 instead keeps the
    # centred values, which are all 0 for such a row.
    safe_std = row_std.replace(0, 1)
    return df.sub(df.mean(axis=1), axis=0).div(safe_std, axis=0)


# 计算综合因子得分
# Standardise each factor across tickers, after dropping rows that contain any NaN.
factor_momentum_z = z_score(factor_momentum.dropna())
factor_volatility_z = z_score(factor_volatility.dropna())
factor_value_z = z_score(factor_value.dropna())

# Keep only the rows present in all three standardised factors.
common_idx = (
    factor_momentum_z.index
    .intersection(factor_volatility_z.index)
    .intersection(factor_value_z.index)
)

# Equal-weight average of the three z-scores.
aligned_factors = [
    factor_momentum_z.loc[common_idx],
    factor_volatility_z.loc[common_idx],
    factor_value_z.loc[common_idx],
]
composite_score = (aligned_factors[0] + aligned_factors[1] + aligned_factors[2]) / 3


# 选股：每期选择得分最高的3只
def select_top_stocks(row):
    """Give equal weight to the three highest-scoring tickers in one row.

    Args:
        row: pd.Series of composite factor scores indexed by ticker; may
            contain NaN.

    Returns:
        pd.Series of float weights with the same index as ``row``: 1/3 for each
        selected ticker and 0 elsewhere. All zeros when more than half of the
        scores are missing.
    """
    # Float zeros: writing 1/3 into an integer Series relies on a silent upcast
    # that pandas >= 2.1 deprecates.
    portfolio = pd.Series(0.0, index=row.index)

    if row.isna().sum() > len(row) / 2:
        return portfolio

    # Drop missing scores first so a ticker without data is never selected.
    top_stocks = row.dropna().nlargest(3).index
    portfolio.loc[top_stocks] = 1 / 3
    return portfolio


# Target weights: one portfolio per row of composite scores.
factor_weights = composite_score.apply(select_top_stocks, axis=1)

# Restrict returns to the rows that have weights, then apply each row's
# weights to the following row's returns.
returns_aligned = returns.loc[factor_weights.index]
lagged_factor_weights = factor_weights.shift(1)
factor_returns = lagged_factor_weights.mul(returns_aligned).sum(axis=1)
factor_cum = factor_returns.add(1).cumprod()

# Performance metrics, annualised with 252 periods per year.
factor_annual = factor_returns.mean() * 252
factor_sharpe = factor_returns.mean() / factor_returns.std() * np.sqrt(252)
factor_drawdown = factor_cum.div(factor_cum.cummax()).sub(1)
factor_maxdd = factor_drawdown.min()

print(f"年化收益率: {factor_annual:.2%}")
print(f"夏普比率: {factor_sharpe:.2f}")
print(f"最大回撤: {factor_maxdd:.2%}")
print(f"累计收益: {(factor_cum.iloc[-1] - 1):.2%}")


策略4: 多因子选股策略
年化收益率: 44.91%
夏普比率: 1.78
最大回撤: -22.71%
累计收益: 191.92%


In [24]:
# 假设 prices, returns, highs, lows, TICKERS, common_idx 已经从您的原始 Notebook 中加载

import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime

# --- 新增函数：获取基本面数据 ---
def get_fundamental_factors(tickers):
    """Collect an earnings-yield (E/P) and an ROE figure for each ticker.

    Values are read from ``yf.Ticker(t).info``. They are a single snapshot
    taken at call time, not a point-in-time history; the original author
    notes that a real backtest should use lagged data at each rebalance date.

    Args:
        tickers: iterable of ticker symbols.

    Returns:
        pd.DataFrame indexed by ticker with columns ``E_P`` and ``ROE``.
        A missing or non-positive trailing P/E gives ``E_P`` = 0, a missing
        ROE gives ``ROE`` = 0, and a ticker whose lookup raises gets 0 for both.
    """
    fundamental_data = {}
    for t in tickers:
        try:
            stock_info = yf.Ticker(t).info
            trailing_pe = stock_info.get("trailingPE")
            reported_roe = stock_info.get("returnOnEquity")

            # Value factor: invert P/E so that a cheaper stock scores higher.
            if trailing_pe and trailing_pe > 0:
                earnings_yield = 1 / trailing_pe
            else:
                earnings_yield = 0

            # Quality factor: ROE as reported, 0 when absent.
            fundamental_data[t] = {'E_P': earnings_yield, 'ROE': reported_roe or 0}
        except Exception:
            # Best effort: fall back to neutral zeros and report the ticker.
            fundamental_data[t] = {'E_P': 0, 'ROE': 0}
            print(f"无法获取 {t} 的基本面数据")

    # Dict of per-ticker dicts -> tickers as rows, factors as columns.
    return pd.DataFrame(fundamental_data).T

# --- 获取基本面数据 (只需运行一次) ---
print("正在获取基本面数据...")
current_fundamentals = get_fundamental_factors(TICKERS)
print("基本面数据获取完成！")
print(current_fundamentals)

# ====================
# Strategy 4 (enhanced): multi-factor stock selection
# ====================

# Both price-based factors use the same trailing 20-row window of returns.
returns_window = returns.rolling(window=20)

# Factor 1 - momentum: mean daily return over the window.
factor_momentum = returns_window.mean()

# Factor 2 - low volatility: negated standard deviation over the window.
factor_volatility = -returns_window.std()

# 因子3: 价值因子 (E/P) - 随时间变化，需要更复杂的建模。
# 简化处理：在回测期内，假设所有股票的价值排名保持不变（仅用于演示）。
# 实际应在每个调仓日重新计算因子，此处仅演示因子构成。
# 步骤：将静态基本面数据扩展为与价格数据相同的形状，以便对齐。

# 创建一个与 daily returns 相同索引的 DataFrame，填充基本面数据
def create_daily_fundamental_series(fundamental_score, index):
    """Broadcast one static score per ticker over every row of ``index``.

    Args:
        fundamental_score: pd.Series of scores indexed by ticker.
        index: row index for the result.

    Returns:
        pd.DataFrame with ``index`` as rows and the tickers as columns, where
        every row repeats the ticker's single score.
    """
    constant_columns = {
        ticker: fundamental_score.loc[ticker] for ticker in fundamental_score.index
    }
    return pd.DataFrame(constant_columns, index=index, columns=fundamental_score.index)

# 因子3: 价值因子 (E/P) - 分数越高，价值越高
daily_index = returns.index

# Factor 3 - value (E/P): a higher score means cheaper relative to earnings.
value_scores = current_fundamentals['E_P']
factor_value = create_daily_fundamental_series(value_scores, daily_index).fillna(0)

# Factor 4 - quality (ROE): a higher score means higher return on equity.
quality_scores = current_fundamentals['ROE']
factor_quality = create_daily_fundamental_series(quality_scores, daily_index).fillna(0)


# 标准化因子
def z_score(df):
    """Standardise each row of ``df`` across its columns (cross-sectional z-score).

    Rows whose standard deviation is exactly 0 are divided by 1 instead, so
    they come out as 0 rather than NaN/inf.
    """
    row_mean = df.mean(axis=1)
    row_std = df.std(axis=1).replace(0, 1)  # guard against division by zero
    centered = df.sub(row_mean, axis=0)
    return centered.div(row_std, axis=0)

# 计算综合因子得分
# Standardise every factor across tickers, after dropping rows that contain any NaN.
factor_momentum_z, factor_volatility_z, factor_value_z, factor_quality_z = [
    z_score(raw_factor.dropna())
    for raw_factor in (factor_momentum, factor_volatility, factor_value, factor_quality)
]

# Keep only the rows present in every standardised factor.
all_factors = [factor_momentum_z, factor_volatility_z, factor_value_z, factor_quality_z]
common_idx = factor_momentum_z.index
for factor in all_factors:
    common_idx = common_idx.intersection(factor.index)

# Equal-weight average of the four z-scores.
aligned_factors = [factor_z.loc[common_idx] for factor_z in all_factors]
composite_score = (
    aligned_factors[0] + aligned_factors[1] + aligned_factors[2] + aligned_factors[3]
) / 4


# 选股：每期选择得分最高的3只 (保持与原策略一致)
def select_top_stocks(row):
    """Give equal weight to the three highest-scoring tickers in one row.

    Args:
        row: pd.Series of composite factor scores indexed by ticker; may
            contain NaN.

    Returns:
        pd.Series of float weights with the same index as ``row``: 1/3 for each
        selected ticker and 0 elsewhere. All zeros when more than half of the
        scores are missing.
    """
    # Float zeros: writing 1/3 into an integer Series relies on a silent upcast
    # that pandas >= 2.1 deprecates.
    portfolio = pd.Series(0.0, index=row.index)

    if row.isna().sum() > len(row) / 2:
        return portfolio

    # Drop missing scores first so a ticker without data is never selected.
    top_stocks = row.dropna().nlargest(3).index
    portfolio.loc[top_stocks] = 1 / 3
    return portfolio


# Target weights: one portfolio per row of composite scores.
factor_weights = composite_score.apply(select_top_stocks, axis=1)

# Restrict returns to the rows that have weights, then apply each row's
# weights to the following row's returns.
returns_aligned = returns.loc[factor_weights.index]
lagged_factor_weights = factor_weights.shift(1)
factor_returns = lagged_factor_weights.mul(returns_aligned).sum(axis=1)
factor_cum = factor_returns.add(1).cumprod()

# Performance metrics, annualised with 252 periods per year.
factor_annual = factor_returns.mean() * 252
factor_sharpe = factor_returns.mean() / factor_returns.std() * np.sqrt(252)
factor_drawdown = factor_cum.div(factor_cum.cummax()).sub(1)
factor_maxdd = factor_drawdown.min()

separator = "=" * 60
print("\n" + separator)
print("增强型 4 因子选股策略 (动量/低波动/价值/质量)")
print(separator)
print(f"年化收益率: {factor_annual:.2%}")
print(f"夏普比率: {factor_sharpe:.2f}")
print(f"最大回撤: {factor_maxdd:.2%}")
print(f"累计收益: {(factor_cum.iloc[-1] - 1):.2%}")

正在获取基本面数据...
基本面数据获取完成！
            E_P      ROE
AAPL   0.025797  1.49814
MSFT   0.026708  0.33281
GOOGL  0.038006  0.34829
AMZN   0.029803  0.24770
NVDA   0.019698  1.09417
TSLA   0.003792  0.08177
META   0.037096  0.40648
NFLX   0.019403  0.43548

增强型 4 因子选股策略 (动量/低波动/价值/质量)
年化收益率: 48.39%
夏普比率: 1.84
最大回撤: -24.99%
累计收益: 216.94%


In [25]:
# ====================
# 策略5: 技术指标组合策略 (RSI + MACD)
# ====================
# Section banner for strategy 5.
separator = "=" * 60
print("\n" + separator)
print("策略5: 技术指标组合策略 (RSI + MACD)")
print(separator)


# 计算RSI
def calculate_rsi(prices, period=14):
    """Compute the Relative Strength Index using simple moving averages.

    Args:
        prices: pd.Series (or DataFrame) of prices in chronological order.
        period: number of price changes averaged per RSI value.

    Returns:
        Object of the same shape as ``prices`` holding RSI values in [0, 100].
        The first ``period`` entries are NaN because ``period`` price changes
        need ``period + 1`` prices. A window with no losses gives 100; a
        window with neither gains nor losses gives NaN.
    """
    delta = prices.diff()

    # clip() keeps the leading NaN from diff(). Mapping it to 0, as
    # where(delta > 0, 0) does, lets a fake "no change" observation into the
    # first window, so the first RSI value is built from period - 1 real changes.
    avg_gain = delta.clip(lower=0).rolling(window=period).mean()
    avg_loss = (-delta).clip(lower=0).rolling(window=period).mean()

    rs = avg_gain / avg_loss  # inf when the window contains no losses
    rsi = 100 - (100 / (1 + rs))
    return rsi


# 计算MACD
def calculate_macd(prices, fast=12, slow=26, signal=9):
    """Return the MACD histogram (MACD line minus its signal line).

    Args:
        prices: pd.Series (or DataFrame) of prices in chronological order.
        fast: span of the fast exponential moving average.
        slow: span of the slow exponential moving average.
        signal: span of the EMA applied to the MACD line.

    Returns:
        Object of the same shape as ``prices`` containing the histogram.
    """
    def _ema(series, span):
        # Recursive (adjust=False) exponential moving average.
        return series.ewm(span=span, adjust=False).mean()

    macd_line = _ema(prices, fast) - _ema(prices, slow)
    return macd_line - _ema(macd_line, signal)


# 对每只股票计算技术指标
# Indicators for every ticker: apply() calls each function once per column.
# NOTE(review): assumes the columns of `prices` are exactly the tickers in TICKERS.
rsi = prices.apply(calculate_rsi)
macd = prices.apply(calculate_macd)

# Signals. These are level checks on the same row, not crossovers:
#   +1 when RSI < 30 (oversold) and the MACD histogram is positive,
#   -1 when RSI > 70 (overbought) and the MACD histogram is negative,
#    0 otherwise. The two conditions cannot both hold on the same row.
buy_signal = (rsi < 30) & (macd > 0)
sell_signal = (rsi > 70) & (macd < 0)
tech_signals = buy_signal.astype('int64') - sell_signal.astype('int64')

# Equal weight across the tickers that carry a signal on that row; rows with no
# signal divide 0 by 0 and are reset to 0 by fillna.
active_signal_count = tech_signals.abs().sum(axis=1)
tech_positions = tech_signals.div(active_signal_count, axis=0).fillna(0)

# Positions from one row earn the next row's returns.
tech_returns = tech_positions.shift(1).mul(returns).sum(axis=1)
tech_cum = tech_returns.add(1).cumprod()

# Performance metrics, annualised with 252 periods per year.
tech_annual = tech_returns.mean() * 252
if tech_returns.std() > 0:
    tech_sharpe = tech_returns.mean() / tech_returns.std() * np.sqrt(252)
else:
    tech_sharpe = 0
tech_drawdown = tech_cum.div(tech_cum.cummax()).sub(1)
tech_maxdd = tech_drawdown.min()

print(f"年化收益率: {tech_annual:.2%}")
print(f"夏普比率: {tech_sharpe:.2f}")
print(f"最大回撤: {tech_maxdd:.2%}")
print(f"累计收益: {(tech_cum.iloc[-1] - 1):.2%}")


策略5: 技术指标组合策略 (RSI + MACD)
年化收益率: -5.18%
夏普比率: -0.51
最大回撤: -23.52%
累计收益: -14.00%


In [26]:
# ====================
# 策略对比汇总
# ====================
separator = "=" * 60
print("\n" + separator)
print("策略表现对比汇总")
print(separator)

# One tuple per strategy: (label, annual return, Sharpe, max drawdown, equity curve).
# NOTE(review): the factor_* names are assigned by both multi-factor cells, so
# this row reflects whichever of them ran last.
strategy_results = [
    ('动量策略', annual_return, sharpe_ratio, max_dd, momentum_cum),
    ('均值回归', mr_annual, mr_sharpe, mr_maxdd, mr_cum),
    ('事件驱动', event_annual, event_sharpe, event_maxdd, event_cum),
    ('多因子', factor_annual, factor_sharpe, factor_maxdd, factor_cum),
    ('技术指标', tech_annual, tech_sharpe, tech_maxdd, tech_cum),
]
labels, annuals, sharpes, drawdowns, curves = zip(*strategy_results)

# Every metric is pre-formatted as text for display.
summary_data = {
    '策略名称': list(labels),
    '年化收益': [format(value, ".2%") for value in annuals],
    '夏普比率': [format(value, ".2f") for value in sharpes],
    '最大回撤': [format(value, ".2%") for value in drawdowns],
    '累计收益': [format(curve.iloc[-1] - 1, ".2%") for curve in curves],
}

summary_df = pd.DataFrame(summary_data)
summary_df


策略表现对比汇总


Unnamed: 0,策略名称,年化收益,夏普比率,最大回撤,累计收益
0,动量策略,5.89%,0.23,-33.23%,7.01%
1,均值回归,0.62%,0.02,-30.84%,-8.98%
2,事件驱动,39.01%,1.32,-17.50%,150.85%
3,多因子,48.39%,1.84,-24.99%,216.94%
4,技术指标,-5.18%,-0.51,-23.52%,-14.00%


In [27]:
## 6.基本面因子

def strategy_fundamental_factors(tickers):
    """Rank tickers on four fundamentals read from ``yf.Ticker(t).info``.

    Args:
        tickers: iterable of ticker symbols.

    Returns:
        pd.DataFrame indexed by ticker with columns ``PE``, ``PB``, ``ROE``,
        ``DebtEq`` and ``score``, sorted by ``score`` descending. Tickers whose
        lookup raises are left out.
    """
    info_keys = ("trailingPE", "priceToBook", "returnOnEquity", "debtToEquity")

    data = {}
    for t in tickers:
        try:
            info = yf.Ticker(t).info
            data[t] = [info.get(key) for key in info_keys]
        except Exception:
            # Best effort: skip tickers that cannot be fetched.
            continue

    df = pd.DataFrame(data, index=["PE", "PB", "ROE", "DebtEq"]).T

    # Score = signed sum of ranks: low PE, PB and DebtEq are rewarded (negative
    # sign), high ROE is rewarded (positive sign).
    rank_direction = {"PE": -1, "PB": -1, "DebtEq": -1, "ROE": 1}
    df["score"] = sum(sign * df[column].rank() for column, sign in rank_direction.items())

    return df.sort_values("score", ascending=False)


def strategy_risk_premia(prices, returns, volumes):
    """Build a rank-based composite of five simple per-ticker factors.

    Factors, all taken from the inputs:
      * Price      - last row of ``prices`` (lower ranks better).
      * Momentum   - last price divided by the price 252 rows earlier, minus 1.
      * Size       - last price times last volume, a rough stand-in for market
                     cap (lower ranks better).
      * Volatility - standard deviation of the last 252 rows of ``returns``;
                     this is a per-row figure, not annualised (lower ranks better).
      * Quality    - mean of ``returns`` divided by its standard deviation over
                     the whole sample.

    Args:
        prices: DataFrame of prices, one column per ticker.
        returns: DataFrame of returns, one column per ticker.
        volumes: DataFrame of volumes, one column per ticker.

    Returns:
        DataFrame indexed by ticker with the five factor columns plus
        ``score``, sorted by ``score`` descending.
    """
    latest_close = prices.iloc[-1]

    factors = pd.DataFrame({
        "Price": latest_close,
        "Momentum": (prices / prices.shift(252)).iloc[-1] - 1,
        "Size": latest_close * volumes.iloc[-1],
        "Volatility": returns.rolling(252).std().iloc[-1],
        "Quality": returns.mean() / returns.std(),
    })

    # Signed sum of ranks: +1 rewards high values, -1 rewards low values.
    rank_direction = {
        "Price": -1,
        "Momentum": 1,
        "Size": -1,
        "Volatility": -1,
        "Quality": 1,
    }
    factors["score"] = sum(
        sign * factors[column].rank() for column, sign in rank_direction.items()
    )

    return factors.sort_values("score", ascending=False)


def strategy_sector_rotation(start="2023-01-01", end="2025-08-30"):
    """Sector rotation: recommend the sector ETF with the best ~3-month return.

    Downloads closing prices for four sector ETFs with yfinance, computes each
    ETF's return over the last 63 rows, then prints the ranking and the
    recommended sector.

    Args:
        start: first date passed to ``yf.download``.
        end: end date passed to ``yf.download``.

    Returns:
        pd.Series of 63-row returns indexed by ETF ticker. Entries are NaN when
        the downloaded history is too short to compute them.
    """
    etfs = {
        "Tech": "XLK",
        "Consumer": "XLY",
        "Energy": "XLE",
        "Utilities": "XLU"
    }
    lookback_rows = 63  # roughly three months of trading sessions

    data = yf.download(list(etfs.values()), start=start, end=end, progress=False)["Close"]

    if len(data) == 0:
        # Nothing downloaded: .iloc[-1] would raise, so report and return NaNs.
        print("历史数据不足，无法给出行业推荐")
        return pd.Series(float("nan"), index=data.columns)

    momentum = (data / data.shift(lookback_rows)).iloc[-1] - 1

    print("行业ETF动量：")
    print(momentum.sort_values(ascending=False))

    # With fewer than lookback_rows + 1 rows every entry is NaN, and idxmax()
    # on all-NaN data is deprecated / raises, so pick the winner among valid
    # entries only.
    valid_momentum = momentum.dropna()
    if valid_momentum.empty:
        print("历史数据不足，无法给出行业推荐")
        return momentum

    best_ticker = valid_momentum.idxmax()
    # Report the sector name rather than only the ETF ticker; the message is
    # about which sector to hold, and the names in `etfs` were otherwise unused.
    sector_by_ticker = {ticker: sector for sector, ticker in etfs.items()}
    best_sector = sector_by_ticker.get(best_ticker, best_ticker)
    print(f"推荐持仓行业：{best_sector} ({best_ticker})")

    return momentum


# Fundamental ranking of the configured tickers; the table is printed, best score first.
print("\n=== 基本面因子选股 ===")
fundamental_ranking = strategy_fundamental_factors(TICKERS)
print(fundamental_ranking)

# Rank-based factor composite built from the price, return and volume frames loaded earlier.
print("\n=== 风险溢价因子组合 ===")
risk_factors = strategy_risk_premia(prices, returns, volumes)
print(risk_factors)

# Sector rotation over the same date range; the function prints its own report
# and returns the per-ETF momentum.
print("\n=== 行业轮动 ===")
sector_momentum = strategy_sector_rotation(START_DATE, END_DATE)



=== 基本面因子选股 ===
               PE         PB      ROE   DebtEq  score
GOOGL   26.311632   8.222659  0.34829   11.481   -1.0
META    26.957232   9.592814  0.40648   25.406   -4.0
NVDA    50.766384  43.323610  1.09417   10.584   -7.0
AMZN    33.554195   7.019258  0.24770   47.808   -8.0
MSFT    37.442165  11.069605  0.33281   32.661  -10.0
AAPL    38.764793  57.652897  1.49814  154.486  -13.0
NFLX    51.537250  20.616304  0.43548   67.866  -14.0
TSLA   263.712600  18.364538  0.08177   16.823  -15.0

=== 风险溢价因子组合 ===
              Price  Momentum          Size  Volatility   Quality  score
Ticker                                                                  
NFLX    1208.250000  0.736690  3.878241e+09    0.020826  0.109459    1.0
AMZN     229.000000  0.322782  5.999617e+09    0.021267  0.083135   -2.0
GOOGL    212.719742  0.299134  8.451015e+09    0.019559  0.079011   -2.0
META     738.201782  0.427913  6.695859e+09    0.023051  0.120967   -3.0
NVDA     174.170166  0.358028  4.236827e+

In [29]:
!jupyter nbconvert --to html finance.ipynb

[NbConvertApp] Converting notebook finance.ipynb to html
[NbConvertApp] Writing 380345 bytes to finance.html
