# A/H 溢价季度策略回测分析

本 Notebook 参考 `ah_premium_quarterly_bt.py`，分步实现 A/H 溢价季度策略回测，便于交互分析和结果展示。

In [None]:
# 导入必要库
import duckdb
from qs.backtester.data import Bar, DataFeed
from qs.backtester.broker import Broker
from qs.backtester.engine import BacktestEngine
from qs.backtester.stats import (
    compute_annual_returns,
    compute_max_drawdown,
    compute_risk_metrics,
)
from qs.strategy.ah_premium_quarterly import AHPremiumQuarterlyStrategy
import matplotlib.pyplot as plt

In [None]:
# Global font setup: choose a CJK-capable font so Chinese labels render, and
# keep minus signs rendering correctly.
import matplotlib
from matplotlib import font_manager, rcParams
import os, warnings

# Optional: silence "glyph missing" warnings if no CJK font can be found.
# warnings.filterwarnings('ignore', 'Glyph .* missing from font')

# Font family names to look for, in order of preference.
preferred_names = [
    "Microsoft YaHei",
    "SimHei",
    "Arial Unicode MS",
    "Noto Sans CJK SC",
    "Source Han Sans CN",
    "Source Han Sans SC",
]

# Common Windows font files to register when no preferred family is known yet.
# These are raw strings, so each path separator is written as one backslash.
candidate_files = [
    r"C:\Windows\Fonts\msyh.ttc",  # Microsoft YaHei
    r"C:\Windows\Fonts\msyhl.ttc",  # Microsoft YaHei Light
    r"C:\Windows\Fonts\simhei.ttf",  # SimHei
    r"C:\Windows\Fonts\msyh.ttf",  # older file format
]

# Project-local fallback font file (never downloaded automatically).
local_noto = "fonts/NotoSansCJKsc-Regular.otf"


def _find_preferred_font():
    """Return the first preferred family registered with matplotlib, else None."""
    registered = {f.name for f in font_manager.fontManager.ttflist}
    return next((name for name in preferred_names if name in registered), None)


def _register_font_files(paths):
    """Register every existing file in *paths* with matplotlib's font manager.

    A file that fails to load is reported and skipped, so the remaining
    candidates are still tried.
    """
    for fp in paths:
        if not os.path.exists(fp):
            continue
        try:
            font_manager.fontManager.addfont(fp)
        except Exception as e:  # best effort: report and keep going
            print("[字体加载失败]", fp, e)


# 1. Try the fonts matplotlib already knows about.
selected = _find_preferred_font()

# 2. Otherwise register the common Windows font files and look again.
if selected is None:
    _register_font_files(candidate_files)
    selected = _find_preferred_font()

# 3. Last resort: a font file the user placed under fonts/.
if selected is None:
    print(
        "[字体提示] 未找到常见中文字体，可下载 Noto Sans CJK 放入 fonts/ 目录, 如: fonts/NotoSansCJKsc-Regular.otf"
    )
    _register_font_files([local_noto])
    selected = _find_preferred_font()

# Family names known to matplotlib after the registration attempts above.
available = {f.name for f in font_manager.fontManager.ttflist}

# 4. Apply the result to rcParams.
if selected:
    rcParams["font.sans-serif"] = [selected]
else:
    # The default font stays in place; warn that Chinese text may not render.
    print("[警告] 仍使用默认字体，中文可能乱码。")
rcParams["axes.unicode_minus"] = False
print("Using font family:", rcParams.get("font.sans-serif", []))

In [None]:
# Load the trading calendar: one bar per date that appears in either the
# A-share or the H-share daily table, from start_date onwards.
start_date = "20180101"
db_path = "data/data.duckdb"

# The price columns are MIN() aggregates over every row of both tables for a
# date, and pct_chg is NULL; the query yields one row per trade_date.
# The start date is bound as a parameter instead of being spliced into the SQL.
q = """
SELECT trade_date,
       MIN(open) AS open,
       MIN(high) AS high,
       MIN(low) AS low,
       MIN(close) AS close,
       NULL AS pct_chg
FROM (
  SELECT trade_date, open, high, low, close FROM daily_a
  UNION ALL
  SELECT trade_date, open, high, low, close FROM daily_h
)
WHERE trade_date >= ?
GROUP BY 1
ORDER BY 1
"""

con = duckdb.connect(db_path, read_only=True)
try:
    rows = con.execute(q, [start_date]).fetchall()
finally:
    # Close the connection even when the query fails.
    con.close()

bars = [Bar(*r) for r in rows]
feed = DataFeed(bars)
print(f"Loaded {len(bars)} trading days from {start_date}")

In [None]:
# Backtest parameters shared by the cells below.
TOP_K: int = 5  # passed to the strategy as top_k
BOTTOM_K: int = 5  # passed to the strategy as bottom_k
START_DATE: str = "20180101"  # YYYYMMDD string
INIT_CASH: int = 1_000_000  # starting cash handed to the broker
CAPITAL_SPLIT: float = 0.5  # share of capital allocated to H shares

In [None]:
# Build the backtest components: strategy, broker, and the engine that ties
# them to the data feed.
strat = AHPremiumQuarterlyStrategy(
    capital_split=CAPITAL_SPLIT,
    start_date=START_DATE,
    bottom_k=BOTTOM_K,
    top_k=TOP_K,
    # Positions are valued and filled using adjusted prices.
    use_adjusted=True,
    # The premium uses raw prices, to avoid the distortion that
    # forward-adjusted prices introduce when comparing across instruments.
    premium_use_adjusted=False,
)
# NOTE(review): the trade log is disabled here, while a later cell reads
# broker.trades; confirm that list is still populated with this setting.
broker = Broker(enable_trade_log=False, cash=INIT_CASH)
engine = BacktestEngine(feed, broker, strat)

In [None]:
# Run the backtest; `curve` is the sequence of equity points the later cells
# analyse.
curve = engine.run()
n_curve_points = len(curve)
print(f"回测完成，曲线长度: {n_curve_points}")

In [None]:
# Annual returns derived from the equity curve, printed one year per line.
ann = compute_annual_returns(curve)
print("年度收益率:")
for year in ann:
    year_ret = ann[year]
    print(f"  {year}: {year_ret:.2%}")

In [None]:
# Maximum drawdown of the equity curve, with its peak and trough markers.
dd_result = compute_max_drawdown(curve)
max_dd, dd_peak, dd_trough = dd_result
print(f"最大回撤: {max_dd:.2%} from {dd_peak} to {dd_trough}")

In [None]:
# Risk metrics for the equity curve. Metrics whose name ends in "Rate", plus a
# few named ones, are printed with four decimals; everything else as-is.
risk = compute_risk_metrics(curve, INIT_CASH)
four_decimal_keys = ("CAGR", "AnnReturn", "AnnVol", "Sharpe")
print("风险指标:")
for metric, value in risk.items():
    use_four_decimals = metric.endswith("Rate") or metric in four_decimal_keys
    shown = f"{value:.4f}" if use_four_decimals else f"{value}"
    print(f"  {metric}: {shown}")

## 凯利公式资金使用比例估计

下面单元基于回测得到的每日策略权益序列，计算离散 Kelly、连续近似 Kelly 以及数值最大化估计，并给出分数 Kelly 建议（1/2、1/4）。

In [None]:
# Kelly-fraction estimates from the daily strategy equity series.
import math

import numpy as np
import pandas as pd

# Daily simple returns of the strategy equity, in date order.
curve_df_k = pd.DataFrame(
    {
        "date": [pd.to_datetime(b.trade_date) for b in curve],
        "equity": [b.equity for b in curve],
    }
).sort_values("date")
curve_df_k["ret"] = curve_df_k["equity"].pct_change()
rets = curve_df_k["ret"].dropna()

if len(rets) < 20:
    print("样本过少，无法可靠估算 Kelly。")
else:
    wins = rets[rets > 0]
    losses = rets[rets < 0]
    # Win rate over all days; days with a zero return count as non-wins.
    p = len(wins) / len(rets)
    avg_win = wins.mean() if len(wins) else np.nan
    # Average loss as a positive magnitude.
    avg_loss = -losses.mean() if len(losses) else np.nan
    # Payoff ratio; NaN when there are no winning or no losing days
    # (comparisons against NaN are False, so both cases fall through).
    R = avg_win / avg_loss if avg_loss > 0 else np.nan
    kelly_discrete = (p - (1 - p) / R) if R > 0 else np.nan
    mu = rets.mean()
    sigma2 = rets.var(ddof=0)
    kelly_cont = (mu / sigma2) if sigma2 > 0 else np.nan

    # The two-point (avg_win / avg_loss) growth model needs both averages.
    # NaN is truthy, so it has to be tested explicitly.
    two_point_ok = not (np.isnan(avg_win) or np.isnan(avg_loss))

    def growth_func(f: float) -> float:
        """Expected log growth at leverage *f* under the two-point model.

        Returns a large negative penalty when *f* is negative, when the model
        is undefined, or when a loss at this leverage would wipe out equity.
        """
        if not two_point_ok or f < 0 or (1 - f * avg_loss) <= 0:
            return -1e9
        return p * math.log(1 + f * avg_win) + (1 - p) * math.log(1 - f * avg_loss)

    # Search range for the numerical maximum, scaled from the discrete estimate.
    if not np.isnan(kelly_discrete):
        grid_upper = min(3 * kelly_discrete, 1.5) if kelly_discrete > 0 else 1.0
    else:
        grid_upper = 1.0
    grid = np.linspace(0, max(0.01, grid_upper), 400)
    growth_vals = [growth_func(f) for f in grid]
    # Without a defined model there is no optimum: report NaN rather than the
    # first grid point.
    f_opt_num = grid[int(np.argmax(growth_vals))] if two_point_ok else np.nan

    suggestions = {
        "Kelly(离散公式)": kelly_discrete,
        "Kelly(连续近似)": kelly_cont,
        "数值最大化估计": f_opt_num,
        # NaN propagates through the division, so no separate NaN branch.
        "Half Kelly": kelly_discrete / 2,
        "Quarter Kelly": kelly_discrete / 4,
    }

    print("--- 凯利参数估计 ---")
    print(
        f"样本数: {len(rets)} 胜率 p = {p:.4f}" if not np.isnan(p) else "胜率不可计算"
    )
    print(
        f"平均上涨幅度 avg_win = {avg_win:.5f}"
        if not np.isnan(avg_win)
        else "avg_win NA"
    )
    print(
        f"平均下跌幅度 avg_loss = {avg_loss:.5f}"
        if not np.isnan(avg_loss)
        else "avg_loss NA"
    )
    print(f"盈亏比 R = {R:.4f}" if not np.isnan(R) else "R NA")
    print(f"日收益均值 mu = {mu:.5f}, 方差 sigma^2 = {sigma2:.6f}")

    print("\n--- 凯利结果 (建议投入占可分配资金比例) ---")
    for k, v in suggestions.items():
        if v is None or np.isnan(v):
            print(f"  {k}: NA")
        else:
            print(f"  {k}: {v:.4%}")

    print(
        "\n说明: 1) 实盘常用分数凯利降低回撤; 2) 连续近似假设小幅独立收益; 3) 数值最大化基于二点分布近似; 4) 结果代表整体杠杆系数而非单笔内部权重。"
    )

### 基于再平衡周期收益的凯利估计

下面改用策略再平衡点之间的区间收益（仅在每次再平衡时截取一次权益），得到频率更低、彼此独立性更高的收益序列，据此重新计算 Kelly 比例，并与日频结果对比。

In [None]:
# Kelly estimates from rebalance-to-rebalance period returns, followed by a
# comparison with the daily-frequency discrete Kelly.
import math

import numpy as np
import pandas as pd


def _two_point_stats(returns):
    """Summarize a return series as a win/loss two-point distribution.

    Returns a dict with keys ``p`` (win rate), ``avg_win``, ``avg_loss``
    (positive magnitude), ``R`` (payoff ratio) and ``kelly`` (p - (1 - p) / R).
    Entries that cannot be computed are NaN.
    """
    n = len(returns)
    gains = returns[returns > 0]
    drops = returns[returns < 0]
    win_rate = len(gains) / n if n else np.nan
    mean_gain = gains.mean() if len(gains) else np.nan
    mean_drop = -drops.mean() if len(drops) else np.nan
    # Comparisons against NaN are False, so missing inputs yield NaN here.
    payoff = mean_gain / mean_drop if mean_drop > 0 else np.nan
    kelly = win_rate - (1 - win_rate) / payoff if payoff > 0 else np.nan
    return {
        "p": win_rate,
        "avg_win": mean_gain,
        "avg_loss": mean_drop,
        "R": payoff,
        "kelly": kelly,
    }


def _fmt_pct(v):
    """Format *v* as a percentage with four decimals, or "NA" for None/NaN."""
    return "NA" if v is None or np.isnan(v) else f"{v:.4%}"


# 1) Rebalance dates recorded by the strategy (empty if it exposes none).
_get_history = getattr(strat, "get_rebalance_history", None)
rh = _get_history() if _get_history is not None else []

# trade_date -> equity lookup for the whole curve.
curve_map = {b.trade_date: b.equity for b in curve}
rebalance_dates = sorted({r["rebalance_date"] for r in rh})
# Make sure the first and the last curve dates bound the period list.
if curve:
    start_day = curve[0].trade_date
    end_day = curve[-1].trade_date
    if not rebalance_dates or rebalance_dates[0] != start_day:
        rebalance_dates = [start_day] + rebalance_dates
    if end_day not in rebalance_dates:
        rebalance_dates.append(end_day)

# 2) Total return over each pair of consecutive dates; pairs with a date that
#    is missing from the curve are skipped.
period_rets = [
    {"start": d0, "end": d1, "ret": curve_map[d1] / curve_map[d0] - 1}
    for d0, d1 in zip(rebalance_dates, rebalance_dates[1:])
    if d0 in curve_map and d1 in curve_map
]
period_df = pd.DataFrame(period_rets)

if len(period_df) < 5:
    print("再平衡周期样本不足，无法计算 Kelly。")
else:
    rets_p = period_df["ret"]
    stats_p = _two_point_stats(rets_p)
    p = stats_p["p"]
    avg_win = stats_p["avg_win"]
    avg_loss = stats_p["avg_loss"]
    R = stats_p["R"]
    kelly_discrete = stats_p["kelly"]
    mu = rets_p.mean()
    sigma2 = rets_p.var(ddof=0)
    kelly_cont = (mu / sigma2) if sigma2 > 0 else np.nan

    # The two-point growth model needs both averages. NaN is truthy, so it is
    # tested explicitly; with few periods an all-win sample is plausible.
    two_point_ok = not (np.isnan(avg_win) or np.isnan(avg_loss))

    def growth_func(f: float) -> float:
        """Expected log growth at leverage *f* under the two-point model.

        Returns a large negative penalty when *f* is negative, when the model
        is undefined, or when a loss at this leverage would wipe out equity.
        """
        if not two_point_ok or f < 0 or (1 - f * avg_loss) <= 0:
            return -1e9
        return p * math.log(1 + f * avg_win) + (1 - p) * math.log(1 - f * avg_loss)

    # Search range for the numerical maximum, scaled from the discrete estimate.
    if not np.isnan(kelly_discrete):
        grid_upper = min(3 * kelly_discrete, 3.0) if kelly_discrete > 0 else 1.0
    else:
        grid_upper = 1.0
    grid = np.linspace(0, max(0.05, grid_upper), 400)
    growth_vals = [growth_func(f) for f in grid]
    # Without a defined model there is no optimum: report NaN rather than the
    # first grid point.
    f_opt_num = grid[int(np.argmax(growth_vals))] if two_point_ok else np.nan

    suggestions = {
        "Period Kelly(离散)": kelly_discrete,
        "Period Kelly(连续)": kelly_cont,
        "Period 数值最大化": f_opt_num,
        # NaN propagates through the division, so no separate NaN branch.
        "Half (离散)": kelly_discrete / 2,
        "Quarter (离散)": kelly_discrete / 4,
    }

    print("--- 再平衡周期收益统计 ---")
    print(period_df.head())
    print(f"样本数={len(rets_p)} 胜率={p:.4f}" if not np.isnan(p) else "胜率 NA")
    print(f"avg_win={avg_win:.5f}" if not np.isnan(avg_win) else "avg_win NA")
    print(f"avg_loss={avg_loss:.5f}" if not np.isnan(avg_loss) else "avg_loss NA")
    print(f"R={R:.4f}" if not np.isnan(R) else "R NA")
    print(f"mu={mu:.5f} sigma2={sigma2:.6f}")

    print("\n--- 再平衡周期 Kelly 建议 ---")
    for k, v in suggestions.items():
        print(f"  {k}: {_fmt_pct(v)}")

    # Daily-frequency discrete Kelly, recomputed from `curve` because this
    # cell's `kelly_discrete` now holds the period value. The 20-sample
    # minimum mirrors the daily Kelly cell.
    daily_equity = pd.DataFrame(
        {
            "date": [pd.to_datetime(b.trade_date) for b in curve],
            "equity": [b.equity for b in curve],
        }
    ).sort_values("date")["equity"]
    daily_rets = daily_equity.pct_change().dropna()
    daily_kelly_discrete = (
        _two_point_stats(daily_rets)["kelly"] if len(daily_rets) >= 20 else np.nan
    )

    print("\n(日频离散 Kelly vs 周期离散 Kelly 对比)")
    print("  日频离散 Kelly:", _fmt_pct(daily_kelly_discrete))
    print("  周期离散 Kelly:", _fmt_pct(kelly_discrete))

## 策略 vs 三大指数归一化收益曲线对比

下方代码将查询 `index_daily` (000300.SH) 与 `index_global` (HSI, IXIC) 的收盘价，与策略净值一起按起始日期归一化后绘制对比曲线。

In [None]:
# Plot the strategy equity against three benchmark indices, each normalized
# to 1 at its own first data point.
import pandas as pd


def _load_index_close(con, table, ts_code):
    """Return trade_date/close rows of *ts_code* in *table* from START_DATE on."""
    # `table` only ever receives the hard-coded names used below; the values
    # are bound as query parameters.
    return con.execute(
        f"SELECT trade_date, close FROM {table} "
        "WHERE ts_code = ? AND trade_date >= ? ORDER BY trade_date",
        [ts_code, START_DATE],
    ).fetchdf()


# 1. Read the index series; the connection is closed even if a query fails.
con = duckdb.connect("data/data.duckdb", read_only=True)
try:
    idx_sh300 = _load_index_close(con, "index_daily", "000300.SH")
    idx_hsi = _load_index_close(con, "index_global", "HSI")
    idx_ixic = _load_index_close(con, "index_global", "IXIC")
finally:
    con.close()

# 2. Parse dates and normalize each index by its first close.
for df in (idx_sh300, idx_hsi, idx_ixic):
    df["date"] = pd.to_datetime(df["trade_date"])
    base = df["close"].iloc[0] if not df.empty else 1.0
    df["norm"] = df["close"] / base

# Strategy equity curve, normalized the same way; an empty curve gets the same
# guard as the index frames instead of failing on .iloc[0].
curve_df = pd.DataFrame(
    {
        "date": [pd.to_datetime(b.trade_date) for b in curve],
        "equity": [b.equity for b in curve],
    }
).sort_values("date")
base_equity = curve_df["equity"].iloc[0] if not curve_df.empty else 1.0
curve_df["norm"] = curve_df["equity"] / base_equity

# 3. No date alignment is done: each series keeps its own span and is plotted
#    against its own dates.

# 4. Draw the chart.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    curve_df["date"], curve_df["norm"], label="策略净值", linewidth=1.6, color="black"
)
for idx_df, idx_label in (
    (idx_sh300, "沪深300 指数"),
    (idx_hsi, "恒生指数"),
    (idx_ixic, "纳斯达克指数"),
):
    ax.plot(idx_df["date"], idx_df["norm"], label=idx_label, linewidth=1.0)

ax.set_title(f"A/H 溢价季度策略 vs 三大指数（自 {START_DATE} 起）")
ax.set_xlabel("日期")
ax.set_ylabel("归一化收益 (起点=1)")
ax.set_ylim(bottom=0)
ax.legend(loc="upper left", fontsize=9)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Summarize trading costs: total fees, number of fills, fees per year, and a
# short sample of the recorded trades.
from collections import defaultdict

print(f"总费用(佣金+卖出税): {broker.total_fees:.2f}")
trade_log = broker.trades
print(f"成交笔数: {len(trade_log)}")
if trade_log:
    # Fees grouped by the first four characters of trade_date (the year in a
    # YYYYMMDD string).
    fees_by_year = defaultdict(float)
    for trade in trade_log:
        fee_year = trade.trade_date[:4]
        fees_by_year[fee_year] += trade.fees
    print("年度费用:")
    for fee_year, fee_total in sorted(fees_by_year.items()):
        print(f"  {fee_year}: {fee_total:.2f}")
    # Show the first few trades as examples.
    print("前5条交易记录(日期, 动作, 标的, 手数, 费用):")
    for trade in trade_log[:5]:
        lots = int(trade.size)
        print(
            f"  {trade.trade_date} {trade.action} {trade.symbol} size={lots} fees={trade.fees:.2f}"
        )

In [15]:
# Build the strategy's currently suggested holdings from the latest data in
# the database, without re-running the historical backtest.
import math

import duckdb
import pandas as pd


def _position_row(rec, leg, symbol, weight):
    """Return one output row for premium record *rec* held on the given leg."""
    return {
        "symbol": symbol,
        "leg": leg,
        "pair_name": rec.name,
        "premium_pct": rec.premium_pct,
        "target_weight": weight,
        "a_close_raw": rec.a_close_raw,
        "h_close_raw_cny": rec.h_close_raw_cny,
        "a_close_adj": rec.a_close_adj,
        "h_close_adj_cny": rec.h_close_adj_cny,
    }


# 1. Latest trade date present in both the A and H tables, and the trade date
#    just before it (present in either table), used as the premium
#    observation date.
raw_db = strat.dbr  # database path held by the already-built strategy
con = duckdb.connect(raw_db, read_only=True)
try:
    latest_a = con.execute("SELECT max(trade_date) FROM daily_a").fetchone()[0]
    latest_h = con.execute("SELECT max(trade_date) FROM daily_h").fetchone()[0]
    if latest_a is None or latest_h is None:
        # An empty table gives max() = NULL; there is no date to work from.
        latest_trade_date = None
        prev_row = None
    else:
        latest_trade_date = min(latest_a, latest_h)
        prev_row = con.execute(
            """
            SELECT trade_date FROM (
              SELECT trade_date FROM daily_a WHERE trade_date < ?
              UNION
              SELECT trade_date FROM daily_h WHERE trade_date < ?
            ) ORDER BY trade_date DESC LIMIT 1
            """,
            [latest_trade_date, latest_trade_date],
        ).fetchone()
finally:
    # Close the connection even when a query fails.
    con.close()
prev_trade_date = prev_row[0] if prev_row else None

if prev_trade_date is None:
    print("无法找到上一交易日，终止。")
else:
    # 2. Premium snapshot for the observation date. This calls a private
    #    strategy method directly, for analysis purposes only.
    recs = strat._load_premium_for_date(prev_trade_date)
    if not recs:
        print(f"上一交易日 {prev_trade_date} 未能获得溢价记录或缺 FX。")
    else:
        sorted_recs = sorted(recs, key=lambda r: r.premium_pct)
        # Lowest premiums form the A leg, highest premiums the H leg.
        bottom = sorted_recs[:BOTTOM_K]
        # A plain [-0:] slice would return the whole list, so TOP_K == 0 is
        # handled explicitly.
        top = sorted_recs[-TOP_K:] if TOP_K > 0 else []
        if not bottom or not top:
            print("样本不足以选出双边组合。")
        else:
            # Equal weight inside each leg; CAPITAL_SPLIT goes to the H leg.
            w_each_a = (1 - CAPITAL_SPLIT) / len(bottom)
            w_each_h = CAPITAL_SPLIT / len(top)
            rows = [_position_row(r, "A", r.cn_code, w_each_a) for r in bottom]
            rows += [_position_row(r, "H", r.hk_code, w_each_h) for r in top]
            latest_positions_df = pd.DataFrame(rows).sort_values(
                "target_weight", ascending=False
            )
            print(
                f"最新交易日: {latest_trade_date} | premium 观测日: {prev_trade_date}"
            )
            print(
                f"A腿 {len(bottom)} 只, H腿 {len(top)} 只; 每只 A 权重 ~{w_each_a:.4f}, 每只 H 权重 ~{w_each_h:.4f}"
            )
            display_cols = [
                "symbol",
                "leg",
                "pair_name",
                "premium_pct",
                "target_weight",
                "a_close_raw",
                "h_close_raw_cny",
            ]
            # Rich display when IPython is available, plain text otherwise.
            try:
                from IPython.display import display
            except ImportError:
                print(latest_positions_df[display_cols].to_string(index=False))
            else:
                display(latest_positions_df[display_cols])
            # Short summary.
            print("\n权重合计: ", latest_positions_df["target_weight"].sum())
            print("前3条记录预览:")
            print(latest_positions_df.head(3).to_string(index=False))
            # Persist the table; utf-8-sig writes a BOM at the start of the file.
            latest_positions_df.to_csv(
                "latest_strategy_positions.csv", index=False, encoding="utf-8-sig"
            )
            print("已保存为 latest_strategy_positions.csv")

[AHPremiumQuarterlyStrategy] _load_premium_for_date(20250807): 0.103s, 149 records (premium_use_adjusted=False)
最新交易日: 20250808 | premium 观测日: 20250807
A腿 5 只, H腿 5 只; 每只 A 权重 ~0.1000, 每只 H 权重 ~0.1000


Unnamed: 0,symbol,leg,pair_name,premium_pct,target_weight,a_close_raw,h_close_raw_cny
0,300750.SZ,A,宁德时代,-28.467134,0.1,263.05,367.733063
1,600036.SH,A,招商银行,-3.315828,0.1,45.15,46.69844
2,601899.SH,A,紫金矿业,-1.370486,0.1,20.3,20.582074
3,000333.SZ,A,美的集团,-0.584209,0.1,71.48,71.900047
4,603259.SH,A,药明康德,-0.390272,0.1,91.21,91.567362
5,01057.HK,H,浙江世宝,164.732439,0.1,12.52,4.729303
6,01033.HK,H,中石化油服,176.029028,0.1,2.02,0.731807
7,01349.HK,H,复旦张江,176.86943,0.1,10.46,3.777954
8,02218.HK,H,安德利果汁,184.598874,0.1,49.1,17.252352
9,03678.HK,H,弘业期货,203.995078,0.1,12.18,4.006644



权重合计:  1.0
前3条记录预览:
   symbol leg pair_name  premium_pct  target_weight  a_close_raw  h_close_raw_cny  a_close_adj  h_close_adj_cny
300750.SZ   A      宁德时代   -28.467134            0.1       263.05       367.733063       263.05       367.733063
600036.SH   A      招商银行    -3.315828            0.1        45.15        46.698440        45.15        46.698440
601899.SH   A      紫金矿业    -1.370486            0.1        20.30        20.582074        20.30        20.582074
已保存为 latest_strategy_positions.csv
