# 环境设置

In [None]:
'''环境设置'''

import tushare as ts
import pandas as pd
import numpy as np
import datetime
import os
import sys
import time
# 初始化tushare pro接口
ts.set_token("Your token here")
pro = ts.pro_api()

# 设置输出列太多时不要换行
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 5000)

In [2]:
'''获取交易日历'''

# 设定研究时间范围：过去500个交易日（约两年），截止到2025-10-23
end_date = "20251023" # datetime.datetime.today().strftime("%Y%m%d")
start_date = (datetime.datetime.strptime(end_date, "%Y%m%d") - datetime.timedelta(days=760)).strftime("%Y%m%d")
print(f"研究时间范围：{start_date} 至 {end_date}")

# 获取交易日历，上交所和深交所，
cal_sh = pro.trade_cal(exchange='SSE', start_date=start_date, end_date=end_date)
cal_sz = pro.trade_cal(exchange='SZSE', start_date=start_date, end_date=end_date)
# 两个都只保留is_open=1的交易日，所有列都保留
cal_sh = cal_sh[cal_sh['is_open'] == 1].reset_index(drop=True)
cal_sz = cal_sz[cal_sz['is_open'] == 1].reset_index(drop=True)
# 检查一下两个表格的cal_date是否一致
if not cal_sh['cal_date'].equals(cal_sz['cal_date']):
    print("警告：上交所和深交所的交易日历不一致！")
    sys.exit(1)
# 使用上交所的交易日历作为最终的交易日历
cal = cal_sh.copy()
# print(cal) # 输出交易日历
# 把日期设为index，然后按顺序排列，删掉exchange列
cal = cal.set_index('cal_date').sort_index().drop(columns=['exchange'])
# 把交易日历保存到本地，保存为data/trade_cal.csv，如果data目录不存在则创建
if not os.path.exists('data'):
    os.makedirs('data')
cal.to_csv('data/trade_cal.csv')
print(f"交易日历已保存到 data/trade_cal.csv ，共 {len(cal)} 个交易日。")

研究时间范围：20230924 至 20251023
交易日历已保存到 data/trade_cal.csv ，共 500 个交易日。


In [3]:
# 获取banchmark指数日线数据，沪深300指数 000300.SH和中证2000指数 000906.SH
benchmarks = ['000300.SH', '000906.SH']
for benchmark in benchmarks:
    df_benchmark = pro.index_daily(ts_code=benchmark, start_date=start_date, end_date=end_date)
    df_benchmark = df_benchmark.sort_values(by='trade_date').reset_index(drop=True)
    # 把日期设为index，然后按顺序排列
    df_benchmark = df_benchmark.set_index('trade_date').sort_index()
    # 把指数日线数据保存到本地，保存为data/benchmark/{benchmark}_daily.csv
    if not os.path.exists('data/benchmark'):
        os.makedirs('data/benchmark')
    df_benchmark.to_csv(f'data/benchmark/{benchmark}_daily.csv')
    print(f"指数 {benchmark} 日线数据已保存到 data/benchmark/{benchmark}_daily.csv ，共 {len(df_benchmark)} 条记录。")

指数 000300.SH 日线数据已保存到 data/benchmark/000300.SH_daily.csv ，共 500 条记录。
指数 000906.SH 日线数据已保存到 data/benchmark/000906.SH_daily.csv ，共 500 条记录。


# 一、数据抓取

## 1. 个股数据 - 招商银行(600036.SH)、比亚迪(002594.SZ)、贵州茅台(600519.SH)

In [4]:
# 1.1 【d】股票复权行情数据
print("=" * 80)
print("1.1 股票复权行情数据 (ts.pro_bar)")
print("=" * 80)

stocks = ['600036.SH', '002594.SZ', '600519.SH']
stock_names = ['招商银行', '比亚迪', '贵州茅台']

for stock, name in zip(stocks, stock_names):
    print(f"\n{name} ({stock}):")
    df = ts.pro_bar(ts_code=stock, adj='hfq', start_date=start_date, end_date=end_date)
    # 重命名列：保留trade_date和ts_code，其他列加前缀indiv_d_
    cols_to_rename = {col: f'indiv_d_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/stock/raw/{stock}_indiv_d.csv
    if not os.path.exists('data/stock/raw'):
        os.makedirs('data/stock/raw')
    df.to_csv(f'data/stock/raw/{stock}_indiv_d.csv', index=False)
    print(f"数据已保存到 data/stock/raw/{stock}_indiv_d.csv")

1.1 股票复权行情数据 (ts.pro_bar)

招商银行 (600036.SH):


  data['adj_factor'] = data['adj_factor'].fillna(method='bfill')


  indiv_d_ts_code indiv_d_trade_date  indiv_d_open  indiv_d_high  indiv_d_low  indiv_d_close  indiv_d_pre_close  indiv_d_change  indiv_d_pct_chg  indiv_d_vol  indiv_d_amount
0       600036.SH           20251023        265.13        268.54       264.05         266.96             265.13            1.83             0.69    713173.73     3004975.799
1       600036.SH           20251022        265.32        266.71       263.74         265.13             265.32           -0.19            -0.07    551333.64     2311324.892
2       600036.SH           20251021        262.22        266.08       262.22         265.32             261.78            3.54             1.35    759931.24     3184239.228
3       600036.SH           20251020        262.98        264.12       259.76         261.78             262.85           -1.07            -0.41    649477.29     2694843.445
4       600036.SH           20251017        264.69        266.83       261.53         262.85             265.00           -2.15   

  data['adj_factor'] = data['adj_factor'].fillna(method='bfill')


  indiv_d_ts_code indiv_d_trade_date  indiv_d_open  indiv_d_high  indiv_d_low  indiv_d_close  indiv_d_pre_close  indiv_d_change  indiv_d_pct_chg  indiv_d_vol  indiv_d_amount
0       002594.SZ           20251023        325.38        326.74       321.29         326.61             326.90           -0.29            -0.09    378603.42     3889661.811
1       002594.SZ           20251022        327.90        328.60       325.61         326.90             329.98           -3.08            -0.93    357874.19     3715302.136
2       002594.SZ           20251021        329.76        330.96       328.35         329.98             329.23            0.75             0.23    299074.19     3133789.475
3       002594.SZ           20251020        331.43        332.72       327.84         329.23             328.94            0.29             0.09    355614.52     3721699.722
4       002594.SZ           20251017        335.78        335.87       328.41         328.94             336.91           -7.97   

  data['adj_factor'] = data['adj_factor'].fillna(method='bfill')


In [5]:
# 1.2 【din】每日指标
print("=" * 80)
print("1.2 股票每日指标 (pro.daily_basic)")
print("=" * 80)

for stock, name in zip(stocks, stock_names):
    print(f"\n{name} ({stock}):")
    df = pro.daily_basic(ts_code=stock, start_date=start_date, end_date=end_date)
    # 重命名列：保留trade_date和ts_code，其他列加前缀indiv_din_
    cols_to_rename = {col: f'indiv_din_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/stock/raw/{stock}_indiv_din.csv
    if not os.path.exists('data/stock/raw'):
        os.makedirs('data/stock/raw')
    df.to_csv(f'data/stock/raw/{stock}_indiv_din.csv', index=False)
    print(f"数据已保存到 data/stock/raw/{stock}_indiv_din.csv")

1.2 股票每日指标 (pro.daily_basic)

招商银行 (600036.SH):
  indiv_din_ts_code indiv_din_trade_date  indiv_din_close  indiv_din_turnover_rate  indiv_din_turnover_rate_f  indiv_din_volume_ratio  indiv_din_pe  indiv_din_pe_ttm  indiv_din_pb  indiv_din_ps  indiv_din_ps_ttm  indiv_din_dv_ratio  indiv_din_dv_ttm  indiv_din_total_share  indiv_din_float_share  indiv_din_free_share  indiv_din_total_mv  indiv_din_circ_mv
0         600036.SH             20251023            42.24                   0.3457                     0.5832                    0.93        7.1789            7.1699        1.0533        3.1565            3.1846              4.6685            4.7349           2.521985e+06           2.062894e+06           1222783.368        1.065286e+08       8.713666e+07
1         600036.SH             20251022            41.95                   0.2673                     0.4509                    0.63        7.1296            7.1207        1.0461        3.1348            3.1627              4.7008       

In [6]:
# 1.3 【fin】财务指标
print("=" * 80)
print("1.3 股票财务指标 (pro.fina_indicator)")
print("=" * 80)

for stock, name in zip(stocks, stock_names):
    print(f"\n{name} ({stock}):")
    # 这里财务数据抓取start_date要往前多加180天，以确保能获取到足够的历史财务数据
    fin_start_date = (datetime.datetime.strptime(start_date, "%Y%m%d") - datetime.timedelta(days=180)).strftime("%Y%m%d")
    
    df = pro.fina_indicator(ts_code=stock, start_date=fin_start_date, end_date=end_date)
    # 重命名列：保留ts_code，其他列加前缀indiv_fin_
    cols_to_rename = {col: f'indiv_fin_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df)
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/stock/raw/{stock}_indiv_fin.csv
    if not os.path.exists('data/stock/raw'):
        os.makedirs('data/stock/raw')
    df.to_csv(f'data/stock/raw/{stock}_indiv_fin.csv', index=False)
    print(f"数据已保存到 data/stock/raw/{stock}_indiv_fin.csv")

1.3 股票财务指标 (pro.fina_indicator)

招商银行 (600036.SH):
   indiv_fin_ts_code indiv_fin_ann_date indiv_fin_end_date  indiv_fin_eps  indiv_fin_dt_eps  indiv_fin_total_revenue_ps  indiv_fin_revenue_ps  indiv_fin_capital_rese_ps  indiv_fin_surplus_rese_ps  indiv_fin_undist_profit_ps  indiv_fin_extra_item  indiv_fin_profit_dedt indiv_fin_gross_margin indiv_fin_current_ratio indiv_fin_quick_ratio indiv_fin_cash_ratio indiv_fin_ar_turn indiv_fin_ca_turn indiv_fin_fa_turn  indiv_fin_assets_turn  indiv_fin_op_income indiv_fin_ebit indiv_fin_ebitda indiv_fin_fcff indiv_fin_fcfe indiv_fin_current_exint indiv_fin_noncurrent_exint indiv_fin_interestdebt indiv_fin_netdebt indiv_fin_tangible_asset indiv_fin_working_capital indiv_fin_networking_capital indiv_fin_invest_capital  indiv_fin_retained_earnings  indiv_fin_diluted2_eps  indiv_fin_bps  indiv_fin_ocfps  indiv_fin_retainedps  indiv_fin_cfps indiv_fin_ebit_ps indiv_fin_fcff_ps indiv_fin_fcfe_ps  indiv_fin_netprofit_margin indiv_fin_grossprofit_margin

In [7]:
# 1.4 【sh】股东人数
print("=" * 80)
print("1.4 股东人数 (pro.stk_holdernumber)")
print("=" * 80)

for stock, name in zip(stocks, stock_names):
    print(f"\n{name} ({stock}):")
    # 获取股东人数数据,这里start_date同样往前多加180天
    df = pro.stk_holdernumber(ts_code=stock, start_date=fin_start_date, end_date=end_date)
    # 重命名列：保留ts_code，其他列加前缀indiv_sh_
    cols_to_rename = {col: f'indiv_sh_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/stock/raw/{stock}_indiv_sh.csv
    if not os.path.exists('data/stock/raw'):
        os.makedirs('data/stock/raw')
    df.to_csv(f'data/stock/raw/{stock}_indiv_sh.csv', index=False)
    print(f"数据已保存到 data/stock/raw/{stock}_indiv_sh.csv")

1.4 股东人数 (pro.stk_holdernumber)

招商银行 (600036.SH):
  indiv_sh_ts_code indiv_sh_ann_date indiv_sh_end_date  indiv_sh_holder_num
0        600036.SH          20250830          20250630               410379
1        600036.SH          20250430          20250331               434959
2        600036.SH          20250326          20250228               421011
3        600036.SH          20250326          20241231               459175
4        600036.SH          20241030          20240930               522103
总共 12 条记录
数据已保存到 data/stock/raw/600036.SH_indiv_sh.csv

比亚迪 (002594.SZ):
  indiv_sh_ts_code indiv_sh_ann_date indiv_sh_end_date  indiv_sh_holder_num
0        002594.SZ          20250830          20250630               323939
1        002594.SZ          20250426          20250331               203708
2        002594.SZ          20250325          20250228               199293
3        002594.SZ          20250325          20241231               271334
4        002594.SZ          20241031    

In [8]:
# 1.5 【chip】每日筹码及胜率
print("=" * 80)
print("1.5 每日筹码及胜率 (pro.cyq_perf)")
print("=" * 80)

for stock, name in zip(stocks, stock_names):
    print(f"\n{name} ({stock}):")
    df = pro.cyq_perf(ts_code=stock, start_date=start_date, end_date=end_date)
    # 重命名列：保留trade_date和ts_code，其他列加前缀indiv_chip_
    cols_to_rename = {col: f'indiv_chip_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/stock/raw/{stock}_indiv_chip.csv
    if not os.path.exists('data/stock/raw'):
        os.makedirs('data/stock/raw')
    df.to_csv(f'data/stock/raw/{stock}_indiv_chip.csv', index=False)
    print(f"数据已保存到 data/stock/raw/{stock}_indiv_chip.csv")

1.5 每日筹码及胜率 (pro.cyq_perf)

招商银行 (600036.SH):
  indiv_chip_ts_code indiv_chip_trade_date  indiv_chip_his_low  indiv_chip_his_high  indiv_chip_cost_5pct  indiv_chip_cost_15pct  indiv_chip_cost_50pct  indiv_chip_cost_85pct  indiv_chip_cost_95pct  indiv_chip_weight_avg  indiv_chip_winner_rate
0          600036.SH              20251023                 1.2                 47.2                  26.8                   32.0                   41.2                   44.0                   45.2                  39.18                   63.75
1          600036.SH              20251022                 1.2                 47.2                  26.8                   32.0                   41.2                   44.0                   45.2                  39.18                   59.32
2          600036.SH              20251021                 1.2                 47.2                  26.8                   32.0                   41.2                   44.0                   45.2                  39.1

In [9]:
# 1.6 【techin】技术面因子
print("=" * 80)
print("1.6 技术面因子 (pro.stk_factor)")
print("=" * 80)

for stock, name in zip(stocks, stock_names):
    print(f"\n{name} ({stock}):")
    df = pro.stk_factor(ts_code=stock, start_date=start_date, end_date=end_date)
    # 重命名列：保留trade_date和ts_code，其他列加前缀indiv_techin_
    cols_to_rename = {col: f'indiv_techin_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/stock/raw/{stock}_indiv_techin.csv
    if not os.path.exists('data/stock/raw'):
        os.makedirs('data/stock/raw')
    df.to_csv(f'data/stock/raw/{stock}_indiv_techin.csv', index=False)
    print(f"数据已保存到 data/stock/raw/{stock}_indiv_techin.csv")

1.6 技术面因子 (pro.stk_factor)

招商银行 (600036.SH):
  indiv_techin_ts_code indiv_techin_trade_date  indiv_techin_close  indiv_techin_open  indiv_techin_high  indiv_techin_low  indiv_techin_pre_close  indiv_techin_change  indiv_techin_pct_change  indiv_techin_vol  indiv_techin_amount  indiv_techin_adj_factor  indiv_techin_open_hfq  indiv_techin_open_qfq  indiv_techin_close_hfq  indiv_techin_close_qfq  indiv_techin_high_hfq  indiv_techin_high_qfq  indiv_techin_low_hfq  indiv_techin_low_qfq  indiv_techin_pre_close_hfq  indiv_techin_pre_close_qfq  indiv_techin_macd_dif  indiv_techin_macd_dea  indiv_techin_macd  indiv_techin_kdj_k  indiv_techin_kdj_d  indiv_techin_kdj_j  indiv_techin_rsi_6  indiv_techin_rsi_12  indiv_techin_rsi_24  indiv_techin_boll_upper  indiv_techin_boll_mid  indiv_techin_boll_lower  indiv_techin_cci
0            600036.SH                20251023               42.24              41.95              42.49             41.78                   41.95                 0.29            

In [10]:
# 1.7 【margin】融资融券
print("=" * 80)
print("1.7 融资融券 (pro.margin_detail)")
print("=" * 80)

for stock, name in zip(stocks, stock_names):
    print(f"\n{name} ({stock}):")
    df = pro.margin_detail(ts_code=stock, start_date=start_date, end_date=end_date)
    # 重命名列：保留trade_date和ts_code，其他列加前缀indiv_margin_
    cols_to_rename = {col: f'indiv_margin_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/stock/raw/{stock}_indiv_margin.csv
    if not os.path.exists('data/stock/raw'):
        os.makedirs('data/stock/raw')
    df.to_csv(f'data/stock/raw/{stock}_indiv_margin.csv', index=False)
    print(f"数据已保存到 data/stock/raw/{stock}_indiv_margin.csv")  

1.7 融资融券 (pro.margin_detail)

招商银行 (600036.SH):
  indiv_margin_trade_date indiv_margin_ts_code  indiv_margin_rzye  indiv_margin_rqye  indiv_margin_rzmre  indiv_margin_rqyl  indiv_margin_rzche  indiv_margin_rqchl  indiv_margin_rqmcl  indiv_margin_rzrqye
0                20251023            600036.SH       9.416259e+09         39285312.0         202344940.0           930050.0         217303999.0             85000.0            116800.0         9.455544e+09
1                20251022            600036.SH       9.431218e+09         37681587.5         204312257.0           898250.0         152299137.0            108600.0            114800.0         9.468899e+09
2                20251021            600036.SH       9.379204e+09         37448259.0         151076691.0           892050.0         245486804.0             49300.0            203700.0         9.416653e+09
3                20251020            600036.SH       9.473615e+09         30553463.0         222582830.0           737650.0         

## 2. ETF数据 - 510300华泰柏瑞沪深300ETF、510500南方中证500ETF

In [11]:
# 2.1 【d】ETF日线行情
print("=" * 80)
print("2.1 ETF日线行情 (pro.fund_daily)")
print("=" * 80)

etfs = ['510300.SH', '510500.SH']
etf_names = ['华泰柏瑞沪深300ETF', '南方中证500ETF']

for etf, name in zip(etfs, etf_names):
    print(f"\n{name} ({etf}):")
    df = pro.fund_daily(ts_code=etf, start_date=start_date, end_date=end_date)
    # 重命名列：保留trade_date和ts_code，其他列加前缀indiv_d_
    cols_to_rename = {col: f'indiv_d_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/etf/raw/{etf}_indiv_d.csv
    if not os.path.exists('data/etf/raw'):
        os.makedirs('data/etf/raw')
    df.to_csv(f'data/etf/raw/{etf}_indiv_d.csv', index=False)
    print(f"数据已保存到 data/etf/raw/{etf}_indiv_d.csv")

2.1 ETF日线行情 (pro.fund_daily)

华泰柏瑞沪深300ETF (510300.SH):
  indiv_d_ts_code indiv_d_trade_date  indiv_d_pre_close  indiv_d_open  indiv_d_high  indiv_d_low  indiv_d_close  indiv_d_change  indiv_d_pct_chg  indiv_d_vol  indiv_d_amount
0       510300.SH           20251023              4.695         4.687         4.719        4.642          4.713           0.018           0.3834   6993428.41     3267824.361
1       510300.SH           20251022              4.710         4.690         4.708        4.668          4.695          -0.015          -0.3185   5544841.98     2601506.365
2       510300.SH           20251021              4.640         4.656         4.723        4.652          4.710           0.070           1.5086   9598685.19     4511371.971
3       510300.SH           20251020              4.624         4.651         4.678        4.624          4.640           0.016           0.3460   7644178.05     3554256.321
4       510300.SH           20251017              4.721         4.715     

In [12]:
# 2.2 【adj_d】基金复权因子
print("=" * 80)
print("2.2 基金复权因子 (pro.fund_adj)")
print("=" * 80)

for etf, name in zip(etfs, etf_names):
    print(f"\n{name} ({etf}):")
    df = pro.fund_adj(ts_code=etf, start_date=start_date, end_date=end_date)
    # 重命名列：保留trade_date和ts_code，其他列加前缀indiv_adj_d_
    cols_to_rename = {col: f'indiv_adj_d_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/etf/raw/{etf}_indiv_adj_d.csv
    if not os.path.exists('data/etf/raw'):
        os.makedirs('data/etf/raw')
    df.to_csv(f'data/etf/raw/{etf}_indiv_adj_d.csv', index=False)
    print(f"数据已保存到 data/etf/raw/{etf}_indiv_adj_d.csv")

2.2 基金复权因子 (pro.fund_adj)

华泰柏瑞沪深300ETF (510300.SH):
  indiv_adj_d_ts_code indiv_adj_d_trade_date  indiv_adj_d_adj_factor
0           510300.SH               20251023                   1.235
1           510300.SH               20251022                   1.235
2           510300.SH               20251021                   1.235
3           510300.SH               20251020                   1.235
4           510300.SH               20251017                   1.235
总共 500 条记录
数据已保存到 data/etf/raw/510300.SH_indiv_adj_d.csv

南方中证500ETF (510500.SH):
  indiv_adj_d_ts_code indiv_adj_d_trade_date  indiv_adj_d_adj_factor
0           510500.SH               20251023                   0.332
1           510500.SH               20251022                   0.332
2           510500.SH               20251021                   0.332
3           510500.SH               20251020                   0.332
4           510500.SH               20251017                   0.332
总共 500 条记录
数据已保存到 data/etf/raw/510500

In [13]:
# 2.3 【ind】指数日线行情
print("=" * 80)
print("2.3 指数日线行情 (pro.index_daily)")
print("=" * 80)

indices = ['000300.SH', '000905.SH']
index_names = ['沪深300', '中证500']

for index, name in zip(indices, index_names):
    print(f"\n{name} ({index}):")
    df = pro.index_daily(ts_code=index, start_date=start_date, end_date=end_date)
    # 重命名列：保留trade_date和ts_code，其他列加前缀indiv_ind_
    cols_to_rename = {col: f'indiv_ind_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/etf/raw/{etf}_indiv_ind.csv，其中'000300.SH'对应510300.SH，'000905.SH'对应510500.SH
    etf_map = {'000300.SH': '510300.SH', '000905.SH': '510500.SH'}
    etf_code = etf_map[index]
    if not os.path.exists('data/etf/raw'):
        os.makedirs('data/etf/raw')
    df.to_csv(f'data/etf/raw/{etf_code}_indiv_ind.csv', index=False)
    print(f"数据已保存到 data/etf/raw/{etf_code}_indiv_ind.csv")

2.3 指数日线行情 (pro.index_daily)

沪深300 (000300.SH):
  indiv_ind_ts_code indiv_ind_trade_date  indiv_ind_close  indiv_ind_open  indiv_ind_high  indiv_ind_low  indiv_ind_pre_close  indiv_ind_change  indiv_ind_pct_chg  indiv_ind_vol  indiv_ind_amount
0         000300.SH             20251023        4606.3447       4578.7018       4611.3409      4541.6944            4592.5700           13.7747             0.2999    200463917.0      4.208002e+08
1         000300.SH             20251022        4592.5700       4576.0277       4603.4884      4567.2361            4607.8715          -15.3015            -0.3321    185333960.0      4.409353e+08
2         000300.SH             20251021        4607.8715       4556.1224       4615.4630      4547.9283            4538.2201           69.6514             1.5348    215711175.0      5.513903e+08
3         000300.SH             20251020        4538.2201       4558.2421       4570.4376      4520.5024            4514.2345           23.9856             0.5313    2

In [14]:
# 2.4 【din】指数每日指标
print("=" * 80)
print("2.4 指数每日指标 (pro.index_dailybasic)")
print("=" * 80)

for index, name in zip(indices, index_names):
    print(f"\n{name} ({index}):")
    df = pro.index_dailybasic(ts_code=index, start_date=start_date, end_date=end_date)
    # 重命名列:保留trade_date和ts_code,其他列加前缀indiv_din_
    cols_to_rename = {col: f'indiv_din_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # ✅ 修复:文件名应该是 indiv_din.csv
    etf_map = {'000300.SH': '510300.SH', '000905.SH': '510500.SH'}
    etf_code = etf_map[index]
    if not os.path.exists('data/etf/raw'):
        os.makedirs('data/etf/raw')
    df.to_csv(f'data/etf/raw/{etf_code}_indiv_din.csv', index=False)  # ✅ 改为 indiv_din.csv
    print(f"数据已保存到 data/etf/raw/{etf_code}_indiv_din.csv")

2.4 指数每日指标 (pro.index_dailybasic)

沪深300 (000300.SH):
  indiv_din_ts_code indiv_din_trade_date  indiv_din_total_mv  indiv_din_float_mv  indiv_din_total_share  indiv_din_float_share  indiv_din_free_share  indiv_din_turnover_rate  indiv_din_turnover_rate_f  indiv_din_pe  indiv_din_pe_ttm  indiv_din_pb
0         000300.SH             20251023        6.671456e+13        5.118369e+13           4.324165e+12           3.273451e+12          1.107107e+12                     0.61                       1.81         14.68             14.46          1.50
1         000300.SH             20251022        6.643323e+13        5.100118e+13           4.324139e+12           3.273425e+12          1.107081e+12                     0.57                       1.67         14.61             14.40          1.50
2         000300.SH             20251021        6.637206e+13        5.100656e+13           4.324140e+12           3.273426e+12          1.107080e+12                     0.66                       1.95     

In [15]:
# 2.5 【techin】指数技术因子
print("=" * 80)
print("2.5 指数技术因子 (pro.idx_factor_pro)")
print("=" * 80)

for index, name in zip(indices, index_names):
    print(f"\n{name} ({index}):")
    df = pro.idx_factor_pro(ts_code=index, start_date=start_date, end_date=end_date)
    # 重命名列：保留trade_date和ts_code，其他列加前缀indiv_techin_
    cols_to_rename = {col: f'indiv_techin_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/etf/raw/{etf}_indiv_techin.csv，其中'000300.SH'对应510300.SH，'000905.SH'对应510500.SH
    etf_map = {'000300.SH': '510300.SH', '000905.SH': '510500.SH'}
    etf_code = etf_map[index]
    if not os.path.exists('data/etf/raw'):
        os.makedirs('data/etf/raw')
    df.to_csv(f'data/etf/raw/{etf_code}_indiv_techin.csv', index=False)
    print(f"数据已保存到 data/etf/raw/{etf_code}_indiv_techin.csv")

2.5 指数技术因子 (pro.idx_factor_pro)

沪深300 (000300.SH):
  indiv_techin_ts_code indiv_techin_trade_date  indiv_techin_open  indiv_techin_high  indiv_techin_low  indiv_techin_close  indiv_techin_pre_close  indiv_techin_change  indiv_techin_pct_change  indiv_techin_vol  indiv_techin_amount  indiv_techin_asi_bfq  indiv_techin_asit_bfq  indiv_techin_atr_bfq  indiv_techin_bbi_bfq  indiv_techin_bias1_bfq  indiv_techin_bias2_bfq  indiv_techin_bias3_bfq  indiv_techin_boll_lower_bfq  indiv_techin_boll_mid_bfq  indiv_techin_boll_upper_bfq  indiv_techin_brar_ar_bfq  indiv_techin_brar_br_bfq  indiv_techin_cci_bfq  indiv_techin_cr_bfq  indiv_techin_dfma_dif_bfq  indiv_techin_dfma_difma_bfq  indiv_techin_dmi_adx_bfq  indiv_techin_dmi_adxr_bfq  indiv_techin_dmi_mdi_bfq  indiv_techin_dmi_pdi_bfq  indiv_techin_downdays  indiv_techin_updays  indiv_techin_dpo_bfq  indiv_techin_madpo_bfq  indiv_techin_ema_bfq_10  indiv_techin_ema_bfq_20  indiv_techin_ema_bfq_250  indiv_techin_ema_bfq_30  indiv_techin_ema_bfq_5

## 3. 其他因子

### 3.1 大盘数据

In [16]:
# 3.1.1 【din】指数每日指标 - 大盘指数
print("=" * 80)
print("3.1.1 大盘指数每日指标 (pro.index_dailybasic)")
print("=" * 80)

# 上证指数、深证成指
market_indices = ['000001.SH', '399001.SZ']
market_names = ['上证指数', '深证成指']

for index, name in zip(market_indices, market_names):
    print(f"\n{name} ({index}):")
    df = pro.index_dailybasic(ts_code=index, start_date=start_date, end_date=end_date)
    # 重命名列：保留trade_date和ts_code，其他列加前缀market_din_
    cols_to_rename = {col: f'market_din_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/macro/raw/{index}_market_din.csv
    if not os.path.exists('data/macro/raw'):
        os.makedirs('data/macro/raw')
    df.to_csv(f'data/macro/raw/{index}_market_din.csv', index=False)
    print(f"数据已保存到 data/macro/raw/{index}_market_din.csv")

3.1.1 大盘指数每日指标 (pro.index_dailybasic)

上证指数 (000001.SH):
  market_din_ts_code market_din_trade_date  market_din_total_mv  market_din_float_mv  market_din_total_share  market_din_float_share  market_din_free_share  market_din_turnover_rate  market_din_turnover_rate_f  market_din_pe  market_din_pe_ttm  market_din_pb
0          000001.SH              20251023         7.494141e+13         5.916896e+13            5.836393e+12            4.692181e+12           1.805565e+12                      1.19                        3.08          16.98              16.85           1.51
1          000001.SH              20251022         7.474205e+13         5.903764e+13            5.836059e+12            4.691890e+12           1.805329e+12                      1.14                        2.97          16.93              16.81           1.51
2          000001.SH              20251021         7.469487e+13         5.907047e+13            5.835987e+12            4.691794e+12           1.805169e+12           

In [17]:
# 3.1.2 【flow】资金流向
print("=" * 80)
print("3.1.2 资金流向 (pro.moneyflow_mkt_dc)")
print("=" * 80)

df = pro.moneyflow_mkt_dc(start_date=start_date, end_date=end_date)
# 重命名列：保留trade_date，其他列加前缀market_flow_
cols_to_rename = {col: f'market_flow_{col}' for col in df.columns}
df.rename(columns=cols_to_rename, inplace=True)
print(df.head())
print(f"总共 {len(df)} 条记录")
# 保存到本地data目录，文件名为data/macro/raw/hsgt_moneyflow.csv
if not os.path.exists('data/macro/raw'):
    os.makedirs('data/macro/raw')
df.to_csv(f'data/macro/raw/moneyflow.csv', index=False)
print(f"数据已保存到 data/macro/raw/moneyflow.csv")

print(df)

3.1.2 资金流向 (pro.moneyflow_mkt_dc)
  market_flow_trade_date  market_flow_close_sh  market_flow_pct_change_sh  market_flow_close_sz  market_flow_pct_change_sz  market_flow_net_amount  market_flow_net_amount_rate  market_flow_buy_elg_amount  market_flow_buy_elg_amount_rate  market_flow_buy_lg_amount  market_flow_buy_lg_amount_rate  market_flow_buy_md_amount  market_flow_buy_md_amount_rate  market_flow_buy_sm_amount  market_flow_buy_sm_amount_rate
0               20251023               3922.41                       0.22              13025.45                       0.22           -3.931443e+10                        -2.39               -2.068833e+10                            -1.26              -1.862610e+10                           -1.13               2.259755e+09                            0.14               3.705467e+10                            2.25
1               20251022               3913.76                      -0.07              12996.61                      -0.62           -5.10

In [18]:
# 3.1.3 【margin】融资融券 - 市场汇总
print("=" * 80)
print("3.1.3 融资融券市场汇总 (pro.margin)")
print("=" * 80)

df = pro.margin(start_date=start_date, end_date=end_date)
# 重命名列：保留trade_date，其他列加前缀market_margin_
cols_to_rename = {col: f'market_margin_{col}' for col in df.columns}
df.rename(columns=cols_to_rename, inplace=True)
print(df.head())
print(f"总共 {len(df)} 条记录")
# 保存到本地data目录，文件名为data/macro/raw/market_margin.csv
if not os.path.exists('data/macro/raw'):
    os.makedirs('data/macro/raw')
df.to_csv(f'data/macro/raw/market_margin.csv', index=False)
print(f"数据已保存到 data/macro/raw/market_margin.csv")

3.1.3 融资融券市场汇总 (pro.margin)
  market_margin_trade_date market_margin_exchange_id  market_margin_rzye  market_margin_rzmre  market_margin_rzche  market_margin_rqye  market_margin_rqmcl  market_margin_rzrqye  market_margin_rqyl
0                 20251023                       SSE        1.232014e+12         8.728375e+10         8.826683e+10        1.177634e+10           95434219.0          1.243790e+12        2.513866e+09
1                 20251023                       BSE        7.546326e+09         4.658379e+08         2.131609e+09        2.567240e+05                  0.0          7.546583e+09        1.596300e+04
2                 20251023                      SZSE        1.194363e+12         8.911113e+10         8.968996e+10        5.344439e+09           28285854.0          1.199707e+12        7.074945e+08
3                 20251022                       SSE        1.233014e+12         9.231166e+10         8.780912e+10        1.157095e+10           68070506.0          1.244585e+12   

### 3.2 宏观数据

In [19]:
# 3.2.1 【shibor】SHIBOR利率
print("=" * 80)
print("3.2.1 SHIBOR利率 (pro.shibor)")
print("=" * 80)

df = pro.shibor(start_date=start_date, end_date=end_date)
# 重命名列：保留trade_date(如果有date列则改名)，其他列加前缀macro_shibor_，trade_date不变
cols_to_rename = {}
for col in df.columns:
    if col == 'date':
        cols_to_rename[col] = 'trade_date'
    elif col != 'trade_date':
        cols_to_rename[col] = f'macro_shibor_{col}'
df.rename(columns=cols_to_rename, inplace=True)
# 输出前5行
print(df.head())
print(f"总共 {len(df)} 条记录")
# 保存到本地data目录，文件名为data/macro/raw/shibor.csv
if not os.path.exists('data/macro/raw'):
    os.makedirs('data/macro/raw')
df.to_csv(f'data/macro/raw/shibor.csv', index=False)
print(f"数据已保存到 data/macro/raw/shibor.csv")

3.2.1 SHIBOR利率 (pro.shibor)
  trade_date  macro_shibor_on  macro_shibor_1w  macro_shibor_2w  macro_shibor_1m  macro_shibor_3m  macro_shibor_6m  macro_shibor_9m  macro_shibor_1y
0   20251023            1.318            1.417            1.512            1.556            1.594            1.641            1.662            1.680
1   20251022            1.318            1.422            1.452            1.557            1.592            1.641            1.662            1.679
2   20251021            1.317            1.426            1.504            1.557            1.586            1.641            1.662            1.675
3   20251020            1.317            1.418            1.468            1.557            1.582            1.640            1.660            1.670
4   20251017            1.318            1.415            1.461            1.559            1.580            1.640            1.660            1.670
总共 516 条记录
数据已保存到 data/macro/raw/shibor.csv


In [20]:
# 3.2.2 【gdp】GDP数据
print("=" * 80)
print("3.2.2 GDP数据 (pro.cn_gdp)")
print("=" * 80)

# GDP数据按季度发布,转换start_date和end_date为季度格式 (YYYYQ)
# 这里start_date同样往前多加180天
start_q = fin_start_date[:4] + '1'  # 从年份开始的第一个季度
end_q = end_date[:4] + '4'  # 结束年份的第四个季度
df = pro.cn_gdp(start_q=start_q, end_q=end_q)
# 重命名列：保留quarter作为时间标识，其他列加前缀macro_gdp_
cols_to_rename = {col: f'macro_gdp_{col}' for col in df.columns if col not in ['quarter', 'trade_date', 'ts_code']}
df.rename(columns=cols_to_rename, inplace=True)
print(df.head())
print(f"总共 {len(df)} 条记录")
# 保存到本地data目录，文件名为data/macro/raw/gdp.csv
if not os.path.exists('data/macro/raw'):
    os.makedirs('data/macro/raw')
df.to_csv(f'data/macro/raw/gdp.csv', index=False)
print(f"数据已保存到 data/macro/raw/gdp.csv")

3.2.2 GDP数据 (pro.cn_gdp)
  quarter  macro_gdp_gdp  macro_gdp_gdp_yoy  macro_gdp_pi  macro_gdp_pi_yoy  macro_gdp_si  macro_gdp_si_yoy  macro_gdp_ti  macro_gdp_ti_yoy
0  2024Q4      1349083.5                5.0       91413.9               3.5      492087.1               5.3      765582.5               5.0
1  2024Q3       949745.7                4.8       57733.1               3.4      361361.6               5.4      530651.1               4.7
2  2024Q2       616836.0                5.0       30660.0               3.5      236529.9               5.8      349646.1               4.6
3  2024Q1       296299.0                5.3       11538.0               3.3      109846.0               6.0      174915.0               5.0
4  2023Q4      1260582.1                5.2       89755.2               4.1      482588.5               4.7      688238.4               5.8
总共 8 条记录
数据已保存到 data/macro/raw/gdp.csv


In [21]:
# 3.2.3 【cpi】CPI数据
print("=" * 80)
print("3.2.3 CPI数据 (pro.cn_cpi)")
print("=" * 80)

# CPI数据按月发布,转换start_date和end_date为月份格式 (YYYYMM)
# 这里start_date同样往前多加180天
start_m = fin_start_date[:6]
end_m = end_date[:6]
df = pro.cn_cpi(start_m=start_m, end_m=end_m)
# 重命名列：保留month作为时间标识，其他列加前缀macro_cpi_
cols_to_rename = {col: f'macro_cpi_{col}' for col in df.columns if col not in ['month', 'trade_date', 'ts_code']}
df.rename(columns=cols_to_rename, inplace=True)
print(df.head())
print(f"总共 {len(df)} 条记录")
# 保存到本地data目录，文件名为data/macro/raw/cpi.csv
if not os.path.exists('data/macro/raw'):
    os.makedirs('data/macro/raw')
df.to_csv(f'data/macro/raw/cpi.csv', index=False)
print(f"数据已保存到 data/macro/raw/cpi.csv")

3.2.3 CPI数据 (pro.cn_cpi)
    month  macro_cpi_nt_val  macro_cpi_nt_yoy  macro_cpi_nt_mom  macro_cpi_nt_accu  macro_cpi_town_val  macro_cpi_town_yoy  macro_cpi_town_mom  macro_cpi_town_accu  macro_cpi_cnt_val  macro_cpi_cnt_yoy  macro_cpi_cnt_mom  macro_cpi_cnt_accu
0  202509              99.7              -0.3               0.1               -0.1                99.8                -0.2                 0.0                 99.9               99.5               -0.5                0.2                99.7
1  202508              99.6              -0.4               0.0               -0.1                99.7                -0.3                 0.0                100.0               99.4               -0.6                0.1                99.7
2  202507             100.0               0.0               0.4               -0.1               100.0                 0.0                 0.4                100.0               99.7               -0.3                0.3                99.7
3  202506  

In [22]:
# 3.2.4 【ppi】PPI数据
print("=" * 80)
print("3.2.4 PPI数据 (pro.cn_ppi)")
print("=" * 80)

# PPI数据按月发布,转换start_date和end_date为月份格式 (YYYYMM)
# 这里start_date同样往前多加180天
start_m = fin_start_date[:6]
end_m = end_date[:6]
df = pro.cn_ppi(start_m=start_m, end_m=end_m)
# 重命名列：保留month作为时间标识，其他列加前缀macro_ppi_
cols_to_rename = {col: f'macro_ppi_{col}' for col in df.columns if col not in ['month', 'trade_date', 'ts_code']}
df.rename(columns=cols_to_rename, inplace=True)
print(df.head())
print(f"总共 {len(df)} 条记录")
# 保存到本地data目录，文件名为data/macro/raw/ppi.csv
if not os.path.exists('data/macro/raw'):
    os.makedirs('data/macro/raw')
df.to_csv(f'data/macro/raw/ppi.csv', index=False)
print(f"数据已保存到 data/macro/raw/ppi.csv")

3.2.4 PPI数据 (pro.cn_ppi)
    month  macro_ppi_ppi_yoy  macro_ppi_ppi_mp_yoy  macro_ppi_ppi_mp_qm_yoy  macro_ppi_ppi_mp_rm_yoy  macro_ppi_ppi_mp_p_yoy  macro_ppi_ppi_cg_yoy  macro_ppi_ppi_cg_f_yoy  macro_ppi_ppi_cg_c_yoy  macro_ppi_ppi_cg_adu_yoy  macro_ppi_ppi_cg_dcg_yoy  macro_ppi_ppi_mom  macro_ppi_ppi_mp_mom  macro_ppi_ppi_mp_qm_mom  macro_ppi_ppi_mp_rm_mom  macro_ppi_ppi_mp_p_mom  macro_ppi_ppi_cg_mom  macro_ppi_ppi_cg_f_mom  macro_ppi_ppi_cg_c_mom  macro_ppi_ppi_cg_adu_mom  macro_ppi_ppi_cg_dcg_mom  macro_ppi_ppi_accu  macro_ppi_ppi_mp_accu  macro_ppi_ppi_mp_qm_accu  macro_ppi_ppi_mp_rm_accu  macro_ppi_ppi_mp_p_accu  macro_ppi_ppi_cg_accu  macro_ppi_ppi_cg_f_accu  macro_ppi_ppi_cg_c_accu  macro_ppi_ppi_cg_adu_accu  macro_ppi_ppi_cg_dcg_accu
0  202509               -2.3                  -2.4                     -9.0                     -2.9                    -1.7                  -1.7                    -1.7                    -0.3                       0.7                      -3

In [23]:
# 3.2.5 【m】货币供应
print("=" * 80)
print("3.2.5 货币供应 (pro.cn_m)")
print("=" * 80)

# 货币供应数据按月发布,转换start_date和end_date为月份格式 (YYYYMM)
# 这里start_date同样往前多加180天
start_m = fin_start_date[:6]
end_m = end_date[:6]
df = pro.cn_m(start_m=start_m, end_m=end_m)
# 重命名列：保留month作为时间标识，其他列加前缀macro_m_
cols_to_rename = {col: f'macro_m_{col}' for col in df.columns if col not in ['month', 'trade_date', 'ts_code']}
df.rename(columns=cols_to_rename, inplace=True)
print(df.head())
print(f"总共 {len(df)} 条记录")
# 保存到本地data目录，文件名为data/macro/raw/money_supply.csv
if not os.path.exists('data/macro/raw'):
    os.makedirs('data/macro/raw')
df.to_csv(f'data/macro/raw/money_supply.csv', index=False)
print(f"数据已保存到 data/macro/raw/money_supply.csv")

3.2.5 货币供应 (pro.cn_m)
    month  macro_m_m0  macro_m_m0_yoy  macro_m_m0_mom  macro_m_m1  macro_m_m1_yoy  macro_m_m1_mom  macro_m_m2  macro_m_m2_yoy  macro_m_m2_mom
0  202509   135813.36            11.5            1.81  1131455.07             7.2            1.73  3353771.03             8.4            1.02
1  202508   133402.29            11.7            0.42  1112255.70             6.0            0.15  3319831.44             8.8            0.62
2  202507   132800.00            11.8            0.74  1110600.00             5.6           -2.54  3299400.00             8.8           -0.11
3  202506   131827.18            12.0            0.43  1139494.08             4.6            4.62  3302868.17             8.3            1.38
4  202505   131300.00            12.1           -0.07  1089100.00             2.3           -0.21  3257800.00             7.9            0.19
总共 31 条记录
数据已保存到 data/macro/raw/money_supply.csv


In [24]:
# 3.2.6 【sf】社融数据
print("=" * 80)
print("3.2.6 社融数据 (pro.sf_month)")
print("=" * 80)

# 社融数据按月发布,转换start_date和end_date为月份格式 (YYYYMM)
# 这里start_date同样往前多加180天
start_m = fin_start_date[:6]
end_m = end_date[:6]
df = pro.sf_month(start_m=start_m, end_m=end_m)
# 重命名列：保留month作为时间标识，其他列加前缀macro_sf_
cols_to_rename = {col: f'macro_sf_{col}' for col in df.columns if col not in ['month', 'trade_date', 'ts_code']}
df.rename(columns=cols_to_rename, inplace=True)
print(df.head())
print(f"总共 {len(df)} 条记录")
# 保存到本地data目录，文件名为data/macro/raw/social_finance.csv
if not os.path.exists('data/macro/raw'):
    os.makedirs('data/macro/raw')
df.to_csv(f'data/macro/raw/social_finance.csv', index=False)
print(f"数据已保存到 data/macro/raw/social_finance.csv")

3.2.6 社融数据 (pro.sf_month)
    month  macro_sf_inc_month  macro_sf_inc_cumval  macro_sf_stk_endval
0  202509             35296.0             300851.0               437.08
1  202508             25660.0             265555.0               433.65
2  202507             11307.0             239895.0               431.25
3  202506             42251.0             228588.0               430.24
4  202505             22899.0             186336.0               426.16
总共 31 条记录
数据已保存到 data/macro/raw/social_finance.csv


In [25]:
# 3.2.7 【ust】美国国债收益率
print("=" * 80)
print("3.2.7 美国国债收益率 (pro.us_trycr)")
print("=" * 80)

df = pro.us_trycr(start_date=start_date, end_date=end_date)
# 重命名列：保留date/trade_date作为时间标识，其他列加前缀macro_ust_
if 'date' in df.columns:
    df.rename(columns={'date': 'trade_date'}, inplace=True)
cols_to_rename = {col: f'macro_ust_{col}' for col in df.columns}
df.rename(columns=cols_to_rename, inplace=True)
print(df.head())
print(f"总共 {len(df)} 条记录")
# 保存到本地data目录，文件名为data/macro/raw/ust_yield.csv
if not os.path.exists('data/macro/raw'):
    os.makedirs('data/macro/raw')
df.to_csv(f'data/macro/raw/ust_yield.csv', index=False)
print(f"数据已保存到 data/macro/raw/ust_yield.csv")

3.2.7 美国国债收益率 (pro.us_trycr)
  macro_ust_trade_date  macro_ust_y5  macro_ust_y7  macro_ust_y10  macro_ust_y20  macro_ust_y30
0             20251023          1.19          1.45           1.71           2.15           2.36
1             20251022          1.24          1.46           1.68           2.11           2.34
2             20251021          1.26          1.47           1.70           2.12           2.35
3             20251020          1.30          1.52           1.74           2.16           2.39
4             20251017          1.30          1.52           1.75           2.18           2.40
总共 520 条记录
数据已保存到 data/macro/raw/ust_yield.csv


In [26]:
# 3.2.8 【im】国际指数 - 标普500和恒生指数
print("=" * 80)
print("3.2.8 国际指数 (pro.index_global)")
print("=" * 80)

# 标普500: SPX, 恒生指数: HSI
international_indices = ['SPX', 'HSI']
international_names = ['标普500', '恒生指数']

for index, name in zip(international_indices, international_names):
    print(f"\n{name} ({index}):")
    df = pro.index_global(ts_code=index, start_date=start_date, end_date=end_date)
    # 重命名列：保留trade_date和ts_code，其他列加前缀macro_im_，trade_date
    cols_to_rename = {col: f'macro_im_{col}' for col in df.columns}
    df.rename(columns=cols_to_rename, inplace=True)
    df['trade_date'] = df['macro_im_trade_date']
    df.drop(columns=['macro_im_trade_date'], inplace=True)
    # 打印前5行数据和总记录数
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 保存到本地data目录，文件名为data/macro/raw/{index}_macro_im.csv
    if not os.path.exists('data/macro/raw'):
        os.makedirs('data/macro/raw')
    df.to_csv(f'data/macro/raw/{index}_macro_im.csv', index=False)
    print(f"数据已保存到 data/macro/raw/{index}_macro_im.csv")

3.2.8 国际指数 (pro.index_global)

标普500 (SPX):
  macro_im_ts_code  macro_im_open  macro_im_close  macro_im_high  macro_im_low  macro_im_pre_close  macro_im_change  macro_im_pct_chg  macro_im_swing  macro_im_vol trade_date
0              SPX        6703.65         6738.44        6749.53       6700.14             6699.40            39.04              0.58            0.74     3317281.0   20251023
1              SPX        6741.34         6699.40        6741.75       6655.69             6735.35           -35.95             -0.53            1.28     3407490.0   20251022
2              SPX        6736.75         6735.35        6752.16       6722.03             6735.13             0.22              0.00            0.45     3124256.0   20251021
3              SPX        6690.05         6735.13        6744.35       6690.05             6664.01            71.12              1.07            0.81     2653125.0   20251020
4              SPX        6613.27         6664.01        6678.88       6603.76   

# 二、数据整理

In [27]:
'''个股数据'''

## 整合数据
# 1 股票复权行情数据
# 读取data/stock/raw/{stock}_indiv_d.csv，按trade_date升序排列并设置为index
for stock, name in zip(stocks, stock_names):
    print(f"\n处理{name} ({stock}) 的交易数据整合：")
    df = pd.read_csv(f'data/stock/raw/{stock}_indiv_d.csv')
    df['trade_date'] = pd.to_datetime(df['indiv_d_trade_date'], format='%Y%m%d')
    df = df.sort_values('trade_date').set_index('trade_date')
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df.columns)} 列数据")

    # 2 每日指标
    # 读取data/stock/raw/{stock}_indiv_din.csv，按trade_date升序排列并设置为index
    df_din = pd.read_csv(f'data/stock/raw/{stock}_indiv_din.csv')
    df_din['trade_date'] = pd.to_datetime(df_din['indiv_din_trade_date'], format='%Y%m%d')
    df_din = df_din.sort_values('trade_date').set_index('trade_date')
    print(df_din.head())
    print(f"总共 {len(df_din)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df_din.columns)} 列数据")

    # 合并df和df_din，按trade_date对齐，使用左连接，删除重复的ts_code和trade_date列
    df = df.merge(df_din.drop(columns=['indiv_din_ts_code', 'indiv_din_trade_date']), left_index=True, right_index=True, how='left')
    print("合并每日指标后：")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df.columns)} 列数据")

    # 3 财务指标
    # 读取data/stock/raw/{stock}_indiv_fin.csv
    df_fin = pd.read_csv(f'data/stock/raw/{stock}_indiv_fin.csv')
    df_fin['ann_date'] = pd.to_datetime(df_fin['indiv_fin_ann_date'], format='%Y%m%d')
    print("原始财务数据：")
    print(df_fin.head())
    print(f"总共 {len(df_fin)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df_fin.columns)} 列数据")
    
    # 定义过期报告函数，并删除相应行
    '''如果两个报告的end_date相同但ann_date不同，则保留ann_date较近的报告，删除ann_date较远的报告；如果end_date相同而且ann_date也相同，则保留数据中NaN更少的那份。'''
    def drop_expired_fin_report(df):
        # 统计每行数据的NaN数量
        df['nan_count'] = df.isna().sum(axis=1)
        # 按end_date和ann_date排序，end_date升序，ann_date降序，nan_count升序
        df = df.sort_values(by=['indiv_fin_end_date', 'ann_date', 'nan_count'], ascending=[True, False, True])
        # 标记需要删除的行 
        df['drop'] = df.duplicated(subset=['indiv_fin_end_date'], keep='first')
        # 删除标记为True的行
        df = df[df['drop'] == False]
        # 删除标记列
        df = df.drop(columns=['drop', 'nan_count'])
        # 按公告日期升序排列，并设置index
        df = df.sort_values('ann_date').reset_index(drop=False)
        return df
    
    df_fin = drop_expired_fin_report(df_fin)
    print("删除过期报告后：")
    print(df_fin.head())
    print(f"总共 {len(df_fin)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df_fin.columns)} 列数据")

    # 合并df和df_fin，合并方法为对于df中的每个交易日，找到df_fin中公告日期小于等于该交易日的最新一条记录进行合并，删除重复的ts_code列
    df = pd.merge_asof(df.sort_index(), df_fin.sort_values('ann_date').drop(columns=['indiv_fin_ts_code']), left_index=True, right_on='ann_date', direction='backward')
    # 删除来自df_fin的index和最后一个ann_date列
    df = df = df.drop(columns=['index', 'ann_date'])
    print("合并财务指标后：")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df.columns)} 列数据")

    # 4 股东人数
    # 读取data/stock/raw/{stock}_indiv_sh.csv
    df_sh = pd.read_csv(f'data/stock/raw/{stock}_indiv_sh.csv')
    df_sh['ann_date'] = pd.to_datetime(df_sh['indiv_sh_ann_date'], format='%Y%m%d')
    print("股东人数数据：")
    print(df_sh.head()) 
    print(f"总共 {len(df_sh)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df_sh.columns)} 列数据")

    # 定义过期报告函数，并删除相应行
    '''如果两个报告的end_date相同但ann_date不同，则保留ann_date较近的报告，删除ann_date较远的报告；如果end_date相同而且ann_date也相同，则保留数据中NaN更少的那份。'''
    def drop_expired_sh_report(df):
        # 统计每行数据的NaN数量
        df['nan_count'] = df.isna().sum(axis=1)
        # 按end_date和ann_date排序，end_date升序，ann_date降序，nan_count升序
        df = df.sort_values(by=['indiv_sh_end_date', 'ann_date', 'nan_count'], ascending=[True, False, True])
        # 标记需要删除的行 
        df['drop'] = df.duplicated(subset=['indiv_sh_end_date'], keep='first')
        # 删除标记为True的行
        df = df[df['drop'] == False]
        # 删除标记列
        df = df.drop(columns=['drop', 'nan_count'])
        # 按公告日期升序排列，并设置index
        df = df.sort_values('ann_date').reset_index(drop=False)
        return df
    
    df_sh = drop_expired_sh_report(df_sh)
    print("删除过期报告后：")
    print(df_sh.head())
    print(f"总共 {len(df_sh)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df_sh.columns)} 列数据")

    # 合并df和df_sh，合并方法为对于df中的每个交易日，找到df_fin中公告日期小于等于该交易日的最新一条记录进行合并，删除重复的ts_code列
    df = pd.merge_asof(df.sort_index(), df_sh.sort_values('ann_date').drop(columns=['indiv_sh_ts_code']), left_index=True, right_on='ann_date', direction='backward')
    # 删除来自df_sh的index和最后一个ann_date列
    df = df = df.drop(columns=['index', 'ann_date'])
    print("合并股东人数后：")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df.columns)} 列数据")

    # 5 每日筹码及胜率
    # 读取data/stock/raw/{stock}_indiv_chip.csv，按trade_date升序排列并设置为index
    df_chip = pd.read_csv(f'data/stock/raw/{stock}_indiv_chip.csv')
    df_chip['trade_date'] = pd.to_datetime(df_chip['indiv_chip_trade_date'], format='%Y%m%d')
    df_chip = df_chip.sort_values('trade_date').set_index('trade_date')
    print(df_chip.head())
    print(f"总共 {len(df_chip)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df_chip.columns)} 列数据")

    # 合并df和df_chip，按trade_date对齐，使用左连接，删除重复的ts_code和trade_date列
    df = df.merge(df_chip.drop(columns=['indiv_chip_ts_code', 'indiv_chip_trade_date']), left_index=True, right_index=True, how='left')
    print("合并每日筹码及胜率后：")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df.columns)} 列数据")

    # 6 技术面因子
    # 读取data/stock/raw/{stock}_indiv_techin.csv，按trade_date升序排列并设置为index
    df_techin = pd.read_csv(f'data/stock/raw/{stock}_indiv_techin.csv')
    df_techin['trade_date'] = pd.to_datetime(df_techin['indiv_techin_trade_date'], format='%Y%m%d')
    df_techin = df_techin.sort_values('trade_date').set_index('trade_date')
    print(df_techin.head())
    print(f"总共 {len(df_techin)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df_techin.columns)} 列数据")
    # 合并df和df_techin，按trade_date对齐，使用左连接，删除重复的ts_code和trade_date列
    df = df.merge(df_techin.drop(columns=['indiv_techin_ts_code', 'indiv_techin_trade_date']), left_index=True, right_index=True, how='left')
    print("合并技术面因子后：")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df.columns)} 列数据")

    # 7 融资融券
    # 读取data/stock/raw/{stock}_indiv_margin.csv，按trade_date升序排列并设置为index
    df_margin = pd.read_csv(f'data/stock/raw/{stock}_indiv_margin.csv')
    df_margin['trade_date'] = pd.to_datetime(df_margin['indiv_margin_trade_date'], format='%Y%m%d')
    df_margin = df_margin.sort_values('trade_date').set_index('trade_date')
    print(df_margin.head())
    print(f"总共 {len(df_margin)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df_margin.columns)} 列数据")
    # 合并df和df_margin，按trade_date对齐，使用左连接，删除重复的ts_code和trade_date列
    df = df.merge(df_margin.drop(columns=['indiv_margin_ts_code', 'indiv_margin_trade_date']), left_index=True, right_index=True, how='left')
    print("合并融资融券后：")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    # 统计有多少列
    print(f"共有 {len(df.columns)} 列数据")

    # 统计最终整合的数据有多少行和列
    print(f"\n最终整合数据共有 {len(df)} 行，{len(df.columns)} 列。")
    # 统计缺失值情况
    missing_counts = df.isna().sum()
    print("各列缺失值统计：")
    print(missing_counts[missing_counts > 0])
    # 统计缺失值比例
    missing_percent = missing_counts / len(df)
    print("各列缺失值比例：")
    print(missing_percent[missing_percent > 0])
    # 统计共有多少列有缺失值
    print(f"共有 {missing_counts[missing_counts > 0].count()} 列有缺失值。")


    # 保存最终整合的数据到data/stock/processed/{stock}_indiv_full.csv
    if not os.path.exists('data/stock/processed'):
        os.makedirs('data/stock/processed')
    df.to_csv(f'data/stock/processed/{stock}_indiv_full.csv')
    print(f"最终整合数据已保存到 data/stock/processed/{stock}_indiv_full.csv")


处理招商银行 (600036.SH) 的交易数据整合：
           indiv_d_ts_code  indiv_d_trade_date  indiv_d_open  indiv_d_high  indiv_d_low  indiv_d_close  indiv_d_pre_close  indiv_d_change  indiv_d_pct_chg  indiv_d_vol  indiv_d_amount
trade_date                                                                                                                                                                             
2023-09-25       600036.SH            20230925        190.16        190.16       187.99         188.56             190.50           -1.94            -1.02    456515.41     1506238.619
2023-09-26       600036.SH            20230926        188.16        189.02       187.13         187.42             188.56           -1.14            -0.60    322023.56     1058673.650
2023-09-27       600036.SH            20230927        187.99        189.08       187.13         187.53             187.42            0.11             0.06    352466.79     1159673.830
2023-09-28       600036.SH            20230928     

In [28]:
'''ETF数据'''

# 整合数据
# 1 ETF日线行情
for etf, name in zip(etfs, etf_names):
    print(f"\n处理{name} ({etf}) 的交易数据整合:")
    df = pd.read_csv(f'data/etf/raw/{etf}_indiv_d.csv')
    df['trade_date'] = pd.to_datetime(df['indiv_d_trade_date'], format='%Y%m%d')
    df = df.sort_values('trade_date').set_index('trade_date')
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    print(f"共有 {len(df.columns)} 列数据")

    # 2 基金复权因子
    df_adj = pd.read_csv(f'data/etf/raw/{etf}_indiv_adj_d.csv')
    print(f"共有 {len(df_adj.columns)} 列数据")
    df_adj['trade_date'] = pd.to_datetime(df_adj['indiv_adj_d_trade_date'], format='%Y%m%d')
    df_adj = df_adj.sort_values('trade_date').set_index('trade_date')
    df = df.merge(df_adj.drop(columns=['indiv_adj_d_ts_code', 'indiv_adj_d_trade_date']), left_index=True, right_index=True, how='left')
    print("合并基金复权因子后:")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    print(f"共有 {len(df.columns)} 列数据")

    # 3 指数日线行情
    df_ind = pd.read_csv(f'data/etf/raw/{etf}_indiv_ind.csv')
    print(f"共有 {len(df_ind.columns)} 列数据")
    df_ind['trade_date'] = pd.to_datetime(df_ind['indiv_ind_trade_date'], format='%Y%m%d')
    df_ind = df_ind.sort_values('trade_date').set_index('trade_date')
    df = df.merge(df_ind.drop(columns=['indiv_ind_trade_date']), left_index=True, right_index=True, how='left')
    print("合并指数日线行情后:")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    print(f"共有 {len(df.columns)} 列数据")

    # 4 添加指数每日指标
    df_din = pd.read_csv(f'data/etf/raw/{etf}_indiv_din.csv')
    print(f"共有 {len(df_din.columns)} 列数据")
    df_din['trade_date'] = pd.to_datetime(df_din['indiv_din_trade_date'], format='%Y%m%d')
    df_din = df_din.sort_values('trade_date').set_index('trade_date')
    df = df.merge(df_din.drop(columns=['indiv_din_ts_code', 'indiv_din_trade_date']), left_index=True, right_index=True, how='left')
    print("合并指数每日指标后:")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    print(f"共有 {len(df.columns)} 列数据")

    # 5 添加指数技术因子
    df_techin = pd.read_csv(f'data/etf/raw/{etf}_indiv_techin.csv')
    print(f"共有 {len(df_techin.columns)} 列数据")
    df_techin['trade_date'] = pd.to_datetime(df_techin['indiv_techin_trade_date'], format='%Y%m%d')
    df_techin = df_techin.sort_values('trade_date').set_index('trade_date')
    df = df.merge(df_techin.drop(columns=['indiv_techin_ts_code', 'indiv_techin_trade_date']), left_index=True, right_index=True, how='left')
    print("合并指数技术因子后:")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    print(f"共有 {len(df.columns)} 列数据")

    # 统计最终整合的数据有多少行和列
    print(f"\n最终整合数据共有 {len(df)} 行，{len(df.columns)} 列。")
    # 统计缺失值情况
    missing_counts = df.isna().sum()
    print("各列缺失值统计：")
    print(missing_counts[missing_counts > 0])
    # 统计缺失值比例
    missing_percent = missing_counts / len(df)
    print("各列缺失值比例：")
    print(missing_percent[missing_percent > 0])
    # 统计共有多少列有缺失值
    print(f"共有 {missing_counts[missing_counts > 0].count()} 列有缺失值。")
    
    # 保存最终整合的数据到data/etf/processed/{etf}_indiv_full.csv
    if not os.path.exists('data/etf/processed'):
        os.makedirs('data/etf/processed')
    df.to_csv(f'data/etf/processed/{etf}_indiv_full.csv')
    print(f"最终整合数据已保存到 data/etf/processed/{etf}_indiv_full.csv")


处理华泰柏瑞沪深300ETF (510300.SH) 的交易数据整合:
           indiv_d_ts_code  indiv_d_trade_date  indiv_d_pre_close  indiv_d_open  indiv_d_high  indiv_d_low  indiv_d_close  indiv_d_change  indiv_d_pct_chg  indiv_d_vol  indiv_d_amount
trade_date                                                                                                                                                                             
2023-09-25       510300.SH            20230925              3.817         3.806         3.809        3.782          3.789          -0.028          -0.7336   7009056.51     2656946.227
2023-09-26       510300.SH            20230926              3.789         3.786         3.797        3.766          3.768          -0.021          -0.5542   8629084.81     3257834.057
2023-09-27       510300.SH            20230927              3.768         3.767         3.805        3.766          3.778           0.010           0.2654  10780539.58     4074752.971
2023-09-28       510300.SH            20230

In [29]:
'''加入市场和宏观数据'''
# 1 大盘指数每日指标
# 分别读取data/macro/raw/000001.SH_market_din.csv和data/macro/raw/399001.SZ_market_din.csv，按trade_date升序排列并设置为index，然后合并为一个DataFrame
df_sh = pd.read_csv('data/macro/raw/000001.SH_market_din.csv')
df_sh['trade_date'] = pd.to_datetime(df_sh['market_din_trade_date'], format='%Y%m%d')
df_sh = df_sh.sort_values('trade_date').set_index('trade_date')
# 每列名称删掉market_din_前缀
df_sh.rename(columns={col: col.replace('market_din_', '') for col in df_sh.columns}, inplace=True)
# 每列名称加上market_din_sh_前缀
df_sh.rename(columns={col: f'market_din_sh_{col}' for col in df_sh.columns}, inplace=True)

df_sz = pd.read_csv('data/macro/raw/399001.SZ_market_din.csv')
df_sz['trade_date'] = pd.to_datetime(df_sz['market_din_trade_date'], format='%Y%m%d')
df_sz = df_sz.sort_values('trade_date').set_index('trade_date')
# 每列名称删掉market_din_前缀
df_sz.rename(columns={col: col.replace('market_din_', '') for col in df_sz.columns}, inplace=True)
# 每列名称加上market_din_sz_前缀
df_sz.rename(columns={col: f'market_din_sz_{col}' for col in df_sz.columns}, inplace=True)

df_market = df_sh.merge(df_sz, left_index=True, right_index=True, how='outer')
print("大盘指数每日指标合并后：")
print(df_market.head())
print(f"总共 {len(df_market)} 条记录")
print(f"共有 {len(df_market.columns)} 列数据")

# 2 资金流向
df_flow = pd.read_csv('data/macro/raw/moneyflow.csv')
df_flow['trade_date'] = pd.to_datetime(df_flow['market_flow_trade_date'], format='%Y%m%d')
df_flow = df_flow.sort_values('trade_date').set_index('trade_date')
# 删除market_flow_close_sh  market_flow_pct_change_sh  market_flow_close_sz  market_flow_pct_change_sz列
df_flow = df_flow.drop(columns=['market_flow_close_sh', 'market_flow_pct_change_sh', 'market_flow_close_sz', 'market_flow_pct_change_sz'])
print("资金流向数据：")
print(df_flow.head())
print(f"总共 {len(df_flow)} 条记录")
print(f"共有 {len(df_flow.columns)} 列数据")
# 合并到df_market，按trade_date对齐，使用外连接，删除重复的trade_date列
df_market = df_market.merge(df_flow, left_index=True, right_index=True, how='outer')
print("合并资金流向后：")
print(df_market.head())
print(f"总共 {len(df_market)} 条记录")
print(f"共有 {len(df_market.columns)} 列数据")

# 3 融资融券市场汇总
df_margin = pd.read_csv('data/macro/raw/market_margin.csv')
df_margin['trade_date'] = pd.to_datetime(df_margin['market_margin_trade_date'], format='%Y%m%d')
df_margin = df_margin.sort_values('trade_date')
# 列名删除market_margin_前缀
df_margin.rename(columns={col: col.replace('market_margin_', '') for col in df_margin.columns}, inplace=True)

# 首先提取df_margin中的exchange_id='SZSE'的数据，重命名列加上market_margin_sz_前缀
df_margin_sz = df_margin[df_margin['exchange_id'] == 'SZSE'].copy()
df_margin_sz.rename(columns={col: f'market_margin_sz_{col}' for col in df_margin_sz.columns}, inplace=True)
df_margin_sz = df_margin_sz.drop(columns=['market_margin_sz_exchange_id'])
# 有两列market_margin_sz_trade_date，删掉最后一列
df_margin_sz = df_margin_sz.loc[:,~df_margin_sz.columns.duplicated()]
# 然后提取df_margin中的exchange_id='SSE'的数据，重命名列加上market_margin_sh_前缀
df_margin_sh = df_margin[df_margin['exchange_id'] == 'SSE'].copy()
df_margin_sh.rename(columns={col: f'market_margin_sh_{col}' for col in df_margin_sh.columns}, inplace=True)
df_margin_sh = df_margin_sh.drop(columns=['market_margin_sh_exchange_id'])
# 有两列market_margin_sh_trade_date，删掉最后一列
df_margin_sh = df_margin_sh.loc[:,~df_margin_sh.columns.duplicated()]
# 最后按trade_date合并两个DataFrame
df_margin_pivot = df_margin_sh.merge(df_margin_sz, left_on='market_margin_sh_trade_date', right_on='market_margin_sz_trade_date', how='outer')
# 把market_margin_sh_trade_date重命名为trade_date，market_margin_sz_trade_date删除
df_margin_pivot.rename(columns={'market_margin_sh_trade_date': 'trade_date'}, inplace=True)
df_margin_pivot = df_margin_pivot.drop(columns=['market_margin_sz_trade_date'])
df_margin_pivot['trade_date'] = pd.to_datetime(df_margin_pivot['trade_date'], format='%Y%m%d')
df_margin_pivot = df_margin_pivot.sort_values('trade_date').set_index('trade_date')
# 合并到df_market，按trade_date对齐，使用外连接，删除重复的trade_date列
df_market = df_market.merge(df_margin_pivot, left_index=True, right_index=True, how='outer')

print("合并融资融券市场汇总后：")
print(df_market.head())
print(f"总共 {len(df_market)} 条记录")
print(f"共有 {len(df_market.columns)} 列数据")

# 保存市场数据到data/macro/processed/market_data.csv
if not os.path.exists('data/macro/processed'):
    os.makedirs('data/macro/processed')
df_market.to_csv('data/macro/processed/market_data.csv')
print("市场数据已保存到 data/macro/processed/market_data.csv")



大盘指数每日指标合并后：
           market_din_sh_ts_code  market_din_sh_trade_date  market_din_sh_total_mv  market_din_sh_float_mv  market_din_sh_total_share  market_din_sh_float_share  market_din_sh_free_share  market_din_sh_turnover_rate  market_din_sh_turnover_rate_f  market_din_sh_pe  market_din_sh_pe_ttm  market_din_sh_pb market_din_sz_ts_code  market_din_sz_trade_date  market_din_sz_total_mv  market_din_sz_float_mv  market_din_sz_total_share  market_din_sz_float_share  market_din_sz_free_share  market_din_sz_turnover_rate  market_din_sz_turnover_rate_f  market_din_sz_pe  market_din_sz_pe_ttm  market_din_sz_pb
trade_date                                                                                                                                                                                                                                                                                                                                                                                          

In [30]:
# 整合宏观数据
# 创建一个空的DataFrame,索引为交易日历中的交易日
cal = pd.read_csv('data/trade_cal.csv')
cal['cal_date'] = pd.to_datetime(cal['cal_date'], format='%Y%m%d')
date_index = cal['cal_date']
df_macro = pd.DataFrame(index=date_index)
df_macro.index.name = 'trade_date'
print("初始化df_macro:")
print(df_macro.head())

# 依次读取各个数据文件,将数据合并到df_macro中
# 1. SHIBOR利率
df_shibor = pd.read_csv('data/macro/raw/shibor.csv')
df_shibor['trade_date'] = pd.to_datetime(df_shibor['trade_date'], format='%Y%m%d')
df_shibor = df_shibor.sort_values('trade_date')

# 使用merge_asof进行合并(向后填充最新数据)
df_macro = df_macro.reset_index()
df_macro = pd.merge_asof(
    df_macro.sort_values('trade_date'), 
    df_shibor.sort_values('trade_date'),
    on='trade_date',
    direction='backward'
)
df_macro = df_macro.set_index('trade_date')
print("合并SHIBOR利率后:")
print(df_macro.head())
print(f"总共 {len(df_macro)} 条记录")
print(f"共有 {len(df_macro.columns)} 列数据")

# 2. GDP数据 (季度数据)
df_gdp = pd.read_csv('data/macro/raw/gdp.csv')
# 将季度转换为日期(取季度最后一天)
# 注意: quarter格式为 20231, 20232 等,表示2023年第1季度、2023年第2季度
df_gdp['trade_date'] = pd.PeriodIndex(df_gdp['quarter'].astype(str), freq='Q').to_timestamp(how='end')
df_gdp = df_gdp.sort_values('trade_date')

df_macro = df_macro.reset_index()
df_macro = pd.merge_asof(
    df_macro.sort_values('trade_date'),
    df_gdp.drop(columns=['quarter']).sort_values('trade_date'),
    on='trade_date',
    direction='backward'
)
df_macro = df_macro.set_index('trade_date')
print("合并GDP数据后:")
print(df_macro.head())
print(f"共有 {len(df_macro.columns)} 列数据")

# 3. CPI数据 (月度数据)
df_cpi = pd.read_csv('data/macro/raw/cpi.csv')
# 将月份转换为日期(取月末)
df_cpi['trade_date'] = pd.to_datetime(df_cpi['month'], format='%Y%m') + pd.offsets.MonthEnd(0)
df_cpi = df_cpi.sort_values('trade_date')

df_macro = df_macro.reset_index()
df_macro = pd.merge_asof(
    df_macro.sort_values('trade_date'),
    df_cpi.drop(columns=['month']).sort_values('trade_date'),
    on='trade_date',
    direction='backward'
)
df_macro = df_macro.set_index('trade_date')
print("合并CPI数据后:")
print(df_macro.head())
print(f"共有 {len(df_macro.columns)} 列数据")

# 4. PPI数据 (月度数据)
df_ppi = pd.read_csv('data/macro/raw/ppi.csv')
df_ppi['trade_date'] = pd.to_datetime(df_ppi['month'], format='%Y%m') + pd.offsets.MonthEnd(0)
df_ppi = df_ppi.sort_values('trade_date')

df_macro = df_macro.reset_index()
df_macro = pd.merge_asof(
    df_macro.sort_values('trade_date'),
    df_ppi.drop(columns=['month']).sort_values('trade_date'),
    on='trade_date',
    direction='backward'
)
df_macro = df_macro.set_index('trade_date')
print("合并PPI数据后:")
print(df_macro.head())
print(f"共有 {len(df_macro.columns)} 列数据")

# 5. 货币供应 (月度数据)
df_m = pd.read_csv('data/macro/raw/money_supply.csv')
df_m['trade_date'] = pd.to_datetime(df_m['month'], format='%Y%m') + pd.offsets.MonthEnd(0)
df_m = df_m.sort_values('trade_date')

df_macro = df_macro.reset_index()
df_macro = pd.merge_asof(
    df_macro.sort_values('trade_date'),
    df_m.drop(columns=['month']).sort_values('trade_date'),
    on='trade_date',
    direction='backward'
)
df_macro = df_macro.set_index('trade_date')
print("合并货币供应数据后:")
print(df_macro.head())
print(f"共有 {len(df_macro.columns)} 列数据")

# 6. 社融数据 (月度数据)
df_sf = pd.read_csv('data/macro/raw/social_finance.csv')
df_sf['trade_date'] = pd.to_datetime(df_sf['month'], format='%Y%m') + pd.offsets.MonthEnd(0)
df_sf = df_sf.sort_values('trade_date')

df_macro = df_macro.reset_index()
df_macro = pd.merge_asof(
    df_macro.sort_values('trade_date'),
    df_sf.drop(columns=['month']).sort_values('trade_date'),
    on='trade_date',
    direction='backward'
)
df_macro = df_macro.set_index('trade_date')
print("合并社融数据后:")
print(df_macro.head())
print(f"共有 {len(df_macro.columns)} 列数据")

# 7. 美债收益率 (日度数据)
df_ust = pd.read_csv('data/macro/raw/ust_yield.csv')
df_ust['trade_date'] = pd.to_datetime(df_ust['macro_ust_trade_date'], format='%Y%m%d')
df_ust = df_ust.sort_values('trade_date')

df_macro = df_macro.reset_index()
df_macro = pd.merge_asof(
    df_macro.sort_values('trade_date'),
    df_ust.drop(columns=['macro_ust_trade_date']).sort_values('trade_date'),
    on='trade_date',
    direction='backward'
)
df_macro = df_macro.set_index('trade_date')
print("合并美债收益率后:")
print(df_macro.head())
print(f"共有 {len(df_macro.columns)} 列数据")

# 8. 国际指数 - 标普500
df_spx = pd.read_csv('data/macro/raw/SPX_macro_im.csv')
df_spx['trade_date'] = pd.to_datetime(df_spx['trade_date'], format='%Y%m%d')
df_spx = df_spx.sort_values('trade_date')
# 重命名列,添加spx_前缀
df_spx = df_spx.rename(columns={col: f'macro_im_spx_{col}' if col != 'trade_date' else col for col in df_spx.columns})

df_macro = df_macro.reset_index()
df_macro = pd.merge_asof(
    df_macro.sort_values('trade_date'),
    df_spx.sort_values('trade_date'),
    on='trade_date',
    direction='backward'
)
df_macro = df_macro.set_index('trade_date')
print("合并标普500指数后:")
print(df_macro.head())
print(f"共有 {len(df_macro.columns)} 列数据")

# 9. 国际指数 - 恒生指数
df_hsi = pd.read_csv('data/macro/raw/HSI_macro_im.csv')
df_hsi['trade_date'] = pd.to_datetime(df_hsi['trade_date'], format='%Y%m%d')
df_hsi = df_hsi.sort_values('trade_date')
# 重命名列,添加hsi_前缀
df_hsi = df_hsi.rename(columns={col: f'macro_im_hsi_{col}' if col != 'trade_date' else col for col in df_hsi.columns})

df_macro = df_macro.reset_index()
df_macro = pd.merge_asof(
    df_macro.sort_values('trade_date'),
    df_hsi.sort_values('trade_date'),
    on='trade_date',
    direction='backward'
)
df_macro = df_macro.set_index('trade_date')
print("合并恒生指数后:")
print(df_macro.head())
print(f"共有 {len(df_macro.columns)} 列数据")

# 统计最终整合的数据
print(f"\n最终整合数据共有 {len(df_macro)} 行，{len(df_macro.columns)} 列。")
# 统计缺失值情况
missing_counts = df_macro.isna().sum()
print("各列缺失值统计:")
print(missing_counts[missing_counts > 0])
# 统计缺失值比例
missing_percent = missing_counts / len(df_macro)
print("各列缺失值比例:")
print(missing_percent[missing_percent > 0])

# 保存宏观数据到data/macro/processed/macro_data.csv
if not os.path.exists('data/macro/processed'):
    os.makedirs('data/macro/processed')
df_macro.to_csv('data/macro/processed/macro_data.csv')
print("宏观数据已保存到 data/macro/processed/macro_data.csv")
   

初始化df_macro:
Empty DataFrame
Columns: []
Index: [2023-09-25 00:00:00, 2023-09-26 00:00:00, 2023-09-27 00:00:00, 2023-09-28 00:00:00, 2023-10-09 00:00:00]
合并SHIBOR利率后:
            macro_shibor_on  macro_shibor_1w  macro_shibor_2w  macro_shibor_1m  macro_shibor_3m  macro_shibor_6m  macro_shibor_9m  macro_shibor_1y
trade_date                                                                                                                                        
2023-09-25            1.694            2.005            2.812            2.229            2.282            2.320            2.374            2.416
2023-09-26            1.741            1.969            3.035            2.260            2.294            2.335            2.386            2.432
2023-09-27            1.706            1.848            3.112            2.278            2.303            2.353            2.399            2.440
2023-09-28            2.155            2.195            3.231            2.285            2.302   

In [31]:
# 合并宏观数据到个股和ETF的processed数据中
'''用stock和etf的trade_date为基准,把宏观数据合并进去，宏观数据是逐日的，而个股和ETF数据有可能有缺失的交易日,所以使用左连接'''
for stock, name in zip(stocks, stock_names):
    print(f"\n为{name} ({stock}) 的交易数据加入宏观数据:")
    df = pd.read_csv(f'data/stock/processed/{stock}_indiv_full.csv')
    df['trade_date'] = pd.to_datetime(df['trade_date'])
    df = df.sort_values('trade_date').set_index('trade_date')
    print("原始个股数据:")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    print(f"共有 {len(df.columns)} 列数据")

    # 合并宏观数据
    df = df.merge(df_macro, left_index=True, right_index=True, how='left')
    print("合并宏观数据后:")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    print(f"共有 {len(df.columns)} 列数据")

    # 保存到data/stock/full/{stock}_indiv_full_macro.csv
    if not os.path.exists('data/stock/full'):
        os.makedirs('data/stock/full')
    df.to_csv(f'data/stock/full/{stock}_indiv_full_macro.csv')
    print(f"数据已保存到 data/stock/full/{stock}_indiv_full_macro.csv")
    
for etf, name in zip(etfs, etf_names):
    print(f"\n为{name} ({etf}) 的交易数据加入宏观数据:")
    df = pd.read_csv(f'data/etf/processed/{etf}_indiv_full.csv')
    df['trade_date'] = pd.to_datetime(df['trade_date'])
    df = df.sort_values('trade_date').set_index('trade_date')
    print("原始ETF数据:")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    print(f"共有 {len(df.columns)} 列数据")
    # 合并宏观数据
    df = df.merge(df_macro, left_index=True, right_index=True, how='left')
    print("合并宏观数据后:")
    print(df.head())
    print(f"总共 {len(df)} 条记录")
    print(f"共有 {len(df.columns)} 列数据")
    # 保存到data/etf/full/{etf}_indiv_full_macro.csv
    if not os.path.exists('data/etf/full'):
        os.makedirs('data/etf/full')
    df.to_csv(f'data/etf/full/{etf}_indiv_full_macro.csv')
    print(f"数据已保存到 data/etf/full/{etf}_indiv_full_macro.csv")




为招商银行 (600036.SH) 的交易数据加入宏观数据:
原始个股数据:
           indiv_d_ts_code  indiv_d_trade_date  indiv_d_open  indiv_d_high  indiv_d_low  indiv_d_close  indiv_d_pre_close  indiv_d_change  indiv_d_pct_chg  indiv_d_vol  indiv_d_amount  indiv_din_close  indiv_din_turnover_rate  indiv_din_turnover_rate_f  indiv_din_volume_ratio  indiv_din_pe  indiv_din_pe_ttm  indiv_din_pb  indiv_din_ps  indiv_din_ps_ttm  indiv_din_dv_ratio  indiv_din_dv_ttm  indiv_din_total_share  indiv_din_float_share  indiv_din_free_share  indiv_din_total_mv  indiv_din_circ_mv  indiv_fin_ann_date  indiv_fin_end_date  indiv_fin_eps  indiv_fin_dt_eps  indiv_fin_total_revenue_ps  indiv_fin_revenue_ps  indiv_fin_capital_rese_ps  indiv_fin_surplus_rese_ps  indiv_fin_undist_profit_ps  indiv_fin_extra_item  indiv_fin_profit_dedt  indiv_fin_gross_margin  indiv_fin_current_ratio  indiv_fin_quick_ratio  indiv_fin_cash_ratio  indiv_fin_ar_turn  indiv_fin_ca_turn  indiv_fin_fa_turn  indiv_fin_assets_turn  indiv_fin_op_income  indiv_fin_ebit

# 三、数据说明

In [32]:
# 生成列名说明表格(包含所有列,含时间和代码列)

print("=" * 150)
print("列名说明表格(含所有列)")
print("=" * 150)

# 常见指标的中英文说明字典
field_descriptions = {
    # 通用字段
    'trade_date': {'cn': '交易日期', 'en': 'Trade Date', 'unit': '日期(YYYYMMDD)'},
    'ts_code': {'cn': '证券代码', 'en': 'TS Code', 'unit': '代码'},
    'end_date': {'cn': '截止日期', 'en': 'End Date', 'unit': '日期'},
    'ann_date': {'cn': '公告日期', 'en': 'Announcement Date', 'unit': '日期'},
    'f_ann_date': {'cn': '实际公告日期', 'en': 'Actual Announcement Date', 'unit': '日期'},
    'quarter': {'cn': '季度', 'en': 'Quarter', 'unit': 'YYYYQ'},
    'month': {'cn': '月份', 'en': 'Month', 'unit': 'YYYYMM'},
    
    # 行情数据
    'open': {'cn': '开盘价', 'en': 'Open Price', 'unit': '元'},
    'high': {'cn': '最高价', 'en': 'High Price', 'unit': '元'},
    'low': {'cn': '最低价', 'en': 'Low Price', 'unit': '元'},
    'close': {'cn': '收盘价', 'en': 'Close Price', 'unit': '元'},
    'pre_close': {'cn': '前收盘价', 'en': 'Previous Close', 'unit': '元'},
    'change': {'cn': '涨跌额', 'en': 'Change', 'unit': '元'},
    'pct_chg': {'cn': '涨跌幅', 'en': 'Price Change Percentage', 'unit': '%'},
    'vol': {'cn': '成交量', 'en': 'Volume', 'unit': '手'},
    'amount': {'cn': '成交额', 'en': 'Amount', 'unit': '千元'},
    
    # 每日指标
    'turnover_rate': {'cn': '换手率', 'en': 'Turnover Rate', 'unit': '%'},
    'turnover_rate_f': {'cn': '换手率(自由流通股)', 'en': 'Turnover Rate (Free Float)', 'unit': '%'},
    'volume_ratio': {'cn': '量比', 'en': 'Volume Ratio', 'unit': '倍'},
    'pe': {'cn': '市盈率(总市值/净利润)', 'en': 'P/E Ratio', 'unit': '倍'},
    'pe_ttm': {'cn': '市盈率(TTM)', 'en': 'P/E Ratio (TTM)', 'unit': '倍'},
    'pb': {'cn': '市净率(总市值/净资产)', 'en': 'P/B Ratio', 'unit': '倍'},
    'ps': {'cn': '市销率', 'en': 'P/S Ratio', 'unit': '倍'},
    'ps_ttm': {'cn': '市销率(TTM)', 'en': 'P/S Ratio (TTM)', 'unit': '倍'},
    'dv_ratio': {'cn': '股息率', 'en': 'Dividend Yield', 'unit': '%'},
    'dv_ttm': {'cn': '股息率(TTM)', 'en': 'Dividend Yield (TTM)', 'unit': '%'},
    'total_share': {'cn': '总股本', 'en': 'Total Shares', 'unit': '万股'},
    'float_share': {'cn': '流通股本', 'en': 'Float Shares', 'unit': '万股'},
    'free_share': {'cn': '自由流通股本', 'en': 'Free Float Shares', 'unit': '万股'},
    'total_mv': {'cn': '总市值', 'en': 'Total Market Value', 'unit': '万元'},
    'circ_mv': {'cn': '流通市值', 'en': 'Circulating Market Value', 'unit': '万元'},
    
    # 财务指标
    'eps': {'cn': '基本每股收益', 'en': 'Earnings Per Share', 'unit': '元'},
    'dt_eps': {'cn': '稀释每股收益', 'en': 'Diluted EPS', 'unit': '元'},
    'total_revenue_ps': {'cn': '每股营业总收入', 'en': 'Revenue Per Share', 'unit': '元'},
    'revenue_ps': {'cn': '每股营业收入', 'en': 'Operating Revenue Per Share', 'unit': '元'},
    'capital_rese_ps': {'cn': '每股资本公积', 'en': 'Capital Reserve Per Share', 'unit': '元'},
    'surplus_rese_ps': {'cn': '每股盈余公积', 'en': 'Surplus Reserve Per Share', 'unit': '元'},
    'undist_profit_ps': {'cn': '每股未分配利润', 'en': 'Undistributed Profit Per Share', 'unit': '元'},
    'extra_item': {'cn': '非经常性损益', 'en': 'Extraordinary Items', 'unit': '元'},
    'profit_dedt': {'cn': '扣除非经常性损益后的净利润', 'en': 'Net Profit Excl. Non-recurring', 'unit': '元'},
    'gross_margin': {'cn': '毛利', 'en': 'Gross Margin', 'unit': '元'},
    'current_ratio': {'cn': '流动比率', 'en': 'Current Ratio', 'unit': ''},
    'quick_ratio': {'cn': '速动比率', 'en': 'Quick Ratio', 'unit': ''},
    'cash_ratio': {'cn': '保守速动比率', 'en': 'Cash Ratio', 'unit': ''},
    'ar_turn': {'cn': '应收账款周转率', 'en': 'AR Turnover', 'unit': '次'},
    'ca_turn': {'cn': '流动资产周转率', 'en': 'Current Asset Turnover', 'unit': '次'},
    'fa_turn': {'cn': '固定资产周转率', 'en': 'Fixed Asset Turnover', 'unit': '次'},
    'assets_turn': {'cn': '总资产周转率', 'en': 'Total Asset Turnover', 'unit': '次'},
    'op_income': {'cn': '经营活动净收益', 'en': 'Operating Income', 'unit': '元'},
    'valuechange_income': {'cn': '价值变动净收益', 'en': 'Fair Value Change Income', 'unit': '元'},
    'interst_income': {'cn': '利息费用', 'en': 'Interest Expense', 'unit': '元'},
    'daa': {'cn': '折旧与摊销', 'en': 'D&A', 'unit': '元'},
    'ebit': {'cn': '息税前利润', 'en': 'EBIT', 'unit': '元'},
    'ebitda': {'cn': '息税折旧摊销前利润', 'en': 'EBITDA', 'unit': '元'},
    'fcff': {'cn': '企业自由现金流量', 'en': 'FCFF', 'unit': '元'},
    'fcfe': {'cn': '股权自由现金流量', 'en': 'FCFE', 'unit': '元'},
    'debt_to_assets': {'cn': '资产负债率', 'en': 'Debt to Assets', 'unit': '%'},
    'assets_to_eqt': {'cn': '权益乘数', 'en': 'Equity Multiplier', 'unit': ''},
    'dp_assets_to_eqt': {'cn': '权益乘数(杜邦分析)', 'en': 'Equity Multiplier (DuPont)', 'unit': ''},
    'ca_to_assets': {'cn': '流动资产/总资产', 'en': 'Current Assets to Total Assets', 'unit': '%'},
    'nca_to_assets': {'cn': '非流动资产/总资产', 'en': 'Non-current Assets to Total Assets', 'unit': '%'},
    'tbassets_to_totalassets': {'cn': '有形资产/总资产', 'en': 'Tangible Assets to Total Assets', 'unit': '%'},
    'int_to_talcap': {'cn': '带息债务/全部投入资本', 'en': 'Interest-bearing Debt to Total Capital', 'unit': '%'},
    'eqt_to_talcapital': {'cn': '归属于母公司的股东权益/全部投入资本', 'en': 'Equity to Total Capital', 'unit': '%'},
    'currentdebt_to_debt': {'cn': '流动负债/负债合计', 'en': 'Current Debt to Total Debt', 'unit': '%'},
    'longdeb_to_debt': {'cn': '非流动负债/负债合计', 'en': 'Long-term Debt to Total Debt', 'unit': '%'},
    'ocf_to_shortdebt': {'cn': '经营活动产生的现金流量净额/流动负债', 'en': 'OCF to Short-term Debt', 'unit': ''},
    'debt_to_eqt': {'cn': '产权比率', 'en': 'Debt to Equity', 'unit': '%'},
    'eqt_to_debt': {'cn': '归属于母公司的股东权益/负债合计', 'en': 'Equity to Debt', 'unit': '%'},
    'eqt_to_interestdebt': {'cn': '归属于母公司的股东权益/带息债务', 'en': 'Equity to Interest-bearing Debt', 'unit': '%'},
    'tangibleasset_to_debt': {'cn': '有形资产/负债合计', 'en': 'Tangible Assets to Debt', 'unit': '%'},
    'tangasset_to_intdebt': {'cn': '有形资产/带息债务', 'en': 'Tangible Assets to Interest-bearing Debt', 'unit': '%'},
    'tangibleasset_to_netdebt': {'cn': '有形资产/净债务', 'en': 'Tangible Assets to Net Debt', 'unit': '%'},
    'ocf_to_debt': {'cn': '经营活动产生的现金流量净额/负债合计', 'en': 'OCF to Total Debt', 'unit': '%'},
    'ocf_to_interestdebt': {'cn': '经营活动产生的现金流量净额/带息债务', 'en': 'OCF to Interest-bearing Debt', 'unit': '%'},
    'ocf_to_netdebt': {'cn': '经营活动产生的现金流量净额/净债务', 'en': 'OCF to Net Debt', 'unit': '%'},
    'ebit_to_interest': {'cn': '已获利息倍数(EBIT/利息费用)', 'en': 'Interest Coverage Ratio', 'unit': '倍'},
    'longdebt_to_workingcapital': {'cn': '长期债务与营运资金比率', 'en': 'Long-term Debt to Working Capital', 'unit': '%'},
    'ebitda_to_debt': {'cn': 'EBITDA/负债合计', 'en': 'EBITDA to Total Debt', 'unit': ''},
    'turn_days': {'cn': '营业周期', 'en': 'Operating Cycle', 'unit': '天'},
    'roa_yearly': {'cn': '年化总资产净利率', 'en': 'Annualized ROA', 'unit': '%'},
    'roa_dp': {'cn': '总资产净利率(杜邦分析)', 'en': 'ROA (DuPont)', 'unit': '%'},
    'fixed_assets': {'cn': '固定资产合计', 'en': 'Fixed Assets', 'unit': '元'},
    'profit_prefin_exp': {'cn': '扣除财务费用前营业利润', 'en': 'Operating Profit Before Fin. Exp.', 'unit': '元'},
    'non_op_profit': {'cn': '非营业利润', 'en': 'Non-operating Profit', 'unit': '元'},
    'op_to_ebt': {'cn': '营业利润/利润总额', 'en': 'Operating Profit to EBT', 'unit': '%'},
    'nop_to_ebt': {'cn': '非营业利润/利润总额', 'en': 'Non-operating Profit to EBT', 'unit': '%'},
    'ocf_to_profit': {'cn': '经营活动产生的现金流量净额/营业利润', 'en': 'OCF to Operating Profit', 'unit': '%'},
    'cash_to_liqdebt': {'cn': '货币资金/流动负债', 'en': 'Cash to Current Liabilities', 'unit': '%'},
    'cash_to_liqdebt_withinterest': {'cn': '货币资金/带息流动负债', 'en': 'Cash to Interest-bearing Current Liabilities', 'unit': '%'},
    'op_to_liqdebt': {'cn': '营业利润/流动负债', 'en': 'Operating Profit to Current Liabilities', 'unit': '%'},
    'op_to_debt': {'cn': '营业利润/负债合计', 'en': 'Operating Profit to Total Debt', 'unit': '%'},
    'roic_yearly': {'cn': '年化投入资本回报率', 'en': 'Annualized ROIC', 'unit': '%'},
    'total_capital': {'cn': '全部投入资本', 'en': 'Total Capital', 'unit': '元'},
    'debt_to_revenue': {'cn': '带息债务/营业总收入', 'en': 'Interest-bearing Debt to Revenue', 'unit': '%'},
    'debt_to_ebitda': {'cn': '带息债务/EBITDA', 'en': 'Interest-bearing Debt to EBITDA', 'unit': ''},
    'op_of_gr': {'cn': '营业总收入同比增长率', 'en': 'Revenue Growth Rate', 'unit': '%'},
    'or_of_gr': {'cn': '营业收入同比增长率', 'en': 'Operating Revenue Growth Rate', 'unit': '%'},
    'profit_to_gr': {'cn': '利润总额同比增长率', 'en': 'EBT Growth Rate', 'unit': '%'},
    'netprofit_yoy': {'cn': '归属母公司股东的净利润同比增长率', 'en': 'Net Profit Growth Rate (YoY)', 'unit': '%'},
    'netprofit_margin': {'cn': '销售净利率', 'en': 'Net Profit Margin', 'unit': '%'},
    'grossprofit_margin': {'cn': '销售毛利率', 'en': 'Gross Profit Margin', 'unit': '%'},
    'cogs_of_sales': {'cn': '销售成本率', 'en': 'Cost of Sales Ratio', 'unit': '%'},
    'expense_of_sales': {'cn': '销售期间费用率', 'en': 'Operating Expense Ratio', 'unit': '%'},
    'profit_of_op': {'cn': '营业利润率', 'en': 'Operating Profit Margin', 'unit': '%'},
    'gc_of_gr': {'cn': '营业总成本/营业总收入', 'en': 'Total Operating Cost to Revenue', 'unit': '%'},
    'sale_expense': {'cn': '销售费用/营业总收入', 'en': 'Selling Expense Ratio', 'unit': '%'},
    'admin_expense_of_gr': {'cn': '管理费用/营业总收入', 'en': 'Admin Expense Ratio', 'unit': '%'},
    'fin_exp_of_gr': {'cn': '财务费用/营业总收入', 'en': 'Financial Expense Ratio', 'unit': '%'},
    'impai_ttm': {'cn': '资产减值损失/营业总收入', 'en': 'Impairment Loss Ratio', 'unit': '%'},
    'gc_of_gr_ttm': {'cn': '营业总成本/营业总收入(TTM)', 'en': 'Total Operating Cost to Revenue (TTM)', 'unit': '%'},
    'sale_expense_ttm': {'cn': '销售费用/营业总收入(TTM)', 'en': 'Selling Expense Ratio (TTM)', 'unit': '%'},
    'admin_expense_of_gr_ttm': {'cn': '管理费用/营业总收入(TTM)', 'en': 'Admin Expense Ratio (TTM)', 'unit': '%'},
    'fin_exp_of_gr_ttm': {'cn': '财务费用/营业总收入(TTM)', 'en': 'Financial Expense Ratio (TTM)', 'unit': '%'},
    'impai_of_gr_ttm': {'cn': '资产减值损失/营业总收入(TTM)', 'en': 'Impairment Loss Ratio (TTM)', 'unit': '%'},
    'roe': {'cn': '净资产收益率', 'en': 'Return on Equity', 'unit': '%'},
    'roe_waa': {'cn': '加权平均净资产收益率', 'en': 'Weighted Average ROE', 'unit': '%'},
    'roe_dt': {'cn': '净资产收益率(扣除非经常损益)', 'en': 'ROE (Excl. Non-recurring)', 'unit': '%'},
    'roa': {'cn': '总资产报酬率', 'en': 'Return on Assets', 'unit': '%'},
    'npta': {'cn': '总资产净利润', 'en': 'Net Profit on Total Assets', 'unit': '%'},
    'roic': {'cn': '投入资本回报率', 'en': 'Return on Invested Capital', 'unit': '%'},
    'roe_yearly': {'cn': '年化净资产收益率', 'en': 'Annualized ROE', 'unit': '%'},
    
    # 股东人数
    'holder_num': {'cn': '股东户数', 'en': 'Number of Shareholders', 'unit': '户'},
    
    # 筹码胜率
    'his_low': {'cn': '历史最低价', 'en': 'Historical Low', 'unit': '元'},
    'his_high': {'cn': '历史最高价', 'en': 'Historical High', 'unit': '元'},
    'cost_5pct': {'cn': '5分位成本', 'en': '5th Percentile Cost', 'unit': '元'},
    'cost_15pct': {'cn': '15分位成本', 'en': '15th Percentile Cost', 'unit': '元'},
    'cost_50pct': {'cn': '50分位成本', 'en': 'Median Cost', 'unit': '元'},
    'cost_85pct': {'cn': '85分位成本', 'en': '85th Percentile Cost', 'unit': '元'},
    'cost_95pct': {'cn': '95分位成本', 'en': '95th Percentile Cost', 'unit': '元'},
    'weight_avg': {'cn': '加权平均成本', 'en': 'Weighted Average Cost', 'unit': '元'},
    'winner_rate': {'cn': '胜率', 'en': 'Winner Rate', 'unit': '%'},
    
    # 技术因子
    'macd_dif': {'cn': 'MACD-DIF', 'en': 'MACD DIF', 'unit': ''},
    'macd_dea': {'cn': 'MACD-DEA', 'en': 'MACD DEA', 'unit': ''},
    'macd': {'cn': 'MACD', 'en': 'MACD', 'unit': ''},
    'kdj_k': {'cn': 'KDJ-K', 'en': 'KDJ K', 'unit': ''},
    'kdj_d': {'cn': 'KDJ-D', 'en': 'KDJ D', 'unit': ''},
    'kdj_j': {'cn': 'KDJ-J', 'en': 'KDJ J', 'unit': ''},
    'rsi_6': {'cn': 'RSI(6日)', 'en': 'RSI (6-day)', 'unit': ''},
    'rsi_12': {'cn': 'RSI(12日)', 'en': 'RSI (12-day)', 'unit': ''},
    'rsi_24': {'cn': 'RSI(24日)', 'en': 'RSI (24-day)', 'unit': ''},
    'boll_upper': {'cn': '布林上轨', 'en': 'Bollinger Upper Band', 'unit': '元'},
    'boll_mid': {'cn': '布林中轨', 'en': 'Bollinger Middle Band', 'unit': '元'},
    'boll_lower': {'cn': '布林下轨', 'en': 'Bollinger Lower Band', 'unit': '元'},
    'cci': {'cn': 'CCI指标', 'en': 'CCI', 'unit': ''},
    
    # 融资融券
    'rzye': {'cn': '融资余额', 'en': 'Margin Balance', 'unit': '元'},
    'rzmre': {'cn': '融资买入额', 'en': 'Margin Purchase', 'unit': '元'},
    'rzche': {'cn': '融资偿还额', 'en': 'Margin Repayment', 'unit': '元'},
    'rqye': {'cn': '融券余额', 'en': 'Short Balance', 'unit': '元'},
    'rqmcl': {'cn': '融券卖出量', 'en': 'Short Sale Volume', 'unit': '股'},
    'rqchl': {'cn': '融券偿还量', 'en': 'Short Repayment Volume', 'unit': '股'},
    'rqyl': {'cn': '融券余量', 'en': 'Outstanding Short Volume', 'unit': '股'},
    'rzrqye': {'cn': '融资融券余额', 'en': 'Margin + Short Balance', 'unit': '元'},
    
    # ETF/指数
    'nav': {'cn': '单位净值', 'en': 'Net Asset Value', 'unit': '元'},
    'accum_nav': {'cn': '累计净值', 'en': 'Accumulated NAV', 'unit': '元'},
    'adj_factor': {'cn': '复权因子', 'en': 'Adjustment Factor', 'unit': ''},
    
    # 资金流向
    'ggt_ss': {'cn': '港股通(沪)当日成交金额', 'en': 'HK Connect (SH) Turnover', 'unit': '亿元'},
    'ggt_sz': {'cn': '港股通(深)当日成交金额', 'en': 'HK Connect (SZ) Turnover', 'unit': '亿元'},
    'hgt': {'cn': '沪股通当日成交金额', 'en': 'SH Connect Turnover', 'unit': '亿元'},
    'sgt': {'cn': '深股通当日成交金额', 'en': 'SZ Connect Turnover', 'unit': '亿元'},
    'north_money': {'cn': '北向资金当日成交金额', 'en': 'Northbound Money', 'unit': '亿元'},
    'south_money': {'cn': '南向资金当日成交金额', 'en': 'Southbound Money', 'unit': '亿元'},
    
    # 宏观数据
    'on': {'cn': 'SHIBOR隔夜利率', 'en': 'SHIBOR Overnight', 'unit': '%'},
    '1w': {'cn': 'SHIBOR 1周利率', 'en': 'SHIBOR 1 Week', 'unit': '%'},
    '2w': {'cn': 'SHIBOR 2周利率', 'en': 'SHIBOR 2 Week', 'unit': '%'},
    '1m': {'cn': 'SHIBOR 1月利率', 'en': 'SHIBOR 1 Month', 'unit': '%'},
    '3m': {'cn': 'SHIBOR 3月利率', 'en': 'SHIBOR 3 Month', 'unit': '%'},
    '6m': {'cn': 'SHIBOR 6月利率', 'en': 'SHIBOR 6 Month', 'unit': '%'},
    '9m': {'cn': 'SHIBOR 9月利率', 'en': 'SHIBOR 9 Month', 'unit': '%'},
    '1y': {'cn': 'SHIBOR 1年利率/美债1年期收益率', 'en': 'SHIBOR 1 Year / US Treasury 1Y', 'unit': '%'},
    'gdp': {'cn': 'GDP', 'en': 'GDP', 'unit': '亿元'},
    'gdp_yoy': {'cn': 'GDP同比增长率', 'en': 'GDP Growth Rate (YoY)', 'unit': '%'},
    'pi': {'cn': '第一产业增加值', 'en': 'Primary Industry', 'unit': '亿元'},
    'pi_yoy': {'cn': '第一产业同比增长率', 'en': 'Primary Industry Growth (YoY)', 'unit': '%'},
    'si': {'cn': '第二产业增加值', 'en': 'Secondary Industry', 'unit': '亿元'},
    'si_yoy': {'cn': '第二产业同比增长率', 'en': 'Secondary Industry Growth (YoY)', 'unit': '%'},
    'ti': {'cn': '第三产业增加值', 'en': 'Tertiary Industry', 'unit': '亿元'},
    'ti_yoy': {'cn': '第三产业同比增长率', 'en': 'Tertiary Industry Growth (YoY)', 'unit': '%'},
    'nt_yoy': {'cn': '全国居民消费价格同比', 'en': 'CPI (YoY)', 'unit': '%'},
    'nt_mom': {'cn': '全国居民消费价格环比', 'en': 'CPI (MoM)', 'unit': '%'},
    'nt_accu': {'cn': '全国居民消费价格累计', 'en': 'CPI (Cumulative)', 'unit': '%'},
    'town_yoy': {'cn': '城市居民消费价格同比', 'en': 'Urban CPI (YoY)', 'unit': '%'},
    'town_mom': {'cn': '城市居民消费价格环比', 'en': 'Urban CPI (MoM)', 'unit': '%'},
    'town_accu': {'cn': '城市居民消费价格累计', 'en': 'Urban CPI (Cumulative)', 'unit': '%'},
    'cnt_yoy': {'cn': '农村居民消费价格同比', 'en': 'Rural CPI (YoY)', 'unit': '%'},
    'cnt_mom': {'cn': '农村居民消费价格环比', 'en': 'Rural CPI (MoM)', 'unit': '%'},
    'cnt_accu': {'cn': '农村居民消费价格累计', 'en': 'Rural CPI (Cumulative)', 'unit': '%'},
    'ppi_yoy': {'cn': 'PPI同比', 'en': 'PPI (YoY)', 'unit': '%'},
    'ppi_mom': {'cn': 'PPI环比', 'en': 'PPI (MoM)', 'unit': '%'},
    'ppi_accu': {'cn': 'PPI累计', 'en': 'PPI (Cumulative)', 'unit': '%'},
    'm0': {'cn': 'M0货币供应量', 'en': 'M0', 'unit': '亿元'},
    'm0_yoy': {'cn': 'M0同比增长率', 'en': 'M0 Growth (YoY)', 'unit': '%'},
    'm1': {'cn': 'M1货币供应量', 'en': 'M1', 'unit': '亿元'},
    'm1_yoy': {'cn': 'M1同比增长率', 'en': 'M1 Growth (YoY)', 'unit': '%'},
    'm2': {'cn': 'M2货币供应量', 'en': 'M2', 'unit': '亿元'},
    'm2_yoy': {'cn': 'M2同比增长率', 'en': 'M2 Growth (YoY)', 'unit': '%'},
    '2y': {'cn': '美债2年期收益率', 'en': 'US Treasury 2Y Yield', 'unit': '%'},
    '3y': {'cn': '美债3年期收益率', 'en': 'US Treasury 3Y Yield', 'unit': '%'},
    '5y': {'cn': '美债5年期收益率', 'en': 'US Treasury 5Y Yield', 'unit': '%'},
    '7y': {'cn': '美债7年期收益率', 'en': 'US Treasury 7Y Yield', 'unit': '%'},
    '10y': {'cn': '美债10年期收益率', 'en': 'US Treasury 10Y Yield', 'unit': '%'},
    '20y': {'cn': '美债20年期收益率', 'en': 'US Treasury 20Y Yield', 'unit': '%'},
    '30y': {'cn': '美债30年期收益率', 'en': 'US Treasury 30Y Yield', 'unit': '%'},
    
    # 每日指标
    'float_mv': {'cn': '流通市值(指数)', 'en': 'Float Market Value (Index)', 'unit': '亿元'},

    # 财务指标补充
    'adminexp_of_gr': {'cn': '管理费用与营业总收入之比', 'en': 'Admin Expense to Gross Revenue', 'unit': '%'},
    'assets_yoy': {'cn': '总资产同比增长率', 'en': 'Total Assets Growth (YoY)', 'unit': '%'},
    'basic_eps_yoy': {'cn': '基本每股收益同比增长率', 'en': 'Basic EPS Growth (YoY)', 'unit': '%'},
    'bps': {'cn': '每股净资产', 'en': 'Book Value Per Share', 'unit': '元'},
    'bps_yoy': {'cn': '每股净资产同比增长率', 'en': 'BPS Growth (YoY)', 'unit': '%'},
    'cfps': {'cn': '每股现金流量净额', 'en': 'Cash Flow Per Share', 'unit': '元'},
    'cfps_yoy': {'cn': '每股现金流量净额同比增长率', 'en': 'CFPS Growth (YoY)', 'unit': '%'},
    'current_exint': {'cn': '流动负债合计(带息)', 'en': 'Current Liabilities with Interest', 'unit': '元'},
    'diluted2_eps': {'cn': '稀释每股收益(摊薄)', 'en': 'Diluted EPS (Fully Diluted)', 'unit': '元'},
    'dt_eps_yoy': {'cn': '稀释每股收益同比增长率', 'en': 'Diluted EPS Growth (YoY)', 'unit': '%'},
    'dt_netprofit_yoy': {'cn': '扣非净利润同比增长率', 'en': 'Net Profit Excl. Non-recurring Growth (YoY)', 'unit': '%'},
    'ebit_of_gr': {'cn': 'EBIT与营业总收入之比', 'en': 'EBIT to Gross Revenue', 'unit': '%'},
    'ebit_ps': {'cn': '每股EBIT', 'en': 'EBIT Per Share', 'unit': '元'},
    'ebt_yoy': {'cn': '利润总额同比增长率', 'en': 'EBT Growth (YoY)', 'unit': '%'},
    'eqt_yoy': {'cn': '净资产同比增长率', 'en': 'Equity Growth (YoY)', 'unit': '%'},
    'equity_yoy': {'cn': '股东权益同比增长率', 'en': 'Shareholders Equity Growth (YoY)', 'unit': '%'},
    'fcfe_ps': {'cn': '每股股权自由现金流', 'en': 'FCFE Per Share', 'unit': '元'},
    'fcff_ps': {'cn': '每股企业自由现金流', 'en': 'FCFF Per Share', 'unit': '元'},
    'finaexp_of_gr': {'cn': '财务费用与营业总收入之比', 'en': 'Financial Expense to Gross Revenue', 'unit': '%'},
    'interestdebt': {'cn': '带息债务', 'en': 'Interest-bearing Debt', 'unit': '元'},
    'invest_capital': {'cn': '投入资本', 'en': 'Invested Capital', 'unit': '元'},
    'netdebt': {'cn': '净债务', 'en': 'Net Debt', 'unit': '元'},
    'networking_capital': {'cn': '营运资金', 'en': 'Working Capital', 'unit': '元'},
    'noncurrent_exint': {'cn': '非流动负债合计(带息)', 'en': 'Non-current Liabilities with Interest', 'unit': '元'},
    'ocf_yoy': {'cn': '经营活动现金流同比增长率', 'en': 'OCF Growth (YoY)', 'unit': '%'},
    'ocfps': {'cn': '每股经营活动现金流', 'en': 'Operating Cash Flow Per Share', 'unit': '元'},
    'op_yoy': {'cn': '营业利润同比增长率', 'en': 'Operating Profit Growth (YoY)', 'unit': '%'},
    'or_yoy': {'cn': '营业收入同比增长率', 'en': 'Operating Revenue Growth (YoY)', 'unit': '%'},
    'profit_to_op': {'cn': '利润总额与营业收入之比', 'en': 'Profit to Operating Revenue', 'unit': '%'},
    'q_dt_roe': {'cn': '单季度ROE(扣非)', 'en': 'Quarterly ROE (Excl. Non-recurring)', 'unit': '%'},
    'q_gc_to_gr': {'cn': '单季度营业总成本/营业总收入', 'en': 'Quarterly Total Cost to Revenue', 'unit': '%'},
    'q_npta': {'cn': '单季度总资产净利润', 'en': 'Quarterly Net Profit on Total Assets', 'unit': '%'},
    'q_ocf_to_sales': {'cn': '单季度经营现金流/营业收入', 'en': 'Quarterly OCF to Sales', 'unit': '%'},
    'q_op_qoq': {'cn': '营业利润环比增长率', 'en': 'Operating Profit Growth (QoQ)', 'unit': '%'},
    'q_roe': {'cn': '单季度ROE', 'en': 'Quarterly ROE', 'unit': '%'},
    'q_saleexp_to_gr': {'cn': '单季度销售费用/营业总收入', 'en': 'Quarterly Sales Expense to Revenue', 'unit': '%'},
    'q_sales_yoy': {'cn': '单季度营业收入同比增长率', 'en': 'Quarterly Sales Growth (YoY)', 'unit': '%'},
    'retained_earnings': {'cn': '留存收益', 'en': 'Retained Earnings', 'unit': '元'},
    'retainedps': {'cn': '每股留存收益', 'en': 'Retained Earnings Per Share', 'unit': '元'},
    'roa2_yearly': {'cn': '年化总资产报酬率ROA(2)', 'en': 'Annualized ROA (Method 2)', 'unit': '%'},
    'roe_yoy': {'cn': 'ROE同比增长率', 'en': 'ROE Growth (YoY)', 'unit': '%'},
    'saleexp_to_gr': {'cn': '销售费用与营业总收入之比', 'en': 'Sales Expense to Gross Revenue', 'unit': '%'},
    'tangible_asset': {'cn': '有形资产', 'en': 'Tangible Assets', 'unit': '元'},
    'tr_yoy': {'cn': '营业总收入同比增长率', 'en': 'Total Revenue Growth (YoY)', 'unit': '%'},
    'working_capital': {'cn': '营运资金', 'en': 'Working Capital', 'unit': '元'},

    # 技术指标补充(bfq=不复权)
    'asi_bfq': {'cn': 'ASI振动升降指标(不复权)', 'en': 'ASI (Non-adjusted)', 'unit': ''},
    'asit_bfq': {'cn': 'ASIT指标(不复权)', 'en': 'ASIT (Non-adjusted)', 'unit': ''},
    'atr_bfq': {'cn': 'ATR真实波幅(不复权)', 'en': 'ATR (Non-adjusted)', 'unit': '元'},
    'bbi_bfq': {'cn': 'BBI多空指数(不复权)', 'en': 'BBI (Non-adjusted)', 'unit': '元'},
    'bias1_bfq': {'cn': 'BIAS乖离率1(不复权)', 'en': 'BIAS 1 (Non-adjusted)', 'unit': '%'},
    'bias2_bfq': {'cn': 'BIAS乖离率2(不复权)', 'en': 'BIAS 2 (Non-adjusted)', 'unit': '%'},
    'bias3_bfq': {'cn': 'BIAS乖离率3(不复权)', 'en': 'BIAS 3 (Non-adjusted)', 'unit': '%'},
    'boll_lower_bfq': {'cn': '布林下轨(不复权)', 'en': 'Bollinger Lower (Non-adjusted)', 'unit': '元'},
    'boll_mid_bfq': {'cn': '布林中轨(不复权)', 'en': 'Bollinger Mid (Non-adjusted)', 'unit': '元'},
    'boll_upper_bfq': {'cn': '布林上轨(不复权)', 'en': 'Bollinger Upper (Non-adjusted)', 'unit': '元'},
    'brar_ar_bfq': {'cn': 'BRAR-AR(不复权)', 'en': 'BRAR-AR (Non-adjusted)', 'unit': ''},
    'brar_br_bfq': {'cn': 'BRAR-BR(不复权)', 'en': 'BRAR-BR (Non-adjusted)', 'unit': ''},
    'cci_bfq': {'cn': 'CCI顺势指标(不复权)', 'en': 'CCI (Non-adjusted)', 'unit': ''},
    'close_hfq': {'cn': '收盘价(后复权)', 'en': 'Close (Forward Adjusted)', 'unit': '元'},
    'close_qfq': {'cn': '收盘价(前复权)', 'en': 'Close (Backward Adjusted)', 'unit': '元'},
    'cr_bfq': {'cn': 'CR能量指标(不复权)', 'en': 'CR (Non-adjusted)', 'unit': ''},
    'dfma_dif_bfq': {'cn': 'DFMA-DIF(不复权)', 'en': 'DFMA-DIF (Non-adjusted)', 'unit': ''},
    'dfma_difma_bfq': {'cn': 'DFMA-DIFMA(不复权)', 'en': 'DFMA-DIFMA (Non-adjusted)', 'unit': ''},
    'dmi_adx_bfq': {'cn': 'DMI-ADX(不复权)', 'en': 'DMI-ADX (Non-adjusted)', 'unit': ''},
    'dmi_adxr_bfq': {'cn': 'DMI-ADXR(不复权)', 'en': 'DMI-ADXR (Non-adjusted)', 'unit': ''},
    'dmi_mdi_bfq': {'cn': 'DMI-MDI(不复权)', 'en': 'DMI-MDI (Non-adjusted)', 'unit': ''},
    'dmi_pdi_bfq': {'cn': 'DMI-PDI(不复权)', 'en': 'DMI-PDI (Non-adjusted)', 'unit': ''},
    'downdays': {'cn': '连续下跌天数', 'en': 'Consecutive Down Days', 'unit': '天'},
    'dpo_bfq': {'cn': 'DPO区间震荡线(不复权)', 'en': 'DPO (Non-adjusted)', 'unit': ''},
    'ema_bfq_10': {'cn': 'EMA10(不复权)', 'en': 'EMA 10 (Non-adjusted)', 'unit': '元'},
    'ema_bfq_20': {'cn': 'EMA20(不复权)', 'en': 'EMA 20 (Non-adjusted)', 'unit': '元'},
    'ema_bfq_250': {'cn': 'EMA250(不复权)', 'en': 'EMA 250 (Non-adjusted)', 'unit': '元'},
    'ema_bfq_30': {'cn': 'EMA30(不复权)', 'en': 'EMA 30 (Non-adjusted)', 'unit': '元'},
    'ema_bfq_5': {'cn': 'EMA5(不复权)', 'en': 'EMA 5 (Non-adjusted)', 'unit': '元'},
    'ema_bfq_60': {'cn': 'EMA60(不复权)', 'en': 'EMA 60 (Non-adjusted)', 'unit': '元'},
    'ema_bfq_90': {'cn': 'EMA90(不复权)', 'en': 'EMA 90 (Non-adjusted)', 'unit': '元'},
    'emv_bfq': {'cn': 'EMV简易波动指标(不复权)', 'en': 'EMV (Non-adjusted)', 'unit': ''},
    'expma_12_bfq': {'cn': 'EXPMA12(不复权)', 'en': 'EXPMA 12 (Non-adjusted)', 'unit': '元'},
    'expma_50_bfq': {'cn': 'EXPMA50(不复权)', 'en': 'EXPMA 50 (Non-adjusted)', 'unit': '元'},
    'high_hfq': {'cn': '最高价(后复权)', 'en': 'High (Forward Adjusted)', 'unit': '元'},
    'high_qfq': {'cn': '最高价(前复权)', 'en': 'High (Backward Adjusted)', 'unit': '元'},
    'kdj_bfq': {'cn': 'KDJ-J(不复权)', 'en': 'KDJ-J (Non-adjusted)', 'unit': ''},
    'kdj_d_bfq': {'cn': 'KDJ-D(不复权)', 'en': 'KDJ-D (Non-adjusted)', 'unit': ''},
    'kdj_k_bfq': {'cn': 'KDJ-K(不复权)', 'en': 'KDJ-K (Non-adjusted)', 'unit': ''},
    'ktn_down_bfq': {'cn': 'KTN下轨(不复权)', 'en': 'KTN Lower (Non-adjusted)', 'unit': '元'},
    'ktn_mid_bfq': {'cn': 'KTN中轨(不复权)', 'en': 'KTN Mid (Non-adjusted)', 'unit': '元'},
    'ktn_upper_bfq': {'cn': 'KTN上轨(不复权)', 'en': 'KTN Upper (Non-adjusted)', 'unit': '元'},
    'low_hfq': {'cn': '最低价(后复权)', 'en': 'Low (Forward Adjusted)', 'unit': '元'},
    'low_qfq': {'cn': '最低价(前复权)', 'en': 'Low (Backward Adjusted)', 'unit': '元'},
    'lowdays': {'cn': '连续创新低天数', 'en': 'Consecutive New Low Days', 'unit': '天'},
    'ma_bfq_10': {'cn': 'MA10(不复权)', 'en': 'MA 10 (Non-adjusted)', 'unit': '元'},
    'ma_bfq_20': {'cn': 'MA20(不复权)', 'en': 'MA 20 (Non-adjusted)', 'unit': '元'},
    'ma_bfq_250': {'cn': 'MA250(不复权)', 'en': 'MA 250 (Non-adjusted)', 'unit': '元'},
    'ma_bfq_30': {'cn': 'MA30(不复权)', 'en': 'MA 30 (Non-adjusted)', 'unit': '元'},
    'ma_bfq_5': {'cn': 'MA5(不复权)', 'en': 'MA 5 (Non-adjusted)', 'unit': '元'},
    'ma_bfq_60': {'cn': 'MA60(不复权)', 'en': 'MA 60 (Non-adjusted)', 'unit': '元'},
    'ma_bfq_90': {'cn': 'MA90(不复权)', 'en': 'MA 90 (Non-adjusted)', 'unit': '元'},
    'ma_mass_bfq': {'cn': 'MASS梅斯线MA(不复权)', 'en': 'MA of MASS (Non-adjusted)', 'unit': ''},
    'macd_bfq': {'cn': 'MACD(不复权)', 'en': 'MACD (Non-adjusted)', 'unit': ''},
    'macd_dea_bfq': {'cn': 'MACD-DEA(不复权)', 'en': 'MACD-DEA (Non-adjusted)', 'unit': ''},
    'macd_dif_bfq': {'cn': 'MACD-DIF(不复权)', 'en': 'MACD-DIF (Non-adjusted)', 'unit': ''},
    'madpo_bfq': {'cn': 'DPO均线(不复权)', 'en': 'MA of DPO (Non-adjusted)', 'unit': ''},
    'maemv_bfq': {'cn': 'EMV均线(不复权)', 'en': 'MA of EMV (Non-adjusted)', 'unit': ''},
    'maroc_bfq': {'cn': 'ROC均线(不复权)', 'en': 'MA of ROC (Non-adjusted)', 'unit': ''},
    'mass_bfq': {'cn': 'MASS梅斯线(不复权)', 'en': 'MASS (Non-adjusted)', 'unit': ''},
    'mfi_bfq': {'cn': 'MFI资金流量指标(不复权)', 'en': 'MFI (Non-adjusted)', 'unit': ''},
    'mtm_bfq': {'cn': 'MTM动量指标(不复权)', 'en': 'MTM (Non-adjusted)', 'unit': ''},
    'mtmma_bfq': {'cn': 'MTM均线(不复权)', 'en': 'MA of MTM (Non-adjusted)', 'unit': ''},
    'obv_bfq': {'cn': 'OBV能量潮(不复权)', 'en': 'OBV (Non-adjusted)', 'unit': ''},
    'open_hfq': {'cn': '开盘价(后复权)', 'en': 'Open (Forward Adjusted)', 'unit': '元'},
    'open_qfq': {'cn': '开盘价(前复权)', 'en': 'Open (Backward Adjusted)', 'unit': '元'},
    'pct_change': {'cn': '涨跌幅', 'en': 'Price Change %', 'unit': '%'},
    'pre_close_hfq': {'cn': '前收盘价(后复权)', 'en': 'Previous Close (Forward Adjusted)', 'unit': '元'},
    'pre_close_qfq': {'cn': '前收盘价(前复权)', 'en': 'Previous Close (Backward Adjusted)', 'unit': '元'},
    'psy_bfq': {'cn': 'PSY心理线(不复权)', 'en': 'PSY (Non-adjusted)', 'unit': '%'},
    'psyma_bfq': {'cn': 'PSY均线(不复权)', 'en': 'MA of PSY (Non-adjusted)', 'unit': '%'},
    'roc_bfq': {'cn': 'ROC变动率指标(不复权)', 'en': 'ROC (Non-adjusted)', 'unit': '%'},
    'rsi_bfq_12': {'cn': 'RSI12(不复权)', 'en': 'RSI 12 (Non-adjusted)', 'unit': ''},
    'rsi_bfq_24': {'cn': 'RSI24(不复权)', 'en': 'RSI 24 (Non-adjusted)', 'unit': ''},
    'rsi_bfq_6': {'cn': 'RSI6(不复权)', 'en': 'RSI 6 (Non-adjusted)', 'unit': ''},
    'taq_down_bfq': {'cn': 'TAQ下轨(不复权)', 'en': 'TAQ Lower (Non-adjusted)', 'unit': '元'},
    'taq_mid_bfq': {'cn': 'TAQ中轨(不复权)', 'en': 'TAQ Mid (Non-adjusted)', 'unit': '元'},
    'taq_up_bfq': {'cn': 'TAQ上轨(不复权)', 'en': 'TAQ Upper (Non-adjusted)', 'unit': '元'},
    'topdays': {'cn': '连续创新高天数', 'en': 'Consecutive New High Days', 'unit': '天'},
    'trix_bfq': {'cn': 'TRIX三重指数平滑均线(不复权)', 'en': 'TRIX (Non-adjusted)', 'unit': ''},
    'trma_bfq': {'cn': 'TRIX均线(不复权)', 'en': 'MA of TRIX (Non-adjusted)', 'unit': ''},
    'updays': {'cn': '连续上涨天数', 'en': 'Consecutive Up Days', 'unit': '天'},
    'vr_bfq': {'cn': 'VR容量比率(不复权)', 'en': 'VR (Non-adjusted)', 'unit': ''},
    'wr1_bfq': {'cn': 'WR威廉指标1(不复权)', 'en': 'WR 1 (Non-adjusted)', 'unit': '%'},
    'wr_bfq': {'cn': 'WR威廉指标(不复权)', 'en': 'WR (Non-adjusted)', 'unit': '%'},
    'xsii_td1_bfq': {'cn': 'XSII薛斯通道TD1(不复权)', 'en': 'XSII-TD1 (Non-adjusted)', 'unit': '元'},
    'xsii_td2_bfq': {'cn': 'XSII薛斯通道TD2(不复权)', 'en': 'XSII-TD2 (Non-adjusted)', 'unit': '元'},
    'xsii_td3_bfq': {'cn': 'XSII薛斯通道TD3(不复权)', 'en': 'XSII-TD3 (Non-adjusted)', 'unit': '元'},
    'xsii_td4_bfq': {'cn': 'XSII薛斯通道TD4(不复权)', 'en': 'XSII-TD4 (Non-adjusted)', 'unit': '元'},

    # 宏观指标补充
    'cnt_val': {'cn': '农村CPI定基指数', 'en': 'Rural CPI Index Value', 'unit': ''},
    'nt_val': {'cn': '全国CPI定基指数', 'en': 'National CPI Index Value', 'unit': ''},
    'town_val': {'cn': '城市CPI定基指数', 'en': 'Urban CPI Index Value', 'unit': ''},
    'swing': {'cn': '振幅', 'en': 'Swing', 'unit': '%'},
    'm0_mom': {'cn': 'M0环比增长率', 'en': 'M0 Growth (MoM)', 'unit': '%'},
    'm1_mom': {'cn': 'M1环比增长率', 'en': 'M1 Growth (MoM)', 'unit': '%'},
    'm2_mom': {'cn': 'M2环比增长率', 'en': 'M2 Growth (MoM)', 'unit': '%'},
    'ppi_cg_accu': {'cn': 'PPI:生产资料:累计', 'en': 'PPI Capital Goods (Cumulative)', 'unit': '%'},
    'ppi_cg_adu_accu': {'cn': 'PPI:生产资料:采掘:累计', 'en': 'PPI CG Mining (Cumulative)', 'unit': '%'},
    'ppi_cg_adu_mom': {'cn': 'PPI:生产资料:采掘:环比', 'en': 'PPI CG Mining (MoM)', 'unit': '%'},
    'ppi_cg_adu_yoy': {'cn': 'PPI:生产资料:采掘:同比', 'en': 'PPI CG Mining (YoY)', 'unit': '%'},
    'ppi_cg_c_accu': {'cn': 'PPI:生产资料:化工:累计', 'en': 'PPI CG Chemical (Cumulative)', 'unit': '%'},
    'ppi_cg_c_mom': {'cn': 'PPI:生产资料:化工:环比', 'en': 'PPI CG Chemical (MoM)', 'unit': '%'},
    'ppi_cg_c_yoy': {'cn': 'PPI:生产资料:化工:同比', 'en': 'PPI CG Chemical (YoY)', 'unit': '%'},
    'ppi_cg_dcg_accu': {'cn': 'PPI:生产资料:深加工:累计', 'en': 'PPI CG Deep Processing (Cumulative)', 'unit': '%'},
    'ppi_cg_dcg_mom': {'cn': 'PPI:生产资料:深加工:环比', 'en': 'PPI CG Deep Processing (MoM)', 'unit': '%'},
    'ppi_cg_dcg_yoy': {'cn': 'PPI:生产资料:深加工:同比', 'en': 'PPI CG Deep Processing (YoY)', 'unit': '%'},
    'ppi_cg_f_accu': {'cn': 'PPI:生产资料:燃料:累计', 'en': 'PPI CG Fuel (Cumulative)', 'unit': '%'},
    'ppi_cg_f_mom': {'cn': 'PPI:生产资料:燃料:环比', 'en': 'PPI CG Fuel (MoM)', 'unit': '%'},
    'ppi_cg_f_yoy': {'cn': 'PPI:生产资料:燃料:同比', 'en': 'PPI CG Fuel (YoY)', 'unit': '%'},
    'ppi_cg_mom': {'cn': 'PPI:生产资料:环比', 'en': 'PPI Capital Goods (MoM)', 'unit': '%'},
    'ppi_cg_yoy': {'cn': 'PPI:生产资料:同比', 'en': 'PPI Capital Goods (YoY)', 'unit': '%'},
    'ppi_mp_accu': {'cn': 'PPI:生活资料:累计', 'en': 'PPI Consumer Goods (Cumulative)', 'unit': '%'},
    'ppi_mp_mom': {'cn': 'PPI:生活资料:环比', 'en': 'PPI Consumer Goods (MoM)', 'unit': '%'},
    'ppi_mp_p_accu': {'cn': 'PPI:生活资料:一般日用品:累计', 'en': 'PPI CG General (Cumulative)', 'unit': '%'},
    'ppi_mp_p_mom': {'cn': 'PPI:生活资料:一般日用品:环比', 'en': 'PPI CG General (MoM)', 'unit': '%'},
    'ppi_mp_p_yoy': {'cn': 'PPI:生活资料:一般日用品:同比', 'en': 'PPI CG General (YoY)', 'unit': '%'},
    'ppi_mp_qm_accu': {'cn': 'PPI:生活资料:衣着:累计', 'en': 'PPI CG Clothing (Cumulative)', 'unit': '%'},
    'ppi_mp_qm_mom': {'cn': 'PPI:生活资料:衣着:环比', 'en': 'PPI CG Clothing (MoM)', 'unit': '%'},
    'ppi_mp_qm_yoy': {'cn': 'PPI:生活资料:衣着:同比', 'en': 'PPI CG Clothing (YoY)', 'unit': '%'},
    'ppi_mp_rm_accu': {'cn': 'PPI:生活资料:耐用消费品:累计', 'en': 'PPI CG Durable (Cumulative)', 'unit': '%'},
    'ppi_mp_rm_mom': {'cn': 'PPI:生活资料:耐用消费品:环比', 'en': 'PPI CG Durable (MoM)', 'unit': '%'},
    'ppi_mp_rm_yoy': {'cn': 'PPI:生活资料:耐用消费品:同比', 'en': 'PPI CG Durable (YoY)', 'unit': '%'},
    'ppi_mp_yoy': {'cn': 'PPI:生活资料:同比', 'en': 'PPI Consumer Goods (YoY)', 'unit': '%'},
    'inc_cumval': {'cn': '社融增量累计值', 'en': 'Social Financing Incremental Cumulative', 'unit': '亿元'},
    'inc_month': {'cn': '社融月度增量', 'en': 'Social Financing Monthly Increment', 'unit': '亿元'},
    'stk_endval': {'cn': '社融存量', 'en': 'Social Financing Stock', 'unit': '亿元'},
    'date': {'cn': '日期', 'en': 'Date', 'unit': '日期(YYYYMMDD)'},
    'y10': {'cn': '美债10年期收益率', 'en': 'US Treasury 10Y Yield', 'unit': '%'},
    'y20': {'cn': '美债20年期收益率', 'en': 'US Treasury 20Y Yield', 'unit': '%'},
    'y30': {'cn': '美债30年期收益率', 'en': 'US Treasury 30Y Yield', 'unit': '%'},
    'y5': {'cn': '美债5年期收益率', 'en': 'US Treasury 5Y Yield', 'unit': '%'},
    'y7': {'cn': '美债7年期收益率', 'en': 'US Treasury 7Y Yield', 'unit': '%'},
    'exchange_id': {'cn': '交易所代码', 'en': 'Exchange ID', 'unit': '代码'},
}

# 存储所有列的信息
column_info = []

# 定义接口信息字典
api_info = {
    'pro_bar': 'https://tushare.pro/document/2?doc_id=109',
    'daily_basic': 'https://tushare.pro/document/2?doc_id=32',
    'fina_indicator': 'https://tushare.pro/document/2?doc_id=79',
    'stk_holdernumber': 'https://tushare.pro/document/2?doc_id=166',
    'cyq_perf': 'https://tushare.pro/document/2?doc_id=293',
    'stk_factor': 'https://tushare.pro/document/2?doc_id=328',
    'margin_detail': 'https://tushare.pro/document/2?doc_id=59',
    'fund_daily': 'https://tushare.pro/document/2?doc_id=127',
    'fund_adj': 'https://tushare.pro/document/2?doc_id=199',
    'index_daily': 'https://tushare.pro/document/2?doc_id=95',
    'index_dailybasic': 'https://tushare.pro/document/2?doc_id=128',
    'idx_factor_pro': 'https://tushare.pro/document/2?doc_id=358',
    'moneyflow_hsgt': 'https://tushare.pro/document/2?doc_id=47',
    'margin': 'https://tushare.pro/document/2?doc_id=58',
    'shibor': 'https://tushare.pro/document/2?doc_id=149',
    'cn_gdp': 'https://tushare.pro/document/2?doc_id=227',
    'cn_cpi': 'https://tushare.pro/document/2?doc_id=228',
    'cn_ppi': 'https://tushare.pro/document/2?doc_id=245',
    'cn_m': 'https://tushare.pro/document/2?doc_id=242',
    'sf_month': 'https://tushare.pro/document/2?doc_id=310',
    'us_trycr': 'https://tushare.pro/document/2?doc_id=219',
    'index_global': 'https://tushare.pro/document/2?doc_id=211'
}

def get_field_description(field_name):
    """获取字段的中英文说明"""
    if field_name in field_descriptions:
        return (field_descriptions[field_name]['cn'], 
                field_descriptions[field_name]['en'], 
                field_descriptions[field_name]['unit'])
    else:
        return ('待补充', '待补充', '待补充')

def add_columns_to_info(df, prefix, level1, level2, api_name):
    """将DataFrame的所有列添加到column_info列表中"""
    for col in df.columns:
        # 提取原始字段名(去除前缀)
        if col.startswith(prefix):
            original_field = col[len(prefix):]
        else:
            original_field = col
        
        cn, en, unit = get_field_description(original_field)
        column_info.append([col, level1, level2, api_info[api_name], original_field, cn, en, unit])

# 收集所有数据源的列信息
print("\n正在收集所有数据源的列信息...")

# 1. 个股数据
stocks = ['600036.SH']
print("  - 个股数据...")

# 1.1 复权行情
time.sleep(0.5)
df = ts.pro_bar(ts_code=stocks[0], adj='hfq', start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'indiv_d_', 'indiv', 'd', 'pro_bar')

# 1.2 每日指标
time.sleep(0.5)
df = pro.daily_basic(ts_code=stocks[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'indiv_din_', 'indiv', 'din', 'daily_basic')

# 1.3 财务指标
time.sleep(0.5)
df = pro.fina_indicator(ts_code=stocks[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'indiv_fin_', 'indiv', 'fin', 'fina_indicator')

# 1.4 股东人数
time.sleep(0.5)
df = pro.stk_holdernumber(ts_code=stocks[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'indiv_sh_', 'indiv', 'sh', 'stk_holdernumber')

# 1.5 筹码胜率
time.sleep(0.5)
df = pro.cyq_perf(ts_code=stocks[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'indiv_chip_', 'indiv', 'chip', 'cyq_perf')

# 1.6 技术因子
time.sleep(0.5)
df = pro.stk_factor(ts_code=stocks[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'indiv_techin_', 'indiv', 'techin', 'stk_factor')

# 1.7 融资融券
time.sleep(0.5)
df = pro.margin_detail(ts_code=stocks[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'indiv_margin_', 'indiv', 'margin', 'margin_detail')

# 2. ETF数据
print("  - ETF数据...")
etfs = ['510300.SH']
indices = ['000300.SH']

# 2.1 ETF日线
time.sleep(0.5)
df = pro.fund_daily(ts_code=etfs[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'indiv_d_', 'indiv', 'd', 'fund_daily')

# 2.2 复权因子
time.sleep(0.5)
df = pro.fund_adj(ts_code=etfs[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'indiv_d_', 'indiv', 'd', 'fund_adj')

# 2.3 指数日线
time.sleep(0.5)
df = pro.index_daily(ts_code=indices[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'indiv_ind_', 'indiv', 'ind', 'index_daily')

# 2.4 指数每日指标
time.sleep(0.5)
df = pro.index_dailybasic(ts_code=indices[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'indiv_din_', 'indiv', 'din', 'index_dailybasic')

# 2.5 指数技术因子
time.sleep(0.5)
df = pro.idx_factor_pro(ts_code=indices[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'indiv_techin_', 'indiv', 'techin', 'idx_factor_pro')

# 3. 大盘数据
print("  - 大盘数据...")
market_indices = ['000001.SH']

# 3.1 大盘指数每日指标
time.sleep(0.5)
df = pro.index_dailybasic(ts_code=market_indices[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'market_din_', 'market', 'din', 'index_dailybasic')

# 3.2 资金流向
time.sleep(0.5)
df = pro.moneyflow_hsgt(start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'market_flow_', 'market', 'flow', 'moneyflow_hsgt')

# 3.3 融资融券汇总
time.sleep(0.5)
df = pro.margin(start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'market_margin_', 'market', 'margin', 'margin')

# 4. 宏观数据
print("  - 宏观数据...")

# 4.1 SHIBOR
time.sleep(0.5)
df = pro.shibor(start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'macro_shibor_', 'macro', 'shibor', 'shibor')

# 4.2 GDP
start_q = start_date[:4] + '1'
end_q = end_date[:4] + '4'
time.sleep(0.5)
df = pro.cn_gdp(start_q=start_q, end_q=end_q, limit=1)
add_columns_to_info(df, 'macro_gdp_', 'macro', 'gdp', 'cn_gdp')

# 4.3 CPI
start_m = start_date[:6]
end_m = end_date[:6]
time.sleep(0.5)
df = pro.cn_cpi(start_m=start_m, end_m=end_m, limit=1)
add_columns_to_info(df, 'macro_cpi_', 'macro', 'cpi', 'cn_cpi')

# 4.4 PPI
time.sleep(0.5)
df = pro.cn_ppi(start_m=start_m, end_m=end_m, limit=1)
add_columns_to_info(df, 'macro_ppi_', 'macro', 'ppi', 'cn_ppi')

# 4.5 货币供应
time.sleep(0.5)
df = pro.cn_m(start_m=start_m, end_m=end_m, limit=1)
add_columns_to_info(df, 'macro_m_', 'macro', 'm', 'cn_m')

# 4.6 社融
time.sleep(0.5)
df = pro.sf_month(start_m=start_m, end_m=end_m, limit=1)
add_columns_to_info(df, 'macro_sf_', 'macro', 'sf', 'sf_month')

# 4.7 美债收益率
time.sleep(0.5)
df = pro.us_trycr(start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'macro_ust_', 'macro', 'ust', 'us_trycr')

# 4.8 国际指数
international_indices = ['SPX']
time.sleep(0.5)
df = pro.index_global(ts_code=international_indices[0], start_date=start_date, end_date=end_date, limit=1)
add_columns_to_info(df, 'macro_im_', 'macro', 'im', 'index_global')

# 创建DataFrame并输出
column_df = pd.DataFrame(column_info, columns=['当前列名', '一级来源', '二级分类', 'tushare接口链接', '字段名', '中文含义', '英文含义', '单位'])

# # 去重
# column_df = column_df.drop_duplicates(subset=['当前列名'], keep='first')

# # 排序
# column_df = column_df.sort_values(['一级来源', '二级分类', '当前列名']).reset_index(drop=True)

print(f"\n共收集到 {len(column_df)} 个不同的列")
print(f"其中已补充说明 {len(column_df[column_df['中文含义'] != '待补充'])} 个，待补充 {len(column_df[column_df['中文含义'] == '待补充'])} 个")

# 保存到CSV
if not os.path.exists('data'):
    os.makedirs('data')
column_df.to_csv('data/column_description.csv', index=False, encoding='utf-8-sig')
print(f"\n列名说明表格已保存到 data/column_description.csv")

# 显示前30行(包含所有类型的列)
print("\n" + "=" * 150)
print("列名说明表格预览（前30行）:")
print("=" * 150)
print(column_df.head(30).to_string(index=False))

# 显示待补充的列
print("\n" + "=" * 150)
print("待补充说明的列（前20个）:")
print("=" * 150)
pending_df = column_df[column_df['中文含义'] == '待补充'].head(20)
print(pending_df[['当前列名', '字段名', 'tushare接口链接']].to_string(index=False))

# 按一级来源统计
print("\n" + "=" * 150)
print("按一级来源统计:")
print("=" * 150)
stats = column_df.groupby('一级来源').agg({
    '当前列名': 'count',
    '中文含义': lambda x: (x != '待补充').sum()
}).rename(columns={'当前列名': '总列数', '中文含义': '已补充说明'})
stats['待补充'] = stats['总列数'] - stats['已补充说明']
print(stats)

# 按二级分类统计
print("\n" + "=" * 150)
print("按二级分类详细统计:")
print("=" * 150)
stats2 = column_df.groupby(['一级来源', '二级分类']).agg({
    '当前列名': 'count',
    '中文含义': lambda x: (x != '待补充').sum()
}).rename(columns={'当前列名': '总列数', '中文含义': '已补充说明'})
stats2['待补充'] = stats2['总列数'] - stats2['已补充说明']
print(stats2)

列名说明表格(含所有列)

正在收集所有数据源的列信息...
  - 个股数据...


  data['adj_factor'] = data['adj_factor'].fillna(method='bfill')


  - ETF数据...
  - 大盘数据...
  - 宏观数据...

共收集到 444 个不同的列
其中已补充说明 444 个，待补充 0 个

列名说明表格已保存到 data/column_description.csv

列名说明表格预览（前30行）:
           当前列名  一级来源 二级分类                               tushare接口链接             字段名         中文含义                       英文含义           单位
        ts_code indiv    d https://tushare.pro/document/2?doc_id=109         ts_code         证券代码                    TS Code           代码
     trade_date indiv    d https://tushare.pro/document/2?doc_id=109      trade_date         交易日期                 Trade Date 日期(YYYYMMDD)
           open indiv    d https://tushare.pro/document/2?doc_id=109            open          开盘价                 Open Price            元
           high indiv    d https://tushare.pro/document/2?doc_id=109            high          最高价                 High Price            元
            low indiv    d https://tushare.pro/document/2?doc_id=109             low          最低价                  Low Price            元
          close indiv    d https://tusha

In [33]:
# 生成列名说明表格(从最终生成的文件中读取列名)

print("=" * 150)
print("列名说明表格(从实际生成的数据文件中提取)")
print("=" * 150)

# 常见指标的中英文说明字典
field_descriptions = {
    # 通用字段
    'trade_date': {'cn': '交易日期', 'en': 'Trade Date', 'unit': '日期(YYYYMMDD)'},
    'ts_code': {'cn': '证券代码', 'en': 'TS Code', 'unit': '代码'},
    'end_date': {'cn': '截止日期', 'en': 'End Date', 'unit': '日期'},
    'ann_date': {'cn': '公告日期', 'en': 'Announcement Date', 'unit': '日期'},
    'f_ann_date': {'cn': '实际公告日期', 'en': 'Actual Announcement Date', 'unit': '日期'},
    'quarter': {'cn': '季度', 'en': 'Quarter', 'unit': 'YYYYQ'},
    'month': {'cn': '月份', 'en': 'Month', 'unit': 'YYYYMM'},
    
    # 行情数据
    'open': {'cn': '开盘价', 'en': 'Open Price', 'unit': '元'},
    'high': {'cn': '最高价', 'en': 'High Price', 'unit': '元'},
    'low': {'cn': '最低价', 'en': 'Low Price', 'unit': '元'},
    'close': {'cn': '收盘价', 'en': 'Close Price', 'unit': '元'},
    'pre_close': {'cn': '前收盘价', 'en': 'Previous Close', 'unit': '元'},
    'change': {'cn': '涨跌额', 'en': 'Change', 'unit': '元'},
    'pct_chg': {'cn': '涨跌幅', 'en': 'Price Change Percentage', 'unit': '%'},
    'vol': {'cn': '成交量', 'en': 'Volume', 'unit': '手'},
    'amount': {'cn': '成交额', 'en': 'Amount', 'unit': '千元'},
    
    # 每日指标
    'turnover_rate': {'cn': '换手率', 'en': 'Turnover Rate', 'unit': '%'},
    'turnover_rate_f': {'cn': '换手率(自由流通股)', 'en': 'Turnover Rate (Free Float)', 'unit': '%'},
    'volume_ratio': {'cn': '量比', 'en': 'Volume Ratio', 'unit': '倍'},
    'pe': {'cn': '市盈率(总市值/净利润)', 'en': 'P/E Ratio', 'unit': '倍'},
    'pe_ttm': {'cn': '市盈率(TTM)', 'en': 'P/E Ratio (TTM)', 'unit': '倍'},
    'pb': {'cn': '市净率(总市值/净资产)', 'en': 'P/B Ratio', 'unit': '倍'},
    'ps': {'cn': '市销率', 'en': 'P/S Ratio', 'unit': '倍'},
    'ps_ttm': {'cn': '市销率(TTM)', 'en': 'P/S Ratio (TTM)', 'unit': '倍'},
    'dv_ratio': {'cn': '股息率', 'en': 'Dividend Yield', 'unit': '%'},
    'dv_ttm': {'cn': '股息率(TTM)', 'en': 'Dividend Yield (TTM)', 'unit': '%'},
    'total_share': {'cn': '总股本', 'en': 'Total Shares', 'unit': '万股'},
    'float_share': {'cn': '流通股本', 'en': 'Float Shares', 'unit': '万股'},
    'free_share': {'cn': '自由流通股本', 'en': 'Free Float Shares', 'unit': '万股'},
    'total_mv': {'cn': '总市值', 'en': 'Total Market Value', 'unit': '万元'},
    'circ_mv': {'cn': '流通市值', 'en': 'Circulating Market Value', 'unit': '万元'},
    
    # 财务指标
    'eps': {'cn': '基本每股收益', 'en': 'Earnings Per Share', 'unit': '元'},
    'dt_eps': {'cn': '稀释每股收益', 'en': 'Diluted EPS', 'unit': '元'},
    'total_revenue_ps': {'cn': '每股营业总收入', 'en': 'Revenue Per Share', 'unit': '元'},
    'revenue_ps': {'cn': '每股营业收入', 'en': 'Operating Revenue Per Share', 'unit': '元'},
    'capital_rese_ps': {'cn': '每股资本公积', 'en': 'Capital Reserve Per Share', 'unit': '元'},
    'surplus_rese_ps': {'cn': '每股盈余公积', 'en': 'Surplus Reserve Per Share', 'unit': '元'},
    'undist_profit_ps': {'cn': '每股未分配利润', 'en': 'Undistributed Profit Per Share', 'unit': '元'},
    'extra_item': {'cn': '非经常性损益', 'en': 'Extraordinary Items', 'unit': '元'},
    'profit_dedt': {'cn': '扣除非经常性损益后的净利润', 'en': 'Net Profit Excl. Non-recurring', 'unit': '元'},
    'gross_margin': {'cn': '毛利', 'en': 'Gross Margin', 'unit': '元'},
    'current_ratio': {'cn': '流动比率', 'en': 'Current Ratio', 'unit': ''},
    'quick_ratio': {'cn': '速动比率', 'en': 'Quick Ratio', 'unit': ''},
    'cash_ratio': {'cn': '保守速动比率', 'en': 'Cash Ratio', 'unit': ''},
    'ar_turn': {'cn': '应收账款周转率', 'en': 'AR Turnover', 'unit': '次'},
    'ca_turn': {'cn': '流动资产周转率', 'en': 'Current Asset Turnover', 'unit': '次'},
    'fa_turn': {'cn': '固定资产周转率', 'en': 'Fixed Asset Turnover', 'unit': '次'},
    'assets_turn': {'cn': '总资产周转率', 'en': 'Total Asset Turnover', 'unit': '次'},
    'op_income': {'cn': '经营活动净收益', 'en': 'Operating Income', 'unit': '元'},
    'valuechange_income': {'cn': '价值变动净收益', 'en': 'Fair Value Change Income', 'unit': '元'},
    'interst_income': {'cn': '利息费用', 'en': 'Interest Expense', 'unit': '元'},
    'daa': {'cn': '折旧与摊销', 'en': 'D&A', 'unit': '元'},
    'ebit': {'cn': '息税前利润', 'en': 'EBIT', 'unit': '元'},
    'ebitda': {'cn': '息税折旧摊销前利润', 'en': 'EBITDA', 'unit': '元'},
    'fcff': {'cn': '企业自由现金流量', 'en': 'FCFF', 'unit': '元'},
    'fcfe': {'cn': '股权自由现金流量', 'en': 'FCFE', 'unit': '元'},
    'debt_to_assets': {'cn': '资产负债率', 'en': 'Debt to Assets', 'unit': '%'},
    'assets_to_eqt': {'cn': '权益乘数', 'en': 'Equity Multiplier', 'unit': ''},
    'dp_assets_to_eqt': {'cn': '权益乘数(杜邦分析)', 'en': 'Equity Multiplier (DuPont)', 'unit': ''},
    'ca_to_assets': {'cn': '流动资产/总资产', 'en': 'Current Assets to Total Assets', 'unit': '%'},
    'nca_to_assets': {'cn': '非流动资产/总资产', 'en': 'Non-current Assets to Total Assets', 'unit': '%'},
    'tbassets_to_totalassets': {'cn': '有形资产/总资产', 'en': 'Tangible Assets to Total Assets', 'unit': '%'},
    'int_to_talcap': {'cn': '带息债务/全部投入资本', 'en': 'Interest-bearing Debt to Total Capital', 'unit': '%'},
    'eqt_to_talcapital': {'cn': '归属于母公司的股东权益/全部投入资本', 'en': 'Equity to Total Capital', 'unit': '%'},
    'currentdebt_to_debt': {'cn': '流动负债/负债合计', 'en': 'Current Debt to Total Debt', 'unit': '%'},
    'longdeb_to_debt': {'cn': '非流动负债/负债合计', 'en': 'Long-term Debt to Total Debt', 'unit': '%'},
    'ocf_to_shortdebt': {'cn': '经营活动产生的现金流量净额/流动负债', 'en': 'OCF to Short-term Debt', 'unit': ''},
    'debt_to_eqt': {'cn': '产权比率', 'en': 'Debt to Equity', 'unit': '%'},
    'eqt_to_debt': {'cn': '归属于母公司的股东权益/负债合计', 'en': 'Equity to Debt', 'unit': '%'},
    'eqt_to_interestdebt': {'cn': '归属于母公司的股东权益/带息债务', 'en': 'Equity to Interest-bearing Debt', 'unit': '%'},
    'tangibleasset_to_debt': {'cn': '有形资产/负债合计', 'en': 'Tangible Assets to Debt', 'unit': '%'},
    'tangasset_to_intdebt': {'cn': '有形资产/带息债务', 'en': 'Tangible Assets to Interest-bearing Debt', 'unit': '%'},
    'tangibleasset_to_netdebt': {'cn': '有形资产/净债务', 'en': 'Tangible Assets to Net Debt', 'unit': '%'},
    'ocf_to_debt': {'cn': '经营活动产生的现金流量净额/负债合计', 'en': 'OCF to Total Debt', 'unit': '%'},
    'ocf_to_interestdebt': {'cn': '经营活动产生的现金流量净额/带息债务', 'en': 'OCF to Interest-bearing Debt', 'unit': '%'},
    'ocf_to_netdebt': {'cn': '经营活动产生的现金流量净额/净债务', 'en': 'OCF to Net Debt', 'unit': '%'},
    'ebit_to_interest': {'cn': '已获利息倍数(EBIT/利息费用)', 'en': 'Interest Coverage Ratio', 'unit': '倍'},
    'longdebt_to_workingcapital': {'cn': '长期债务与营运资金比率', 'en': 'Long-term Debt to Working Capital', 'unit': '%'},
    'ebitda_to_debt': {'cn': 'EBITDA/负债合计', 'en': 'EBITDA to Total Debt', 'unit': ''},
    'turn_days': {'cn': '营业周期', 'en': 'Operating Cycle', 'unit': '天'},
    'roa_yearly': {'cn': '年化总资产净利率', 'en': 'Annualized ROA', 'unit': '%'},
    'roa_dp': {'cn': '总资产净利率(杜邦分析)', 'en': 'ROA (DuPont)', 'unit': '%'},
    'fixed_assets': {'cn': '固定资产合计', 'en': 'Fixed Assets', 'unit': '元'},
    'profit_prefin_exp': {'cn': '扣除财务费用前营业利润', 'en': 'Operating Profit Before Fin. Exp.', 'unit': '元'},
    'non_op_profit': {'cn': '非营业利润', 'en': 'Non-operating Profit', 'unit': '元'},
    'op_to_ebt': {'cn': '营业利润/利润总额', 'en': 'Operating Profit to EBT', 'unit': '%'},
    'nop_to_ebt': {'cn': '非营业利润/利润总额', 'en': 'Non-operating Profit to EBT', 'unit': '%'},
    'ocf_to_profit': {'cn': '经营活动产生的现金流量净额/营业利润', 'en': 'OCF to Operating Profit', 'unit': '%'},
    'cash_to_liqdebt': {'cn': '货币资金/流动负债', 'en': 'Cash to Current Liabilities', 'unit': '%'},
    'cash_to_liqdebt_withinterest': {'cn': '货币资金/带息流动负债', 'en': 'Cash to Interest-bearing Current Liabilities', 'unit': '%'},
    'op_to_liqdebt': {'cn': '营业利润/流动负债', 'en': 'Operating Profit to Current Liabilities', 'unit': '%'},
    'op_to_debt': {'cn': '营业利润/负债合计', 'en': 'Operating Profit to Total Debt', 'unit': '%'},
    'roic_yearly': {'cn': '年化投入资本回报率', 'en': 'Annualized ROIC', 'unit': '%'},
    'total_capital': {'cn': '全部投入资本', 'en': 'Total Capital', 'unit': '元'},
    'debt_to_revenue': {'cn': '带息债务/营业总收入', 'en': 'Interest-bearing Debt to Revenue', 'unit': '%'},
    'debt_to_ebitda': {'cn': '带息债务/EBITDA', 'en': 'Interest-bearing Debt to EBITDA', 'unit': ''},
    'op_of_gr': {'cn': '营业总收入同比增长率', 'en': 'Revenue Growth Rate', 'unit': '%'},
    'or_of_gr': {'cn': '营业收入同比增长率', 'en': 'Operating Revenue Growth Rate', 'unit': '%'},
    'profit_to_gr': {'cn': '利润总额同比增长率', 'en': 'EBT Growth Rate', 'unit': '%'},
    'netprofit_yoy': {'cn': '归属母公司股东的净利润同比增长率', 'en': 'Net Profit Growth Rate (YoY)', 'unit': '%'},
    'netprofit_margin': {'cn': '销售净利率', 'en': 'Net Profit Margin', 'unit': '%'},
    'grossprofit_margin': {'cn': '销售毛利率', 'en': 'Gross Profit Margin', 'unit': '%'},
    'cogs_of_sales': {'cn': '销售成本率', 'en': 'Cost of Sales Ratio', 'unit': '%'},
    'expense_of_sales': {'cn': '销售期间费用率', 'en': 'Operating Expense Ratio', 'unit': '%'},
    'profit_of_op': {'cn': '营业利润率', 'en': 'Operating Profit Margin', 'unit': '%'},
    'gc_of_gr': {'cn': '营业总成本/营业总收入', 'en': 'Total Operating Cost to Revenue', 'unit': '%'},
    'sale_expense': {'cn': '销售费用/营业总收入', 'en': 'Selling Expense Ratio', 'unit': '%'},
    'admin_expense_of_gr': {'cn': '管理费用/营业总收入', 'en': 'Admin Expense Ratio', 'unit': '%'},
    'fin_exp_of_gr': {'cn': '财务费用/营业总收入', 'en': 'Financial Expense Ratio', 'unit': '%'},
    'impai_ttm': {'cn': '资产减值损失/营业总收入', 'en': 'Impairment Loss Ratio', 'unit': '%'},
    'gc_of_gr_ttm': {'cn': '营业总成本/营业总收入(TTM)', 'en': 'Total Operating Cost to Revenue (TTM)', 'unit': '%'},
    'sale_expense_ttm': {'cn': '销售费用/营业总收入(TTM)', 'en': 'Selling Expense Ratio (TTM)', 'unit': '%'},
    'admin_expense_of_gr_ttm': {'cn': '管理费用/营业总收入(TTM)', 'en': 'Admin Expense Ratio (TTM)', 'unit': '%'},
    'fin_exp_of_gr_ttm': {'cn': '财务费用/营业总收入(TTM)', 'en': 'Financial Expense Ratio (TTM)', 'unit': '%'},
    'impai_of_gr_ttm': {'cn': '资产减值损失/营业总收入(TTM)', 'en': 'Impairment Loss Ratio (TTM)', 'unit': '%'},
    'roe': {'cn': '净资产收益率', 'en': 'Return on Equity', 'unit': '%'},
    'roe_waa': {'cn': '加权平均净资产收益率', 'en': 'Weighted Average ROE', 'unit': '%'},
    'roe_dt': {'cn': '净资产收益率(扣除非经常损益)', 'en': 'ROE (Excl. Non-recurring)', 'unit': '%'},
    'roa': {'cn': '总资产报酬率', 'en': 'Return on Assets', 'unit': '%'},
    'npta': {'cn': '总资产净利润', 'en': 'Net Profit on Total Assets', 'unit': '%'},
    'roic': {'cn': '投入资本回报率', 'en': 'Return on Invested Capital', 'unit': '%'},
    'roe_yearly': {'cn': '年化净资产收益率', 'en': 'Annualized ROE', 'unit': '%'},
    
    # 股东人数
    'holder_num': {'cn': '股东户数', 'en': 'Number of Shareholders', 'unit': '户'},
    
    # 筹码胜率
    'his_low': {'cn': '历史最低价', 'en': 'Historical Low', 'unit': '元'},
    'his_high': {'cn': '历史最高价', 'en': 'Historical High', 'unit': '元'},
    'cost_5pct': {'cn': '5分位成本', 'en': '5th Percentile Cost', 'unit': '元'},
    'cost_15pct': {'cn': '15分位成本', 'en': '15th Percentile Cost', 'unit': '元'},
    'cost_50pct': {'cn': '50分位成本', 'en': 'Median Cost', 'unit': '元'},
    'cost_85pct': {'cn': '85分位成本', 'en': '85th Percentile Cost', 'unit': '元'},
    'cost_95pct': {'cn': '95分位成本', 'en': '95th Percentile Cost', 'unit': '元'},
    'weight_avg': {'cn': '加权平均成本', 'en': 'Weighted Average Cost', 'unit': '元'},
    'winner_rate': {'cn': '胜率', 'en': 'Winner Rate', 'unit': '%'},
    
    # 技术因子
    'macd_dif': {'cn': 'MACD-DIF', 'en': 'MACD DIF', 'unit': ''},
    'macd_dea': {'cn': 'MACD-DEA', 'en': 'MACD DEA', 'unit': ''},
    'macd': {'cn': 'MACD', 'en': 'MACD', 'unit': ''},
    'kdj_k': {'cn': 'KDJ-K', 'en': 'KDJ K', 'unit': ''},
    'kdj_d': {'cn': 'KDJ-D', 'en': 'KDJ D', 'unit': ''},
    'kdj_j': {'cn': 'KDJ-J', 'en': 'KDJ J', 'unit': ''},
    'rsi_6': {'cn': 'RSI(6日)', 'en': 'RSI (6-day)', 'unit': ''},
    'rsi_12': {'cn': 'RSI(12日)', 'en': 'RSI (12-day)', 'unit': ''},
    'rsi_24': {'cn': 'RSI(24日)', 'en': 'RSI (24-day)', 'unit': ''},
    'boll_upper': {'cn': '布林上轨', 'en': 'Bollinger Upper Band', 'unit': '元'},
    'boll_mid': {'cn': '布林中轨', 'en': 'Bollinger Middle Band', 'unit': '元'},
    'boll_lower': {'cn': '布林下轨', 'en': 'Bollinger Lower Band', 'unit': '元'},
    'cci': {'cn': 'CCI指标', 'en': 'CCI', 'unit': ''},
    
    # 融资融券
    'rzye': {'cn': '融资余额', 'en': 'Margin Balance', 'unit': '元'},
    'rzmre': {'cn': '融资买入额', 'en': 'Margin Purchase', 'unit': '元'},
    'rzche': {'cn': '融资偿还额', 'en': 'Margin Repayment', 'unit': '元'},
    'rqye': {'cn': '融券余额', 'en': 'Short Balance', 'unit': '元'},
    'rqmcl': {'cn': '融券卖出量', 'en': 'Short Sale Volume', 'unit': '股'},
    'rqchl': {'cn': '融券偿还量', 'en': 'Short Repayment Volume', 'unit': '股'},
    'rqyl': {'cn': '融券余量', 'en': 'Outstanding Short Volume', 'unit': '股'},
    'rzrqye': {'cn': '融资融券余额', 'en': 'Margin + Short Balance', 'unit': '元'},
    
    # ETF/指数
    'nav': {'cn': '单位净值', 'en': 'Net Asset Value', 'unit': '元'},
    'accum_nav': {'cn': '累计净值', 'en': 'Accumulated NAV', 'unit': '元'},
    'adj_factor': {'cn': '复权因子', 'en': 'Adjustment Factor', 'unit': ''},
    
    # 资金流向
    'ggt_ss': {'cn': '港股通(沪)当日成交金额', 'en': 'HK Connect (SH) Turnover', 'unit': '亿元'},
    'ggt_sz': {'cn': '港股通(深)当日成交金额', 'en': 'HK Connect (SZ) Turnover', 'unit': '亿元'},
    'hgt': {'cn': '沪股通当日成交金额', 'en': 'SH Connect Turnover', 'unit': '亿元'},
    'sgt': {'cn': '深股通当日成交金额', 'en': 'SZ Connect Turnover', 'unit': '亿元'},
    'north_money': {'cn': '北向资金当日成交金额', 'en': 'Northbound Money', 'unit': '亿元'},
    'south_money': {'cn': '南向资金当日成交金额', 'en': 'Southbound Money', 'unit': '亿元'},
    
    # 宏观数据
    'on': {'cn': 'SHIBOR隔夜利率', 'en': 'SHIBOR Overnight', 'unit': '%'},
    '1w': {'cn': 'SHIBOR 1周利率', 'en': 'SHIBOR 1 Week', 'unit': '%'},
    '2w': {'cn': 'SHIBOR 2周利率', 'en': 'SHIBOR 2 Week', 'unit': '%'},
    '1m': {'cn': 'SHIBOR 1月利率', 'en': 'SHIBOR 1 Month', 'unit': '%'},
    '3m': {'cn': 'SHIBOR 3月利率', 'en': 'SHIBOR 3 Month', 'unit': '%'},
    '6m': {'cn': 'SHIBOR 6月利率', 'en': 'SHIBOR 6 Month', 'unit': '%'},
    '9m': {'cn': 'SHIBOR 9月利率', 'en': 'SHIBOR 9 Month', 'unit': '%'},
    '1y': {'cn': 'SHIBOR 1年利率/美债1年期收益率', 'en': 'SHIBOR 1 Year / US Treasury 1Y', 'unit': '%'},
    'gdp': {'cn': 'GDP', 'en': 'GDP', 'unit': '亿元'},
    'gdp_yoy': {'cn': 'GDP同比增长率', 'en': 'GDP Growth Rate (YoY)', 'unit': '%'},
    'pi': {'cn': '第一产业增加值', 'en': 'Primary Industry', 'unit': '亿元'},
    'pi_yoy': {'cn': '第一产业同比增长率', 'en': 'Primary Industry Growth (YoY)', 'unit': '%'},
    'si': {'cn': '第二产业增加值', 'en': 'Secondary Industry', 'unit': '亿元'},
    'si_yoy': {'cn': '第二产业同比增长率', 'en': 'Secondary Industry Growth (YoY)', 'unit': '%'},
    'ti': {'cn': '第三产业增加值', 'en': 'Tertiary Industry', 'unit': '亿元'},
    'ti_yoy': {'cn': '第三产业同比增长率', 'en': 'Tertiary Industry Growth (YoY)', 'unit': '%'},
    'nt_yoy': {'cn': '全国居民消费价格同比', 'en': 'CPI (YoY)', 'unit': '%'},
    'nt_mom': {'cn': '全国居民消费价格环比', 'en': 'CPI (MoM)', 'unit': '%'},
    'nt_accu': {'cn': '全国居民消费价格累计', 'en': 'CPI (Cumulative)', 'unit': '%'},
    'town_yoy': {'cn': '城市居民消费价格同比', 'en': 'Urban CPI (YoY)', 'unit': '%'},
    'town_mom': {'cn': '城市居民消费价格环比', 'en': 'Urban CPI (MoM)', 'unit': '%'},
    'town_accu': {'cn': '城市居民消费价格累计', 'en': 'Urban CPI (Cumulative)', 'unit': '%'},
    'cnt_yoy': {'cn': '农村居民消费价格同比', 'en': 'Rural CPI (YoY)', 'unit': '%'},
    'cnt_mom': {'cn': '农村居民消费价格环比', 'en': 'Rural CPI (MoM)', 'unit': '%'},
    'cnt_accu': {'cn': '农村居民消费价格累计', 'en': 'Rural CPI (Cumulative)', 'unit': '%'},
    'ppi_yoy': {'cn': 'PPI同比', 'en': 'PPI (YoY)', 'unit': '%'},
    'ppi_mom': {'cn': 'PPI环比', 'en': 'PPI (MoM)', 'unit': '%'},
    'ppi_accu': {'cn': 'PPI累计', 'en': 'PPI (Cumulative)', 'unit': '%'},
    'm0': {'cn': 'M0货币供应量', 'en': 'M0', 'unit': '亿元'},
    'm0_yoy': {'cn': 'M0同比增长率', 'en': 'M0 Growth (YoY)', 'unit': '%'},
    'm1': {'cn': 'M1货币供应量', 'en': 'M1', 'unit': '亿元'},
    'm1_yoy': {'cn': 'M1同比增长率', 'en': 'M1 Growth (YoY)', 'unit': '%'},
    'm2': {'cn': 'M2货币供应量', 'en': 'M2', 'unit': '亿元'},
    'm2_yoy': {'cn': 'M2同比增长率', 'en': 'M2 Growth (YoY)', 'unit': '%'},
    '2y': {'cn': '美债2年期收益率', 'en': 'US Treasury 2Y Yield', 'unit': '%'},
    '3y': {'cn': '美债3年期收益率', 'en': 'US Treasury 3Y Yield', 'unit': '%'},
    '5y': {'cn': '美债5年期收益率', 'en': 'US Treasury 5Y Yield', 'unit': '%'},
    '7y': {'cn': '美债7年期收益率', 'en': 'US Treasury 7Y Yield', 'unit': '%'},
    '10y': {'cn': '美债10年期收益率', 'en': 'US Treasury 10Y Yield', 'unit': '%'},
    '20y': {'cn': '美债20年期收益率', 'en': 'US Treasury 20Y Yield', 'unit': '%'},
    '30y': {'cn': '美债30年期收益率', 'en': 'US Treasury 30Y Yield', 'unit': '%'},
    
    # 每日指标
    'float_mv': {'cn': '流通市值(指数)', 'en': 'Float Market Value (Index)', 'unit': '亿元'},

    # 财务指标补充
    'adminexp_of_gr': {'cn': '管理费用与营业总收入之比', 'en': 'Admin Expense to Gross Revenue', 'unit': '%'},
    'assets_yoy': {'cn': '总资产同比增长率', 'en': 'Total Assets Growth (YoY)', 'unit': '%'},
    'basic_eps_yoy': {'cn': '基本每股收益同比增长率', 'en': 'Basic EPS Growth (YoY)', 'unit': '%'},
    'bps': {'cn': '每股净资产', 'en': 'Book Value Per Share', 'unit': '元'},
    'bps_yoy': {'cn': '每股净资产同比增长率', 'en': 'BPS Growth (YoY)', 'unit': '%'},
    'cfps': {'cn': '每股现金流量净额', 'en': 'Cash Flow Per Share', 'unit': '元'},
    'cfps_yoy': {'cn': '每股现金流量净额同比增长率', 'en': 'CFPS Growth (YoY)', 'unit': '%'},
    'current_exint': {'cn': '流动负债合计(带息)', 'en': 'Current Liabilities with Interest', 'unit': '元'},
    'diluted2_eps': {'cn': '稀释每股收益(摊薄)', 'en': 'Diluted EPS (Fully Diluted)', 'unit': '元'},
    'dt_eps_yoy': {'cn': '稀释每股收益同比增长率', 'en': 'Diluted EPS Growth (YoY)', 'unit': '%'},
    'dt_netprofit_yoy': {'cn': '扣非净利润同比增长率', 'en': 'Net Profit Excl. Non-recurring Growth (YoY)', 'unit': '%'},
    'ebit_of_gr': {'cn': 'EBIT与营业总收入之比', 'en': 'EBIT to Gross Revenue', 'unit': '%'},
    'ebit_ps': {'cn': '每股EBIT', 'en': 'EBIT Per Share', 'unit': '元'},
    'ebt_yoy': {'cn': '利润总额同比增长率', 'en': 'EBT Growth (YoY)', 'unit': '%'},
    'eqt_yoy': {'cn': '净资产同比增长率', 'en': 'Equity Growth (YoY)', 'unit': '%'},
    'equity_yoy': {'cn': '股东权益同比增长率', 'en': 'Shareholders Equity Growth (YoY)', 'unit': '%'},
    'fcfe_ps': {'cn': '每股股权自由现金流', 'en': 'FCFE Per Share', 'unit': '元'},
    'fcff_ps': {'cn': '每股企业自由现金流', 'en': 'FCFF Per Share', 'unit': '元'},
    'finaexp_of_gr': {'cn': '财务费用与营业总收入之比', 'en': 'Financial Expense to Gross Revenue', 'unit': '%'},
    'interestdebt': {'cn': '带息债务', 'en': 'Interest-bearing Debt', 'unit': '元'},
    'invest_capital': {'cn': '投入资本', 'en': 'Invested Capital', 'unit': '元'},
    'netdebt': {'cn': '净债务', 'en': 'Net Debt', 'unit': '元'},
    'networking_capital': {'cn': '营运资金', 'en': 'Working Capital', 'unit': '元'},
    'noncurrent_exint': {'cn': '非流动负债合计(带息)', 'en': 'Non-current Liabilities with Interest', 'unit': '元'},
    'ocf_yoy': {'cn': '经营活动现金流同比增长率', 'en': 'OCF Growth (YoY)', 'unit': '%'},
    'ocfps': {'cn': '每股经营活动现金流', 'en': 'Operating Cash Flow Per Share', 'unit': '元'},
    'op_yoy': {'cn': '营业利润同比增长率', 'en': 'Operating Profit Growth (YoY)', 'unit': '%'},
    'or_yoy': {'cn': '营业收入同比增长率', 'en': 'Operating Revenue Growth (YoY)', 'unit': '%'},
    'profit_to_op': {'cn': '利润总额与营业收入之比', 'en': 'Profit to Operating Revenue', 'unit': '%'},
    'q_dt_roe': {'cn': '单季度ROE(扣非)', 'en': 'Quarterly ROE (Excl. Non-recurring)', 'unit': '%'},
    'q_gc_to_gr': {'cn': '单季度营业总成本/营业总收入', 'en': 'Quarterly Total Cost to Revenue', 'unit': '%'},
    'q_npta': {'cn': '单季度总资产净利润', 'en': 'Quarterly Net Profit on Total Assets', 'unit': '%'},
    'q_ocf_to_sales': {'cn': '单季度经营现金流/营业收入', 'en': 'Quarterly OCF to Sales', 'unit': '%'},
    'q_op_qoq': {'cn': '营业利润环比增长率', 'en': 'Operating Profit Growth (QoQ)', 'unit': '%'},
    'q_roe': {'cn': '单季度ROE', 'en': 'Quarterly ROE', 'unit': '%'},
    'q_saleexp_to_gr': {'cn': '单季度销售费用/营业总收入', 'en': 'Quarterly Sales Expense to Revenue', 'unit': '%'},
    'q_sales_yoy': {'cn': '单季度营业收入同比增长率', 'en': 'Quarterly Sales Growth (YoY)', 'unit': '%'},
    'retained_earnings': {'cn': '留存收益', 'en': 'Retained Earnings', 'unit': '元'},
    'retainedps': {'cn': '每股留存收益', 'en': 'Retained Earnings Per Share', 'unit': '元'},
    'roa2_yearly': {'cn': '年化总资产报酬率ROA(2)', 'en': 'Annualized ROA (Method 2)', 'unit': '%'},
    'roe_yoy': {'cn': 'ROE同比增长率', 'en': 'ROE Growth (YoY)', 'unit': '%'},
    'saleexp_to_gr': {'cn': '销售费用与营业总收入之比', 'en': 'Sales Expense to Gross Revenue', 'unit': '%'},
    'tangible_asset': {'cn': '有形资产', 'en': 'Tangible Assets', 'unit': '元'},
    'tr_yoy': {'cn': '营业总收入同比增长率', 'en': 'Total Revenue Growth (YoY)', 'unit': '%'},
    'working_capital': {'cn': '营运资金', 'en': 'Working Capital', 'unit': '元'},

    # 技术指标补充(bfq=不复权)
    'asi_bfq': {'cn': 'ASI振动升降指标(不复权)', 'en': 'ASI (Non-adjusted)', 'unit': ''},
    'asit_bfq': {'cn': 'ASIT指标(不复权)', 'en': 'ASIT (Non-adjusted)', 'unit': ''},
    'atr_bfq': {'cn': 'ATR真实波幅(不复权)', 'en': 'ATR (Non-adjusted)', 'unit': '元'},
    'bbi_bfq': {'cn': 'BBI多空指数(不复权)', 'en': 'BBI (Non-adjusted)', 'unit': '元'},
    'bias1_bfq': {'cn': 'BIAS乖离率1(不复权)', 'en': 'BIAS 1 (Non-adjusted)', 'unit': '%'},
    'bias2_bfq': {'cn': 'BIAS乖离率2(不复权)', 'en': 'BIAS 2 (Non-adjusted)', 'unit': '%'},
    'bias3_bfq': {'cn': 'BIAS乖离率3(不复权)', 'en': 'BIAS 3 (Non-adjusted)', 'unit': '%'},
    'boll_lower_bfq': {'cn': '布林下轨(不复权)', 'en': 'Bollinger Lower (Non-adjusted)', 'unit': '元'},
    'boll_mid_bfq': {'cn': '布林中轨(不复权)', 'en': 'Bollinger Mid (Non-adjusted)', 'unit': '元'},
    'boll_upper_bfq': {'cn': '布林上轨(不复权)', 'en': 'Bollinger Upper (Non-adjusted)', 'unit': '元'},
    'brar_ar_bfq': {'cn': 'BRAR-AR(不复权)', 'en': 'BRAR-AR (Non-adjusted)', 'unit': ''},
    'brar_br_bfq': {'cn': 'BRAR-BR(不复权)', 'en': 'BRAR-BR (Non-adjusted)', 'unit': ''},
    'cci_bfq': {'cn': 'CCI顺势指标(不复权)', 'en': 'CCI (Non-adjusted)', 'unit': ''},
    'close_hfq': {'cn': '收盘价(后复权)', 'en': 'Close (Forward Adjusted)', 'unit': '元'},
    'close_qfq': {'cn': '收盘价(前复权)', 'en': 'Close (Backward Adjusted)', 'unit': '元'},
    'cr_bfq': {'cn': 'CR能量指标(不复权)', 'en': 'CR (Non-adjusted)', 'unit': ''},
    'dfma_dif_bfq': {'cn': 'DFMA-DIF(不复权)', 'en': 'DFMA-DIF (Non-adjusted)', 'unit': ''},
    'dfma_difma_bfq': {'cn': 'DFMA-DIFMA(不复权)', 'en': 'DFMA-DIFMA (Non-adjusted)', 'unit': ''},
    'dmi_adx_bfq': {'cn': 'DMI-ADX(不复权)', 'en': 'DMI-ADX (Non-adjusted)', 'unit': ''},
    'dmi_adxr_bfq': {'cn': 'DMI-ADXR(不复权)', 'en': 'DMI-ADXR (Non-adjusted)', 'unit': ''},
    'dmi_mdi_bfq': {'cn': 'DMI-MDI(不复权)', 'en': 'DMI-MDI (Non-adjusted)', 'unit': ''},
    'dmi_pdi_bfq': {'cn': 'DMI-PDI(不复权)', 'en': 'DMI-PDI (Non-adjusted)', 'unit': ''},
    'downdays': {'cn': '连续下跌天数', 'en': 'Consecutive Down Days', 'unit': '天'},
    'dpo_bfq': {'cn': 'DPO区间震荡线(不复权)', 'en': 'DPO (Non-adjusted)', 'unit': ''},
    'ema_bfq_10': {'cn': 'EMA10(不复权)', 'en': 'EMA 10 (Non-adjusted)', 'unit': '元'},
    'ema_bfq_20': {'cn': 'EMA20(不复权)', 'en': 'EMA 20 (Non-adjusted)', 'unit': '元'},
    'ema_bfq_250': {'cn': 'EMA250(不复权)', 'en': 'EMA 250 (Non-adjusted)', 'unit': '元'},
    'ema_bfq_30': {'cn': 'EMA30(不复权)', 'en': 'EMA 30 (Non-adjusted)', 'unit': '元'},
    'ema_bfq_5': {'cn': 'EMA5(不复权)', 'en': 'EMA 5 (Non-adjusted)', 'unit': '元'},
    'ema_bfq_60': {'cn': 'EMA60(不复权)', 'en': 'EMA 60 (Non-adjusted)', 'unit': '元'},
    'ema_bfq_90': {'cn': 'EMA90(不复权)', 'en': 'EMA 90 (Non-adjusted)', 'unit': '元'},
    'emv_bfq': {'cn': 'EMV简易波动指标(不复权)', 'en': 'EMV (Non-adjusted)', 'unit': ''},
    'expma_12_bfq': {'cn': 'EXPMA12(不复权)', 'en': 'EXPMA 12 (Non-adjusted)', 'unit': '元'},
    'expma_50_bfq': {'cn': 'EXPMA50(不复权)', 'en': 'EXPMA 50 (Non-adjusted)', 'unit': '元'},
    'high_hfq': {'cn': '最高价(后复权)', 'en': 'High (Forward Adjusted)', 'unit': '元'},
    'high_qfq': {'cn': '最高价(前复权)', 'en': 'High (Backward Adjusted)', 'unit': '元'},
    'kdj_bfq': {'cn': 'KDJ-J(不复权)', 'en': 'KDJ-J (Non-adjusted)', 'unit': ''},
    'kdj_d_bfq': {'cn': 'KDJ-D(不复权)', 'en': 'KDJ-D (Non-adjusted)', 'unit': ''},
    'kdj_k_bfq': {'cn': 'KDJ-K(不复权)', 'en': 'KDJ-K (Non-adjusted)', 'unit': ''},
    'ktn_down_bfq': {'cn': 'KTN下轨(不复权)', 'en': 'KTN Lower (Non-adjusted)', 'unit': '元'},
    'ktn_mid_bfq': {'cn': 'KTN中轨(不复权)', 'en': 'KTN Mid (Non-adjusted)', 'unit': '元'},
    'ktn_upper_bfq': {'cn': 'KTN上轨(不复权)', 'en': 'KTN Upper (Non-adjusted)', 'unit': '元'},
    'low_hfq': {'cn': '最低价(后复权)', 'en': 'Low (Forward Adjusted)', 'unit': '元'},
    'low_qfq': {'cn': '最低价(前复权)', 'en': 'Low (Backward Adjusted)', 'unit': '元'},
    'lowdays': {'cn': '连续创新低天数', 'en': 'Consecutive New Low Days', 'unit': '天'},
    'ma_bfq_10': {'cn': 'MA10(不复权)', 'en': 'MA 10 (Non-adjusted)', 'unit': '元'},
    'ma_bfq_20': {'cn': 'MA20(不复权)', 'en': 'MA 20 (Non-adjusted)', 'unit': '元'},
    'ma_bfq_250': {'cn': 'MA250(不复权)', 'en': 'MA 250 (Non-adjusted)', 'unit': '元'},
    'ma_bfq_30': {'cn': 'MA30(不复权)', 'en': 'MA 30 (Non-adjusted)', 'unit': '元'},
    'ma_bfq_5': {'cn': 'MA5(不复权)', 'en': 'MA 5 (Non-adjusted)', 'unit': '元'},
    'ma_bfq_60': {'cn': 'MA60(不复权)', 'en': 'MA 60 (Non-adjusted)', 'unit': '元'},
    'ma_bfq_90': {'cn': 'MA90(不复权)', 'en': 'MA 90 (Non-adjusted)', 'unit': '元'},
    'ma_mass_bfq': {'cn': 'MASS梅斯线MA(不复权)', 'en': 'MA of MASS (Non-adjusted)', 'unit': ''},
    'macd_bfq': {'cn': 'MACD(不复权)', 'en': 'MACD (Non-adjusted)', 'unit': ''},
    'macd_dea_bfq': {'cn': 'MACD-DEA(不复权)', 'en': 'MACD-DEA (Non-adjusted)', 'unit': ''},
    'macd_dif_bfq': {'cn': 'MACD-DIF(不复权)', 'en': 'MACD-DIF (Non-adjusted)', 'unit': ''},
    'madpo_bfq': {'cn': 'DPO均线(不复权)', 'en': 'MA of DPO (Non-adjusted)', 'unit': ''},
    'maemv_bfq': {'cn': 'EMV均线(不复权)', 'en': 'MA of EMV (Non-adjusted)', 'unit': ''},
    'maroc_bfq': {'cn': 'ROC均线(不复权)', 'en': 'MA of ROC (Non-adjusted)', 'unit': ''},
    'mass_bfq': {'cn': 'MASS梅斯线(不复权)', 'en': 'MASS (Non-adjusted)', 'unit': ''},
    'mfi_bfq': {'cn': 'MFI资金流量指标(不复权)', 'en': 'MFI (Non-adjusted)', 'unit': ''},
    'mtm_bfq': {'cn': 'MTM动量指标(不复权)', 'en': 'MTM (Non-adjusted)', 'unit': ''},
    'mtmma_bfq': {'cn': 'MTM均线(不复权)', 'en': 'MA of MTM (Non-adjusted)', 'unit': ''},
    'obv_bfq': {'cn': 'OBV能量潮(不复权)', 'en': 'OBV (Non-adjusted)', 'unit': ''},
    'open_hfq': {'cn': '开盘价(后复权)', 'en': 'Open (Forward Adjusted)', 'unit': '元'},
    'open_qfq': {'cn': '开盘价(前复权)', 'en': 'Open (Backward Adjusted)', 'unit': '元'},
    'pct_change': {'cn': '涨跌幅', 'en': 'Price Change %', 'unit': '%'},
    'pre_close_hfq': {'cn': '前收盘价(后复权)', 'en': 'Previous Close (Forward Adjusted)', 'unit': '元'},
    'pre_close_qfq': {'cn': '前收盘价(前复权)', 'en': 'Previous Close (Backward Adjusted)', 'unit': '元'},
    'psy_bfq': {'cn': 'PSY心理线(不复权)', 'en': 'PSY (Non-adjusted)', 'unit': '%'},
    'psyma_bfq': {'cn': 'PSY均线(不复权)', 'en': 'MA of PSY (Non-adjusted)', 'unit': '%'},
    'roc_bfq': {'cn': 'ROC变动率指标(不复权)', 'en': 'ROC (Non-adjusted)', 'unit': '%'},
    'rsi_bfq_12': {'cn': 'RSI12(不复权)', 'en': 'RSI 12 (Non-adjusted)', 'unit': ''},
    'rsi_bfq_24': {'cn': 'RSI24(不复权)', 'en': 'RSI 24 (Non-adjusted)', 'unit': ''},
    'rsi_bfq_6': {'cn': 'RSI6(不复权)', 'en': 'RSI 6 (Non-adjusted)', 'unit': ''},
    'taq_down_bfq': {'cn': 'TAQ下轨(不复权)', 'en': 'TAQ Lower (Non-adjusted)', 'unit': '元'},
    'taq_mid_bfq': {'cn': 'TAQ中轨(不复权)', 'en': 'TAQ Mid (Non-adjusted)', 'unit': '元'},
    'taq_up_bfq': {'cn': 'TAQ上轨(不复权)', 'en': 'TAQ Upper (Non-adjusted)', 'unit': '元'},
    'topdays': {'cn': '连续创新高天数', 'en': 'Consecutive New High Days', 'unit': '天'},
    'trix_bfq': {'cn': 'TRIX三重指数平滑均线(不复权)', 'en': 'TRIX (Non-adjusted)', 'unit': ''},
    'trma_bfq': {'cn': 'TRIX均线(不复权)', 'en': 'MA of TRIX (Non-adjusted)', 'unit': ''},
    'updays': {'cn': '连续上涨天数', 'en': 'Consecutive Up Days', 'unit': '天'},
    'vr_bfq': {'cn': 'VR容量比率(不复权)', 'en': 'VR (Non-adjusted)', 'unit': ''},
    'wr1_bfq': {'cn': 'WR威廉指标1(不复权)', 'en': 'WR 1 (Non-adjusted)', 'unit': '%'},
    'wr_bfq': {'cn': 'WR威廉指标(不复权)', 'en': 'WR (Non-adjusted)', 'unit': '%'},
    'xsii_td1_bfq': {'cn': 'XSII薛斯通道TD1(不复权)', 'en': 'XSII-TD1 (Non-adjusted)', 'unit': '元'},
    'xsii_td2_bfq': {'cn': 'XSII薛斯通道TD2(不复权)', 'en': 'XSII-TD2 (Non-adjusted)', 'unit': '元'},
    'xsii_td3_bfq': {'cn': 'XSII薛斯通道TD3(不复权)', 'en': 'XSII-TD3 (Non-adjusted)', 'unit': '元'},
    'xsii_td4_bfq': {'cn': 'XSII薛斯通道TD4(不复权)', 'en': 'XSII-TD4 (Non-adjusted)', 'unit': '元'},

    # 宏观指标补充
    'cnt_val': {'cn': '农村CPI定基指数', 'en': 'Rural CPI Index Value', 'unit': ''},
    'nt_val': {'cn': '全国CPI定基指数', 'en': 'National CPI Index Value', 'unit': ''},
    'town_val': {'cn': '城市CPI定基指数', 'en': 'Urban CPI Index Value', 'unit': ''},
    'swing': {'cn': '振幅', 'en': 'Swing', 'unit': '%'},
    'm0_mom': {'cn': 'M0环比增长率', 'en': 'M0 Growth (MoM)', 'unit': '%'},
    'm1_mom': {'cn': 'M1环比增长率', 'en': 'M1 Growth (MoM)', 'unit': '%'},
    'm2_mom': {'cn': 'M2环比增长率', 'en': 'M2 Growth (MoM)', 'unit': '%'},
    'ppi_cg_accu': {'cn': 'PPI:生产资料:累计', 'en': 'PPI Capital Goods (Cumulative)', 'unit': '%'},
    'ppi_cg_adu_accu': {'cn': 'PPI:生产资料:采掘:累计', 'en': 'PPI CG Mining (Cumulative)', 'unit': '%'},
    'ppi_cg_adu_mom': {'cn': 'PPI:生产资料:采掘:环比', 'en': 'PPI CG Mining (MoM)', 'unit': '%'},
    'ppi_cg_adu_yoy': {'cn': 'PPI:生产资料:采掘:同比', 'en': 'PPI CG Mining (YoY)', 'unit': '%'},
    'ppi_cg_c_accu': {'cn': 'PPI:生产资料:化工:累计', 'en': 'PPI CG Chemical (Cumulative)', 'unit': '%'},
    'ppi_cg_c_mom': {'cn': 'PPI:生产资料:化工:环比', 'en': 'PPI CG Chemical (MoM)', 'unit': '%'},
    'ppi_cg_c_yoy': {'cn': 'PPI:生产资料:化工:同比', 'en': 'PPI CG Chemical (YoY)', 'unit': '%'},
    'ppi_cg_dcg_accu': {'cn': 'PPI:生产资料:深加工:累计', 'en': 'PPI CG Deep Processing (Cumulative)', 'unit': '%'},
    'ppi_cg_dcg_mom': {'cn': 'PPI:生产资料:深加工:环比', 'en': 'PPI CG Deep Processing (MoM)', 'unit': '%'},
    'ppi_cg_dcg_yoy': {'cn': 'PPI:生产资料:深加工:同比', 'en': 'PPI CG Deep Processing (YoY)', 'unit': '%'},
    'ppi_cg_f_accu': {'cn': 'PPI:生产资料:燃料:累计', 'en': 'PPI CG Fuel (Cumulative)', 'unit': '%'},
    'ppi_cg_f_mom': {'cn': 'PPI:生产资料:燃料:环比', 'en': 'PPI CG Fuel (MoM)', 'unit': '%'},
    'ppi_cg_f_yoy': {'cn': 'PPI:生产资料:燃料:同比', 'en': 'PPI CG Fuel (YoY)', 'unit': '%'},
    'ppi_cg_mom': {'cn': 'PPI:生产资料:环比', 'en': 'PPI Capital Goods (MoM)', 'unit': '%'},
    'ppi_cg_yoy': {'cn': 'PPI:生产资料:同比', 'en': 'PPI Capital Goods (YoY)', 'unit': '%'},
    'ppi_mp_accu': {'cn': 'PPI:生活资料:累计', 'en': 'PPI Consumer Goods (Cumulative)', 'unit': '%'},
    'ppi_mp_mom': {'cn': 'PPI:生活资料:环比', 'en': 'PPI Consumer Goods (MoM)', 'unit': '%'},
    'ppi_mp_p_accu': {'cn': 'PPI:生活资料:一般日用品:累计', 'en': 'PPI CG General (Cumulative)', 'unit': '%'},
    'ppi_mp_p_mom': {'cn': 'PPI:生活资料:一般日用品:环比', 'en': 'PPI CG General (MoM)', 'unit': '%'},
    'ppi_mp_p_yoy': {'cn': 'PPI:生活资料:一般日用品:同比', 'en': 'PPI CG General (YoY)', 'unit': '%'},
    'ppi_mp_qm_accu': {'cn': 'PPI:生活资料:衣着:累计', 'en': 'PPI CG Clothing (Cumulative)', 'unit': '%'},
    'ppi_mp_qm_mom': {'cn': 'PPI:生活资料:衣着:环比', 'en': 'PPI CG Clothing (MoM)', 'unit': '%'},
    'ppi_mp_qm_yoy': {'cn': 'PPI:生活资料:衣着:同比', 'en': 'PPI CG Clothing (YoY)', 'unit': '%'},
    'ppi_mp_rm_accu': {'cn': 'PPI:生活资料:耐用消费品:累计', 'en': 'PPI CG Durable (Cumulative)', 'unit': '%'},
    'ppi_mp_rm_mom': {'cn': 'PPI:生活资料:耐用消费品:环比', 'en': 'PPI CG Durable (MoM)', 'unit': '%'},
    'ppi_mp_rm_yoy': {'cn': 'PPI:生活资料:耐用消费品:同比', 'en': 'PPI CG Durable (YoY)', 'unit': '%'},
    'ppi_mp_yoy': {'cn': 'PPI:生活资料:同比', 'en': 'PPI Consumer Goods (YoY)', 'unit': '%'},
    'inc_cumval': {'cn': '社融增量累计值', 'en': 'Social Financing Incremental Cumulative', 'unit': '亿元'},
    'inc_month': {'cn': '社融月度增量', 'en': 'Social Financing Monthly Increment', 'unit': '亿元'},
    'stk_endval': {'cn': '社融存量', 'en': 'Social Financing Stock', 'unit': '亿元'},
    'date': {'cn': '日期', 'en': 'Date', 'unit': '日期(YYYYMMDD)'},
    'y10': {'cn': '美债10年期收益率', 'en': 'US Treasury 10Y Yield', 'unit': '%'},
    'y20': {'cn': '美债20年期收益率', 'en': 'US Treasury 20Y Yield', 'unit': '%'},
    'y30': {'cn': '美债30年期收益率', 'en': 'US Treasury 30Y Yield', 'unit': '%'},
    'y5': {'cn': '美债5年期收益率', 'en': 'US Treasury 5Y Yield', 'unit': '%'},
    'y7': {'cn': '美债7年期收益率', 'en': 'US Treasury 7Y Yield', 'unit': '%'},
    'exchange_id': {'cn': '交易所代码', 'en': 'Exchange ID', 'unit': '代码'},
    
    # 国际指数字段(带前缀的完整字段名)
    'spx_macro_im_ts_code': {'cn': '标普500指数代码', 'en': 'S&P 500 Index Code', 'unit': '代码'},
    'spx_macro_im_open': {'cn': '标普500开盘点位', 'en': 'S&P 500 Open', 'unit': '点'},
    'spx_macro_im_close': {'cn': '标普500收盘点位', 'en': 'S&P 500 Close', 'unit': '点'},
    'spx_macro_im_high': {'cn': '标普500最高点位', 'en': 'S&P 500 High', 'unit': '点'},
    'spx_macro_im_low': {'cn': '标普500最低点位', 'en': 'S&P 500 Low', 'unit': '点'},
    'spx_macro_im_pre_close': {'cn': '标普500昨日收盘点', 'en': 'S&P 500 Previous Close', 'unit': '点'},
    'spx_macro_im_change': {'cn': '标普500涨跌点位', 'en': 'S&P 500 Change', 'unit': '点'},
    'spx_macro_im_pct_chg': {'cn': '标普500涨跌幅', 'en': 'S&P 500 Percent Change', 'unit': '%'},
    'spx_macro_im_swing': {'cn': '标普500振幅', 'en': 'S&P 500 Swing', 'unit': '%'},
    'spx_macro_im_vol': {'cn': '标普500成交量', 'en': 'S&P 500 Volume', 'unit': '手'},
    
    'hsi_macro_im_ts_code': {'cn': '恒生指数代码', 'en': 'Hang Seng Index Code', 'unit': '代码'},
    'hsi_macro_im_open': {'cn': '恒生指数开盘点位', 'en': 'HSI Open', 'unit': '点'},
    'hsi_macro_im_close': {'cn': '恒生指数收盘点位', 'en': 'HSI Close', 'unit': '点'},
    'hsi_macro_im_high': {'cn': '恒生指数最高点位', 'en': 'HSI High', 'unit': '点'},
    'hsi_macro_im_low': {'cn': '恒生指数最低点位', 'en': 'HSI Low', 'unit': '点'},
    'hsi_macro_im_pre_close': {'cn': '恒生指数昨日收盘点', 'en': 'HSI Previous Close', 'unit': '点'},
    'hsi_macro_im_change': {'cn': '恒生指数涨跌点位', 'en': 'HSI Change', 'unit': '点'},
    'hsi_macro_im_pct_chg': {'cn': '恒生指数涨跌幅', 'en': 'HSI Percent Change', 'unit': '%'},
    'hsi_macro_im_swing': {'cn': '恒生指数振幅', 'en': 'HSI Swing', 'unit': '%'},
    'hsi_macro_im_vol': {'cn': '恒生指数成交量', 'en': 'HSI Volume', 'unit': '手'},    
}

# 定义接口信息字典
api_info = {
    'pro_bar': 'https://tushare.pro/document/2?doc_id=109',
    'daily_basic': 'https://tushare.pro/document/2?doc_id=32',
    'fina_indicator': 'https://tushare.pro/document/2?doc_id=79',
    'stk_holdernumber': 'https://tushare.pro/document/2?doc_id=166',
    'cyq_perf': 'https://tushare.pro/document/2?doc_id=293',
    'stk_factor': 'https://tushare.pro/document/2?doc_id=328',
    'margin_detail': 'https://tushare.pro/document/2?doc_id=59',
    'fund_daily': 'https://tushare.pro/document/2?doc_id=127',
    'fund_adj': 'https://tushare.pro/document/2?doc_id=199',
    'index_daily': 'https://tushare.pro/document/2?doc_id=95',
    'index_dailybasic': 'https://tushare.pro/document/2?doc_id=128',
    'idx_factor_pro': 'https://tushare.pro/document/2?doc_id=358',
    'moneyflow_hsgt': 'https://tushare.pro/document/2?doc_id=47',
    'margin': 'https://tushare.pro/document/2?doc_id=58',
    'shibor': 'https://tushare.pro/document/2?doc_id=149',
    'cn_gdp': 'https://tushare.pro/document/2?doc_id=227',
    'cn_cpi': 'https://tushare.pro/document/2?doc_id=228',
    'cn_ppi': 'https://tushare.pro/document/2?doc_id=245',
    'cn_m': 'https://tushare.pro/document/2?doc_id=242',
    'sf_month': 'https://tushare.pro/document/2?doc_id=310',
    'us_trycr': 'https://tushare.pro/document/2?doc_id=219',
    'index_global': 'https://tushare.pro/document/2?doc_id=211'
}

# ⭐ 定义前缀到API的映射（必须在函数定义之前）
prefix_to_api = {
    'indiv_d_': 'pro_bar',
    'indiv_din_': 'daily_basic',
    'indiv_fin_': 'fina_indicator',
    'indiv_sh_': 'stk_holdernumber',
    'indiv_chip_': 'cyq_perf',
    'indiv_techin_': 'stk_factor',
    'indiv_margin_': 'margin_detail',
    'indiv_adj_d_': 'fund_adj',
    'indiv_ind_': 'index_daily',
    'market_din_': 'index_dailybasic',
    'market_flow_': 'moneyflow_hsgt',
    'market_margin_': 'margin',
    'macro_shibor_': 'shibor',
    'macro_gdp_': 'cn_gdp',
    'macro_cpi_': 'cn_cpi',
    'macro_ppi_': 'cn_ppi',
    'macro_m_': 'cn_m',
    'macro_sf_': 'sf_month',
    'macro_ust_': 'us_trycr',
    'macro_im_': 'index_global',
}

def get_field_description(field_name):
    """获取字段的中英文说明"""
    if field_name in field_descriptions:
        return (field_descriptions[field_name]['cn'], 
                field_descriptions[field_name]['en'], 
                field_descriptions[field_name]['unit'])
    else:
        return ('待补充', '待补充', '待补充')

def classify_column(col_name):
    """根据列名前缀分类并确定API"""
    # 特殊列名处理
    if col_name == 'trade_date':
        return 'common', 'date', None
    
    # 根据前缀判断一级来源和二级分类
    if col_name.startswith('indiv_'):
        level1 = 'indiv'
        if col_name.startswith('indiv_d_'):
            level2 = 'd'
        elif col_name.startswith('indiv_din_'):
            level2 = 'din'
        elif col_name.startswith('indiv_fin_'):
            level2 = 'fin'
        elif col_name.startswith('indiv_sh_'):
            level2 = 'sh'
        elif col_name.startswith('indiv_chip_'):
            level2 = 'chip'
        elif col_name.startswith('indiv_techin_'):
            level2 = 'techin'
        elif col_name.startswith('indiv_margin_'):
            level2 = 'margin'
        elif col_name.startswith('indiv_adj_d_'):
            level2 = 'adj_d'
        elif col_name.startswith('indiv_ind_'):
            level2 = 'ind'
        else:
            level2 = 'unknown'
    elif col_name.startswith('market_'):
        level1 = 'market'
        if col_name.startswith('market_din_'):
            level2 = 'din'
        elif col_name.startswith('market_flow_'):
            level2 = 'flow'
        elif col_name.startswith('market_margin_'):
            level2 = 'margin'
        else:
            level2 = 'unknown'
    elif col_name.startswith('macro_'):
        level1 = 'macro'
        if col_name.startswith('macro_shibor_'):
            level2 = 'shibor'
        elif col_name.startswith('macro_gdp_'):
            level2 = 'gdp'
        elif col_name.startswith('macro_cpi_'):
            level2 = 'cpi'
        elif col_name.startswith('macro_ppi_'):
            level2 = 'ppi'
        elif col_name.startswith('macro_m_'):
            level2 = 'm'
        elif col_name.startswith('macro_sf_'):
            level2 = 'sf'
        elif col_name.startswith('macro_ust_'):
            level2 = 'ust'
        elif col_name.startswith('macro_im_'):
            level2 = 'im'
        else:
            level2 = 'unknown'
    else:
        level1 = 'unknown'
        level2 = 'unknown'
    
    # 找到对应的API
    api_name = None
    for prefix, api in prefix_to_api.items():
        if col_name.startswith(prefix):
            api_name = api
            break
    
    return level1, level2, api_name

def extract_original_field(col_name):
    """从列名中提取原始字段名"""
    # 尝试所有已知前缀
    for prefix in prefix_to_api.keys():
        if col_name.startswith(prefix):
            return col_name[len(prefix):]
    return col_name

print("\n正在从最终生成的数据文件中提取列信息...")

# 从一个代表性的个股文件和一个ETF文件中读取所有列名
sample_files = [
    ('data/stock/full/600036.SH_indiv_full_macro.csv', '个股'),
    ('data/etf/full/510300.SH_indiv_full_macro.csv', 'ETF')
]

all_columns = set()  # 使用集合去重
column_info = []  # 存储所有列的信息

for file_path, file_type in sample_files:
    print(f"  - 读取 {file_type} 数据: {file_path}")
    if os.path.exists(file_path):
        df_sample = pd.read_csv(file_path, nrows=0)  # 只读取列名
        all_columns.update(df_sample.columns.tolist())
        print(f"    发现 {len(df_sample.columns)} 列")
    else:
        print(f"    警告: 文件不存在!")

print(f"\n共发现 {len(all_columns)} 个不同的列名")

# 处理所有列
for col in sorted(all_columns):
    level1, level2, api_name = classify_column(col)
    original_field = extract_original_field(col)
    cn, en, unit = get_field_description(original_field)
    
    api_link = api_info.get(api_name, '未知') if api_name else '通用字段'
    
    column_info.append([col, level1, level2, api_link, original_field, cn, en, unit])

# 创建DataFrame并输出
column_df = pd.DataFrame(column_info, columns=['当前列名', '一级来源', '二级分类', 'tushare接口链接', '字段名', '中文含义', '英文含义', '单位'])

# 排序
column_df = column_df.sort_values(['一级来源', '二级分类', '当前列名']).reset_index(drop=True)

print(f"\n共整理 {len(column_df)} 个列的说明")
print(f"其中已补充说明 {len(column_df[column_df['中文含义'] != '待补充'])} 个，待补充 {len(column_df[column_df['中文含义'] == '待补充'])} 个")

# 保存到CSV
if not os.path.exists('data'):
    os.makedirs('data')
column_df.to_csv('data/column_description.csv', index=False, encoding='utf-8-sig')
print(f"\n列名说明表格已保存到 data/column_description.csv")

# 显示前30行
print("\n" + "=" * 150)
print("列名说明表格预览（前30行）:")
print("=" * 150)
print(column_df.head(30).to_string(index=False))

# 显示待补充的列
print("\n" + "=" * 150)
print("待补充说明的列（前20个）:")
print("=" * 150)
pending_df = column_df[column_df['中文含义'] == '待补充'].head(20)
if len(pending_df) > 0:
    print(pending_df[['当前列名', '字段名', 'tushare接口链接']].to_string(index=False))
else:
    print("所有列的说明都已补充完整！")

# 按一级来源统计
print("\n" + "=" * 150)
print("按一级来源统计:")
print("=" * 150)
stats = column_df.groupby('一级来源').agg({
    '当前列名': 'count',
    '中文含义': lambda x: (x != '待补充').sum()
}).rename(columns={'当前列名': '总列数', '中文含义': '已补充说明'})
stats['待补充'] = stats['总列数'] - stats['已补充说明']
print(stats)

# 按二级分类统计
print("\n" + "=" * 150)
print("按二级分类详细统计:")
print("=" * 150)
stats2 = column_df.groupby(['一级来源', '二级分类']).agg({
    '当前列名': 'count',
    '中文含义': lambda x: (x != '待补充').sum()
}).rename(columns={'当前列名': '总列数', '中文含义': '已补充说明'})
stats2['待补充'] = stats2['总列数'] - stats2['已补充说明']
print(stats2)

列名说明表格(从实际生成的数据文件中提取)

正在从最终生成的数据文件中提取列信息...
  - 读取 个股 数据: data/stock/full/600036.SH_indiv_full_macro.csv
    发现 283 列
  - 读取 ETF 数据: data/etf/full/510300.SH_indiv_full_macro.csv
    发现 215 列

共发现 373 个不同的列名

共整理 373 个列的说明
其中已补充说明 373 个，待补充 0 个

列名说明表格已保存到 data/column_description.csv

列名说明表格预览（前30行）:
                  当前列名   一级来源  二级分类                               tushare接口链接         字段名         中文含义                       英文含义           单位
            trade_date common  date                                      通用字段  trade_date         交易日期                 Trade Date 日期(YYYYMMDD)
indiv_adj_d_adj_factor  indiv adj_d https://tushare.pro/document/2?doc_id=199  adj_factor         复权因子          Adjustment Factor             
 indiv_chip_cost_15pct  indiv  chip https://tushare.pro/document/2?doc_id=293  cost_15pct       15分位成本       15th Percentile Cost            元
 indiv_chip_cost_50pct  indiv  chip https://tushare.pro/document/2?doc_id=293  cost_50pct       50分位成本                Median C