In [1]:
import pandas as pd
import numpy as np

In [2]:
# 加载数据
from firefin.data.gateway import fetch_data
data = fetch_data(['market_cap', 'return_adj', 'pb_ratio','open','close','volume'])
rf = pd.read_feather(r'C:\Users\m1309\Downloads\bond_data\bond_data\us_bond_2y.feather')

[32m2025-08-26 22:15:32.652[0m | [1mINFO    [0m | [36mfirefin.common.config[0m:[36m<module>[0m:[36m37[0m - [1mNo additional JSON files found in DATA_PATH, load default DATA_MAPS.[0m
  pd.to_datetime(obj)
  pd.to_datetime(obj)


In [3]:
#把数据截取成想要的大小，并构造基本的指标如无风险利率，市净率等
mkt_cap = data['market_cap'].iloc[50:1070, :500]

ret_adj = data['return_adj'].iloc[50:1070, :500]

pb = data['pb_ratio'].iloc[50:1070, :500]

open_price = data['open'].iloc[50:1100, :500]

dates = mkt_cap.index
stock_code = mkt_cap.columns

bm = 1 / pb


#得到与 mkt_cap、ret_adj 索引一致的 risk_free_rate
if 'datetime' in rf.columns:
    rf['datetime'] = pd.to_datetime(rf['datetime'])
    rf = rf.set_index('datetime')
else:
    rf.index = pd.to_datetime(rf.index)
rf = rf['us_bond_2y']
rf.index = rf.index.normalize()
rf = rf.reindex(dates.normalize(), method='ffill') / 100
rf.index = dates
risk_free_rate = rf

#构造超额收益
excess_ret = ret_adj.sub(risk_free_rate, axis=0).fillna(0)

#构造动量信号
mom_signal = (data["close"] / data["close"].shift(21) -1).shift(1).iloc[50:1070, :500]

In [4]:
#用当期收益率ret_adj构造新的（学术）因子，比如这里就是直接用pb进行打分然后构造多空组合(这里收益统一用字典，key是0表示当天的收益，value是T * quantiles的DataFrame)
return_adjusted = {}
return_adjusted[0]= ret_adj
from firefin.core.algorithm.portfolio_sort import PortfolioSort
pb_quantile_ret = PortfolioSort.single_sort(factor=pb, forward_returns=return_adjusted, quantiles=5, market_cap = mkt_cap)


In [5]:
#提取H-L这一项，将它作为后续检验的开始
pb_HML = [pb_quantile_ret[0].iloc[:,1]]

In [6]:
# 学术因子的检测，本项目里面有基础的AcaEvaluatorModel用来做学术因子对于收益率的检验
from firefin.evaluation.academia.AcaEvaluatorModel import *
basic_test = AcaEvaluatorModel(factor_portfolio=pb_HML,return_adj=excess_ret,time_series_window=None,cov_type="HAC")

In [7]:
#对该因子进行时序回归，返回因子暴露、截距、残差以及各种统计量
basic_test.run_time_series_regression()

BatchRegressionResult(
 alpha:
    stock_code           000001.SZ  000002.SZ  000004.SZ  000005.SZ  000006.SZ  \
    datetime                                                                     
    2009-03-23 15:00:00        NaN        NaN        NaN        NaN        NaN   
    2009-03-24 15:00:00        NaN        NaN        NaN        NaN        NaN   
    2009-03-25 15:00:00        NaN        NaN        NaN        NaN        NaN   
    2009-03-26 15:00:00        NaN        NaN        NaN        NaN        NaN   
    2009-03-27 15:00:00        NaN        NaN        NaN        NaN        NaN   
    ...                        ...        ...        ...        ...        ...   
    2013-05-28 15:00:00        NaN        NaN        NaN        NaN        NaN   
    2013-05-29 15:00:00        NaN        NaN        NaN        NaN        NaN   
    2013-05-30 15:00:00        NaN        NaN        NaN        NaN        NaN   
    2013-05-31 15:00:00        NaN        NaN        NaN        NaN

In [8]:
#进行横截面回归，返回因子暴露在横截面上的性质，返回回归系数，回归截距，残差以及t统计量
basic_test.run_cross_sectional_regression()

BatchRegressionResult(
 alpha:
    datetime
    2009-03-23 15:00:00         NaN
    2009-03-24 15:00:00         NaN
    2009-03-25 15:00:00         NaN
    2009-03-26 15:00:00         NaN
    2009-03-27 15:00:00         NaN
                             ...   
    2013-05-28 15:00:00         NaN
    2013-05-29 15:00:00         NaN
    2013-05-30 15:00:00         NaN
    2013-05-31 15:00:00         NaN
    2013-06-03 15:00:00   -0.001552
    Name: alpha, Length: 1020, dtype: float64,
 alpha_t:
    datetime
    2009-03-23 15:00:00         NaN
    2009-03-24 15:00:00         NaN
    2009-03-25 15:00:00         NaN
    2009-03-26 15:00:00         NaN
    2009-03-27 15:00:00         NaN
                             ...   
    2013-05-28 15:00:00         NaN
    2013-05-29 15:00:00         NaN
    2013-05-30 15:00:00         NaN
    2013-05-31 15:00:00         NaN
    2013-06-03 15:00:00   -3.597405
    Name: alpha_t, Length: 1020, dtype: float64,
 beta:
                                0
    

In [9]:
#进行fama macbeth回归，得到横截面上的性质，返回回归系数，回归截距，t统计量
#basic_test.run_fama_macbeth_regression()

In [10]:
from firefin.evaluation.academia.AcaIndirectEvaluator import *
pb_HML_indirect_test = AcaIndirectEvaluator(factor_portfolio=pb_HML, return_adj=ret_adj, risk_free_rate=risk_free_rate,
                         stock_size=mkt_cap, stock_value=bm, mom_signal=mom_signal)

In [11]:
pb_HML_indirect_test.evaluate_stability(mode = "ff3_mom",window = 520)

PicklingError: Could not pickle the task to send it to the workers.

In [13]:
# 对于pb这个指标我们可以对它的每一个分位数的性质都做检验，先构造分位收益率
quantile_ret = [pb_quantile_ret[0][col] for col in pb_quantile_ret[0].columns]

In [14]:
# 学术因子的检测，本项目里面有基础的AcaEvaluatorModel用来做学术因子对于收益率的检验
from firefin.evaluation.academia.AcaIndirectEvaluator import *

indirect_test = AcaIndirectEvaluator(factor_portfolio=quantile_ret, return_adj=ret_adj, risk_free_rate=risk_free_rate,
                         stock_size=mkt_cap, stock_value=bm, mom_signal=mom_signal)


In [15]:
# 构造对于每一个分位数检验的LaTex表格
indirect_test.export_evaluation_table(mode="all")

  alpha_last = float(res.alpha.iloc[-1])
  alpha_t_last = float(res.alpha_t.iloc[-1])
  alpha_last = float(res.alpha.iloc[-1])
  alpha_t_last = float(res.alpha_t.iloc[-1])


'\\toprule\n\\begin{tabular}{l c c c c c c c c c c c}\nPortfolio & \\multicolumn{2}{c}{Excess Return} & \\multicolumn{3}{c}{CAPM} & \\multicolumn{6}{c}{4-Factor} \\\\\n\\cmidrule(lr){2-3}\\cmidrule(lr){4-6}\\cmidrule(lr){7-12}\n & mean excess\\_ret & std & alpha & MKT & Adj \\$R\\textasciicircum{}2\\$ & alpha & MKT & SMB & HML & MOM & Adj \\$R\\textasciicircum{}2\\$ \\\\\n\\midrule\n1 & 0.000 & 0.016 & \\begin{tabular}[t]{@{}r@{}}0.00\\\\(18.58)\\end{tabular} & \\begin{tabular}[t]{@{}r@{}}0.90\\\\(61.50)\\end{tabular} & 0.788 & \\begin{tabular}[t]{@{}r@{}}0.00\\\\(22.28)\\end{tabular} & \\begin{tabular}[t]{@{}r@{}}0.79\\\\(58.74)\\end{tabular} & \\begin{tabular}[t]{@{}r@{}}0.07\\\\(2.42)\\end{tabular} & \\begin{tabular}[t]{@{}r@{}}0.47\\\\(19.46)\\end{tabular} & \\begin{tabular}[t]{@{}r@{}}-0.05\\\\(-2.44)\\end{tabular} & 0.852 \\\\\n2 & 0.000 & 0.017 & \\begin{tabular}[t]{@{}r@{}}0.01\\\\(22.12)\\end{tabular} & \\begin{tabular}[t]{@{}r@{}}1.02\\\\(69.40)\\end{tabular} & 0.825 & \\begi

In [16]:
indirect_test.evaluate_by_other_factors(mode = "capm")

  alpha_last = float(res.alpha.iloc[-1])
  alpha_t_last = float(res.alpha_t.iloc[-1])


(        alpha       MKT
 1    0.004452  0.901350
 2    0.005303  1.017366
 3    0.006089  1.026336
 4    0.006456  1.025418
 5    0.006345  0.817211
 H-L  0.001894 -0.084139,
          alpha        MKT
 1    18.575077  61.500194
 2    22.121586  69.400880
 3    26.976343  74.349509
 4    20.979114  54.488446
 5    11.759440  24.763903
 H-L   3.134954  -2.277319,
 1      0.787720
 2      0.825349
 3      0.844329
 4      0.744419
 5      0.375326
 H-L    0.004091
 Name: r2_adj, dtype: float64)

In [17]:
indirect_test.cumulated_alpha(mode = "capm")

  alpha_i = float(res.alpha.iloc[-1])


KeyboardInterrupt: 

In [17]:
# 项目提供了非常简便构造经典学术因子的函数，例如fama french 3因子
from firefin.evaluation.academia.AcademicFactors import *
ff3 = bundle_ff3(stock_return = ret_adj,
    size = mkt_cap,
    book_to_market = bm,
    market_cap = mkt_cap,
    risk_free_rate = risk_free_rate)

In [18]:
# 本项目提供了独立的做横截面回归的函数，可以输出回归系数以及检验指标，不需要走AcaEvaluator的流程了
from firefin.core.algorithm.cross_sectional_regression import *
res=cross_sectional_regression(ff3, excess_ret,cov_type="HAC",window=50)

In [19]:
# 本项目提供了单独的fama macbeth回归，返回回归系数的值以及其检验统计量，不需要走AcaEvaluator的流程了
from firefin.core.algorithm.fama_macbeth import *
rr=FamaMacBeth.run_regression(ff3, excess_ret,window=50)

In [20]:
#本项目提供一个封装好的RollingRegressor类，可以设定滚动窗口，得到时序axis=0,或截面axis=1的回归结果以及数据的检验统计量。
window = 120
ff3_factor = np.stack([f.values.reshape(-1, 1) for f in ff3], axis=0)
maxlag=int(4*(window/100)**(2/9))
from firefin.core.algorithm.regression import *
result = RollingRegressor(
    x = ff3_factor,
    y = excess_ret,
).fit(window=120,cov_type="HAC",cov_kwds={"maxlags": maxlag})


In [21]:
# newey west t 检验，检验回归系数是否显著为0
from firefin.core.algorithm.newey_west_t_statistics import NeweyWestTest
NeweyWestTest.newey_west_t_test(result, ff3)

  t_values = beta_vec / se


{Timestamp('2009-09-10 15:00:00'):                alpha        MKT       SMB       HML
 stock_code                                         
 000001.SZ  -0.895461   2.897711 -2.050069  2.481808
 000002.SZ   0.845829  15.051843 -2.251386  0.594320
 000004.SZ   0.355078  12.324952  3.179499 -3.082861
 000005.SZ   0.303535  13.293356  2.829177  1.538699
 000006.SZ   0.723038   9.490948 -1.951400 -0.654409
 ...              ...        ...       ...       ...
 000607.SZ   0.982777   6.066825  2.506039  1.566584
 000608.SZ   2.893915  11.088579  0.948515 -0.011187
 000609.SZ   0.119196  13.883319  0.463784 -0.820870
 000610.SZ   0.625582  11.052945  4.255177 -1.182811
 000611.SZ   0.661991  13.183465  3.126527  3.751737
 
 [200 rows x 4 columns],
 Timestamp('2009-09-11 15:00:00'):                alpha        MKT       SMB       HML
 stock_code                                         
 000001.SZ  -0.664598   3.323409 -2.146012  1.638091
 000002.SZ   0.570266  15.038735 -2.203301  0.922166
 000