In [None]:
import rqdatac
import pandas as pd
import numpy as np
def generate_micro_factors(all_instruments: pd.DataFrame, start_date: str, end_date: str,factor_name:str) -> pd.DataFrame:
    """
    Compute daily price/volume factors for a pool of instruments and return one of them.

    Daily OHLCV bars are fetched with ``rqdatac.get_price`` and every factor is
    computed independently per ``order_book_id``:

    * ``momentum_{5,10,20,60,90}d``  - n-day percentage change of ``close``
    * ``volatility_{20,60,90}d``     - rolling std of daily returns, scaled by sqrt(252)
    * ``avg_volume_{20,60,90}d``     - rolling mean of ``volume``
    * ``volume_shock``               - ``avg_volume_20d / avg_volume_60d - 1``
    * ``rsi_14d``                    - RSI from exponentially smoothed gains/losses (com=13)
    * ``macd``                       - MACD line (EMA12 - EMA26) minus its 9-span EMA

    Args:
        all_instruments: frame with an ``order_book_id`` column listing the pool.
        start_date: start of the window, forwarded unchanged to ``rqdatac.get_price``.
        end_date: end of the window, forwarded unchanged to ``rqdatac.get_price``.
        factor_name: name of the single column to return.

    Returns:
        DataFrame with columns ``order_book_id``, ``date`` and ``beta``, where
        ``beta`` holds the requested factor. Rows where the factor is NaN are dropped.

    Raises:
        KeyError: if ``factor_name`` is not a column of the computed frame.
    """
    def _realign(grouped_result: pd.Series, name: str) -> pd.Series:
        # groupby(...).rolling()/.ewm() prepend the group key as an extra index
        # level; drop it so the result lines up with the (order_book_id, date) index.
        return grouped_result.reset_index(level=0, drop=True).rename(name)

    print("开始获取基础量价数据...")
    df = rqdatac.get_price(
        all_instruments['order_book_id'].tolist(),
        start_date, end_date, frequency='1d', fields=['open', 'high', 'low', 'close', 'volume']
    ).reset_index().set_index(['order_book_id', 'date'])
    # Every factor below is a window over consecutive rows of one instrument,
    # so make sure rows are in chronological order within each instrument.
    df = df.sort_index()

    print("数据获取完毕，开始计算因子...")

    # --- Preparation ---
    df['daily_return'] = df.groupby(level=0)['close'].pct_change()

    # --- Momentum / reversal ---
    print("正在计算动量因子...")
    for n in [5, 10, 20, 60, 90]:
        # pct_change keeps the original index, so plain assignment aligns.
        df[f'momentum_{n}d'] = df.groupby(level=0)['close'].pct_change(n)

    # --- Volatility ---
    print("正在计算波动率因子...")
    for n in [20, 60, 90]:
        rolling_std = df.groupby(level=0)['daily_return'].rolling(window=n).std()
        # Annualise the daily standard deviation with sqrt(252).
        df = df.join(_realign(rolling_std * np.sqrt(252), f'volatility_{n}d'))

    # --- Liquidity ---
    print("正在计算流动性因子...")
    for n in [20, 60, 90]:
        rolling_mean = df.groupby(level=0)['volume'].rolling(window=n).mean()
        df = df.join(_realign(rolling_mean, f'avg_volume_{n}d'))

    df['volume_shock'] = df['avg_volume_20d'] / df['avg_volume_60d'] - 1

    # --- Technical indicators ---
    print("正在计算技术指标 (RSI, MACD)...")

    # RSI: the smoothing must be done per instrument. Running .ewm() over the
    # whole stacked series would carry one instrument's average gain/loss into
    # the next instrument and give its first row a value despite having no
    # previous close.
    delta = df.groupby(level=0)['close'].diff()
    gain = _realign(
        delta.clip(lower=0).groupby(level=0).ewm(com=13, adjust=False).mean(), 'gain'
    )
    loss = _realign(
        -delta.clip(upper=0).groupby(level=0).ewm(com=13, adjust=False).mean(), 'loss'
    )
    rsi = 100 - (100 / (1 + (gain / loss)))
    df = df.join(rsi.rename('rsi_14d'))

    # MACD
    ewm_12d = df.groupby(level=0)['close'].ewm(span=12, adjust=False).mean()
    ewm_26d = df.groupby(level=0)['close'].ewm(span=26, adjust=False).mean()
    df = df.join(_realign(ewm_12d, 'ewm_12d'))
    df = df.join(_realign(ewm_26d, 'ewm_26d'))

    df['macd_line'] = df['ewm_12d'] - df['ewm_26d']

    signal_line = df.groupby(level=0)['macd_line'].ewm(span=9, adjust=False).mean()
    df = df.join(_realign(signal_line, 'signal_line'))

    df['macd'] = df['macd_line'] - df['signal_line']

    # --- Clean up and return ---
    original_cols = ['open', 'high', 'low', 'close', 'volume', 'daily_return']
    temp_cols = ['ewm_12d', 'ewm_26d', 'macd_line', 'signal_line']
    factor_cols = [col for col in df.columns if col not in original_cols + temp_cols]

    print(f"因子计算完成，共生成 {len(factor_cols)} 个微观因子。")
    if factor_name not in df.columns:
        # Same exception type the column lookup would raise, with a usable message.
        raise KeyError(f"unknown factor {factor_name!r}; available factors: {factor_cols}")
    df = df[[factor_name]].dropna(how='all')
    # The selected factor is returned under the fixed column name 'beta'.
    o_df = df.rename(columns = {factor_name:'beta'}).reset_index()
    print('model2')
    return o_df

In [2]:
if __name__ == '__main__':
    # Driver: build the bond pool for the chosen window and print one micro
    # factor computed by generate_micro_factors.
    import rqdatac
    import pandas as pd
    import statsmodels.api as sm
    from linearmodels.panel import FamaMacBeth
    import csv
    import numpy as np
    # NOTE(review): import_ipynb is presumably what lets the a_general_factor_test
    # notebook be imported as a module on the next line — confirm before reordering.
    import import_ipynb
    import a_general_factor_test
    rqdatac.init()
    start_date = pd.to_datetime('2018-01-01')
    end_date = pd.to_datetime('2020-12-31')
    instrument_type = 'Convertible'
    remaining_time_to_mature ='短期 (1-3年)'
    rolling_window = 90
    # Factor names that generate_micro_factors can return. The earlier string
    # assignment to factor_name was overwritten by this list before any use,
    # so it has been dropped.
    factor_name = ['momentum_5d',  'momentum_10d',  'momentum_20d','momentum_60d',  'momentum_90d', 
                    'volatility_20d','volatility_60d','volatility_90d','avg_volume_20d','avg_volume_60d',
                    'avg_volume_90d','volume_shock','rsi_14d','macd']
    # Factor evaluated in this run (index 1 -> 'momentum_10d').
    selected_factor = factor_name[1]
    all_instruments = a_general_factor_test.get_bonds_poll(start_date, end_date, instrument_type)
    rolling_micro_factors = generate_micro_factors(all_instruments,start_date,end_date,selected_factor)
    print(rolling_micro_factors)



testmodel2
可转债数量: 449
开始获取基础量价数据...
数据获取完毕，开始计算因子...
正在计算动量因子...
正在计算波动率因子...
正在计算流动性因子...
正在计算技术指标 (RSI, MACD)...
因子计算完成，共生成 14 个微观因子。
       order_book_id       date      beta
0        110030.XSHG 2018-01-16  0.053131
1        110030.XSHG 2018-01-17  0.046179
2        110030.XSHG 2018-01-18  0.045455
3        110030.XSHG 2018-01-19  0.005945
4        110030.XSHG 2018-01-22  0.001458
...              ...        ...       ...
130029   132022.XSHG 2020-12-25 -0.007113
130030   132022.XSHG 2020-12-28 -0.009901
130031   132022.XSHG 2020-12-29  0.011765
130032   132022.XSHG 2020-12-30  0.008317
130033   132022.XSHG 2020-12-31  0.000000

[130034 rows x 3 columns]
