In [2]:


import rqdatac
import pandas as pd
import statsmodels.api as sm
from linearmodels.panel import FamaMacBeth
import csv
import numpy as np


## Selecting Valid Bonds

In [3]:
def get_bonds_poll(start_date:str,end_date:str,instrument_type:str) -> pd.DataFrame:
    '''
    Select the instruments of one type that were listed at some point in a date range.

    Parameters:
        start_date: first day of the study period (anything pd.to_datetime accepts).
        end_date: last day of the study period (anything pd.to_datetime accepts).
        instrument_type: passed straight to rqdatac.all_instruments.

    Returns:
        A new DataFrame holding the rows of rqdatac.all_instruments(instrument_type)
        whose listed_date is on or before end_date and whose de_listed_date is either
        missing/unparseable or on or after start_date. listed_date and de_listed_date
        are converted to datetime (unparseable values become NaT).
    '''
    # Work on a copy so the frame handed back by the data API is never modified.
    instruments = rqdatac.all_instruments(instrument_type).copy()
    for column in ('listed_date', 'de_listed_date'):
        # errors='coerce' turns placeholder / malformed dates into NaT.
        instruments[column] = pd.to_datetime(instruments[column], errors='coerce')

    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    listed_in_time = instruments['listed_date'] <= end_date
    # A NaT de-listing date is treated as "not de-listed".
    not_gone_before_start = instruments['de_listed_date'].isna() | (instruments['de_listed_date'] >= start_date)

    # .copy() gives callers an independent frame rather than a view-like slice.
    selected = instruments.loc[listed_in_time & not_gone_before_start].copy()
    # NOTE(review): the label says "convertible bonds" regardless of instrument_type.
    print("可转债数量:", len(selected['order_book_id']))
    return selected
# all_instruments = get_bonds_poll(start_date, end_date, instrument_type).sample(10) # 这里一定记得删除！！！
# print(all_instruments.info())


In [4]:
def get_return_rate(all_instruments, start_date, end_date, horizon=22):
    '''
    Build the dependent variables: a forward return and a daily return per bond and date.

    Parameters:
        all_instruments: DataFrame with an 'order_book_id' column; those ids are priced.
        start_date: first date requested from rqdatac.get_price.
        end_date: last date kept in the result (anything pd.to_datetime accepts).
        horizon: number of price rows to look ahead for the forward return (default 22).

    Returns:
        DataFrame indexed by (order_book_id, date) with columns
        'return_30'    - (close `horizon` rows later - close) / close, and
        'daily_return' - row-over-row percentage change of close within each bond.
        Rows with a missing value in either column are dropped, and no row is dated
        after end_date.
    '''
    end_date = pd.to_datetime(end_date)

    # `horizon` counts price rows (trading days), not calendar days, so the extra
    # fetch window must be clearly longer than `horizon` calendar days; otherwise the
    # last in-range dates never get a future price and are silently dropped.
    lookahead = pd.DateOffset(days=2 * horizon + 10)
    all_prices = rqdatac.get_price(all_instruments['order_book_id'], start_date, end_date + lookahead).reset_index()
    all_prices['date'] = pd.to_datetime(all_prices['date'])
    all_prices = all_prices.set_index(['order_book_id', 'date'])

    # NOTE(review): assumes rows arrive in chronological order within each bond.
    close = all_prices['close']
    close_by_bond = close.groupby(level='order_book_id')
    future_price = close_by_bond.shift(-horizon)

    dependent = pd.DataFrame({
        'return_30': (future_price - close) / close,
        'daily_return': close_by_bond.pct_change(),
    })

    # The look-ahead rows exist only to supply future prices; keep the requested range.
    in_range = dependent.index.get_level_values('date') <= end_date
    return dependent.loc[in_range].dropna()
    
# dependent = get_return_rate(all_instruments,start_date,end_date)
# print(dependent)


In [5]:
def econ_get_factor_value_forward_fill(factor_name,start_date,end_date):
    '''
    Fetch one economic factor and forward-fill it onto a daily calendar.

    Parameters:
        factor_name: factor identifier passed to rqdatac.econ.get_factors.
        start_date, end_date: bounds used both for the query and for the daily calendar.

    Returns:
        DataFrame indexed by 'date' (one row per calendar day between start_date and
        end_date) with a single column 'value'. Each day carries the most recent value
        whose info_date is on or before that day; days before the first available
        value are dropped. Values are NOT standardised here, because scaling with
        full-sample statistics at this stage would use information from the future.
    '''
    print(f'testing: {factor_name}')
    raw = rqdatac.econ.get_factors(factor_name, start_date, end_date).reset_index()

    # The publication date (info_date) is the date on which a value becomes usable.
    factor_df = pd.DataFrame({
        'date': pd.to_datetime(raw['info_date']),
        'value': raw['value'],
    })

    # reindex(method='ffill') needs a unique, monotonic index. Several values can
    # share one publication date; keep the last one listed for that date.
    # NOTE(review): "last listed" is assumed to be the most relevant release - confirm.
    factor_df = (
        factor_df
        .dropna(subset=['date'])
        .sort_values('date', kind='stable')
        .drop_duplicates(subset='date', keep='last')
        .set_index('date')
    )

    date_range = pd.date_range(start_date, end_date, freq='D')
    factor_df = factor_df.reindex(date_range, method='ffill').dropna()
    factor_df.index.names = ['date']
    return factor_df
# independent = econ_get_factor_value_forward_fill(factor_name,start_date,end_date)
# print(independent)


In [6]:
def prepare_time_regression_df(dependent, independent):
    '''
    Attach the daily factor value to every (bond, date) return row.

    Parameters:
        dependent: return table indexed by (order_book_id, date).
        independent: factor table indexed by date, with a 'value' column.

    Returns:
        A flat DataFrame (index levels turned into columns) containing the return
        columns plus 'value'. Rows without a factor value are removed, and 'value'
        is z-scored using the mean and standard deviation of the remaining rows.
    '''
    # Both tables are flattened so that 'date' is an ordinary column to join on.
    returns_long = dependent.reset_index()
    factor_long = independent.reset_index()
    merged = returns_long.merge(factor_long, on='date', how='left')

    # Keep only the rows for which a factor value was found.
    with_factor = merged[merged['value'].notna()].copy()

    centre = with_factor['value'].mean()
    spread = with_factor['value'].std()
    with_factor['value'] = (with_factor['value'] - centre) / spread
    return with_factor

# time_regression_df = prepare_time_regression_df(dependent,independent)

In [7]:
def roll_data_and_regress(time_regression_df,rolling_window):
    '''
    Estimate a rolling factor beta for every bond.

    For each bond and each run of `rolling_window` consecutive rows, the slope of the
    least-squares line daily_return = a + beta * value is computed in closed form
    (beta = sum((x - mean x) * (y - mean y)) / sum((x - mean x)^2)).

    Parameters:
        time_regression_df: DataFrame with at least the columns 'order_book_id',
            'date', 'daily_return' and 'value'.
        rolling_window: number of rows per window (must be >= 2).

    Returns:
        DataFrame with columns 'order_book_id', 'date' (the date of the last row of
        the window) and 'beta'. The columns are present even when no window
        qualifies. Bonds with fewer rows than the window, windows containing a
        missing value in any column, and windows in which 'value' never changes
        (slope not identifiable) produce no row.

    Raises:
        ValueError: if rolling_window is smaller than 2.
    '''
    output_columns = ['order_book_id', 'date', 'beta']
    window = int(rolling_window)
    if window < 2:
        raise ValueError('rolling_window must be at least 2 to estimate a slope')

    sliding = np.lib.stride_tricks.sliding_window_view
    pieces = []
    for cb, group in time_regression_df.groupby('order_book_id'):
        if len(group) < window:
            continue

        # Windows are runs of consecutive rows, so the rows must be in date order.
        group = group.sort_values('date', kind='stable')
        factor = group['value'].to_numpy(dtype=float)
        returns = group['daily_return'].to_numpy(dtype=float)
        row_has_null = group.isnull().any(axis=1).to_numpy()

        # One row per window; these are views, so no data is copied here.
        factor_windows = sliding(factor, window)
        return_windows = sliding(returns, window)
        window_has_null = sliding(row_has_null, window).any(axis=1)

        # max == min is an exact test for "the factor is constant in this window",
        # which would make the slope 0/0.
        factor_is_flat = factor_windows.max(axis=1) == factor_windows.min(axis=1)
        usable = ~(window_has_null | factor_is_flat)
        if not usable.any():
            continue

        x = factor_windows[usable]
        y = return_windows[usable]
        x_centered = x - x.mean(axis=1, keepdims=True)
        y_centered = y - y.mean(axis=1, keepdims=True)
        betas = (x_centered * y_centered).sum(axis=1) / (x_centered ** 2).sum(axis=1)

        # Window k covers rows k .. k+window-1, so it ends at row k+window-1.
        window_end_dates = group['date'].to_numpy()[window - 1:][usable]
        pieces.append(pd.DataFrame({
            'order_book_id': cb,
            'date': window_end_dates,
            'beta': betas,
        }))

    if not pieces:
        return pd.DataFrame(columns=output_columns)
    return pd.concat(pieces, ignore_index=True)[output_columns]
# rolling_betas = roll_data_and_regress(time_regression_df, rolling_window)
# print(rolling_betas.info())

In [8]:
def add_maturity_group_tag(rolling_betas,all_instruments):
    '''
    Label every rolling-beta row with a remaining-maturity bucket.

    Parameters:
        rolling_betas: DataFrame with columns 'order_book_id', 'date' and 'beta'.
        all_instruments: DataFrame with columns 'order_book_id', 'de_listed_date'
            and 'maturity_date' (strings or datetimes).

    Returns:
        DataFrame with columns 'date', 'order_book_id', 'beta' and
        'maturity_group_tag'. The tag is a categorical built from the remaining
        time, in years, between 'date' and the de-listing date (or the maturity date
        when no de-listing date is available). Rows whose remaining time falls
        outside [0, 100) years, or cannot be computed, get a missing tag.
    '''
    maturity_lists = all_instruments[['order_book_id','de_listed_date','maturity_date']].copy()
    # Coerce both date columns: if either arrives as text, combining it with a
    # datetime column would give a mixed column that cannot be subtracted from 'date'.
    # Already-converted columns pass through unchanged.
    for column in ('de_listed_date', 'maturity_date'):
        maturity_lists[column] = pd.to_datetime(maturity_lists[column], errors='coerce')

    tagged = pd.merge(rolling_betas, maturity_lists, on='order_book_id', how='left')
    tagged['date'] = pd.to_datetime(tagged['date'])

    # Prefer the de-listing date; fall back to the contractual maturity date.
    # NOTE(review): the de-listing date of a bond only becomes known later, so using
    # it on earlier dates may introduce look-ahead - confirm this is intended.
    tagged['effective_end_date'] = tagged['de_listed_date'].combine_first(tagged['maturity_date'])
    tagged['remaining_maturity'] = (tagged['effective_end_date'] - tagged['date']).dt.days / 365.25

    bins = [0, 3, 5, 100]  # 100 years acts as "no upper limit"
    # With right=False the buckets are [0, 3), [3, 5) and [5, 100) years; the label
    # texts are matched by string elsewhere and therefore kept verbatim.
    labels = ['短期 (1-3年)', '中期 (3-5年)', '长期 (5+年)']
    tagged['maturity_group_tag'] = pd.cut(tagged['remaining_maturity'], bins=bins, labels=labels, right=False)

    return tagged[['date','order_book_id','beta','maturity_group_tag']]

# rolling_betas_with_timegroup_tag = add_maturity_group_tag(rolling_betas,all_instruments)
# print(rolling_betas_with_timegroup_tag.sample(50))


In [9]:
remaining_time_to_mature = '短期 (1-3年)'
def select_wanted_rolling_data(rolling_betas_with_timegroup_tag, remaining_time_to_mature):
    '''
    Keep only the rows that belong to one maturity bucket.

    Parameters:
        rolling_betas_with_timegroup_tag: DataFrame with a 'maturity_group_tag' column.
        remaining_time_to_mature: the bucket label to keep.

    Returns:
        The rows whose 'maturity_group_tag' equals the requested label, with their
        original index labels.
    '''
    in_bucket = rolling_betas_with_timegroup_tag['maturity_group_tag'] == remaining_time_to_mature
    return rolling_betas_with_timegroup_tag.loc[in_bucket]
# rolling_betas = select_wanted_rolling_data(rolling_betas_with_timegroup_tag,remaining_time_to_mature)
# print(rolling_betas)

In [None]:
def do_fama_macbeth(rolling_betas,dependent):
    '''
    Regress the forward return on the rolling beta with a Fama-MacBeth estimator.

    Parameters:
        rolling_betas: DataFrame with columns 'order_book_id', 'date' and 'beta'.
        dependent: DataFrame indexed by (order_book_id, date) with a 'return_30' column.

    Returns:
        The fitted result object (also printed), or None when fitting raises a
        ValueError; in that case the error and the per-bond observation counts are
        printed instead.
    '''
    beta_panel = rolling_betas.set_index(['order_book_id', 'date'])[['beta']]
    return_panel = dependent[['return_30']]

    # Only (bond, date) pairs present in both panels can enter the regression.
    shared_index = return_panel.index.intersection(beta_panel.index)
    dependent = return_panel.loc[shared_index]
    exog = sm.add_constant(beta_panel.loc[shared_index])

    try:
        results = FamaMacBeth(dependent, exog).fit()
        print(results)
    except ValueError as e:
        print("错误:", e)
        print("每个 order_book_id 的 exog 观测数:", exog.groupby(level='order_book_id').size())
        return None
    return results
# result = do_fama_macbeth(rolling_betas,dependent)


In [None]:
def generate_file_path(folder_name):
    '''
    Create the output folder if needed and return the three result-file paths.

    Parameters:
        folder_name: directory that will hold the result files.

    Returns:
        Tuple (success_filepath, dropped_filepath, error_filepath), all located
        inside folder_name.
    '''
    import os

    os.makedirs(folder_name, exist_ok=True)
    file_names = ('Result_file.csv', 'Result_file_dropped.csv', 'Result_file_error.csv')
    return tuple(os.path.join(folder_name, file_name) for file_name in file_names)


In [None]:
def store_value_into_file(results,factor_name,success_filepath,dropped_filepath,error_filepath,p_threshold=0.07):
    '''
    Append a one-line summary of a factor test to the matching result file.

    Parameters:
        results: fitted result exposing pvalues['beta'] and tstats['beta'], or None
            when the regression could not be fitted.
        factor_name: name written at the start of the line.
        success_filepath: receives the line when the beta p-value is below p_threshold.
        dropped_filepath: receives the line otherwise.
        error_filepath: receives a 'Fama_error' line when results is None.
        p_threshold: significance cutoff for the beta p-value (default 0.07).

    Returns:
        None. The only effect is the appended line.
    '''
    # Identity check: `!= None` would defer to the result object's own comparison
    # operator, which need not return a plain bool.
    if results is None:
        target_path = error_filepath
        data = f'factor_name:  {factor_name}   Fama_error\n'
    else:
        p_value = results.pvalues['beta']
        t_stat = results.tstats['beta']
        target_path = success_filepath if p_value < p_threshold else dropped_filepath
        data = f'factor_name:  {factor_name}    p_value: {p_value}  t_value:{t_stat}\n'

    # NOTE(review): the files are opened with the platform default encoding.
    with open(target_path, 'a') as file:
        file.write(data)



In [None]:
def main_test_for_econ_get_factors(start_date,end_date,instrument_type,factor_name,remaining_time_to_mature,rolling_window,folder_name):
    '''
    Run the whole test for one economic factor and record the outcome on disk.

    Steps: select the instruments, build the return panel, load and forward-fill the
    factor, merge the two, estimate rolling betas, keep one maturity bucket, run the
    Fama-MacBeth regression and append the outcome to the result files in folder_name.

    Any exception raised by these steps is caught: an 'outside error' line for the
    factor is appended to the error file and the exception is printed, so the caller
    is not interrupted. Returns None.
    '''
    success_file_path, dropped_file_path, error_filepath = generate_file_path(folder_name=folder_name)
    try:
        bond_pool = get_bonds_poll(start_date, end_date, instrument_type)
        return_panel = get_return_rate(bond_pool, start_date, end_date)
        factor_series = econ_get_factor_value_forward_fill(factor_name, start_date, end_date)
        regression_input = prepare_time_regression_df(return_panel, factor_series)
        betas_all = roll_data_and_regress(regression_input, rolling_window)

        # Maturity bucketing applies to convertible bonds only.
        betas_tagged = add_maturity_group_tag(betas_all, bond_pool)
        betas_selected = select_wanted_rolling_data(betas_tagged, remaining_time_to_mature)

        fm_result = do_fama_macbeth(betas_selected, return_panel)
        store_value_into_file(
            results=fm_result,
            factor_name=factor_name,
            success_filepath=success_file_path,
            dropped_filepath=dropped_file_path,
            error_filepath=error_filepath,
        )
    except Exception as e:
        # Best effort by design: log the failing factor and carry on.
        with open(error_filepath, 'a') as file:
            file.write(f'factor_name:  {factor_name}    outside error\n')
        print(e)
'''start_date = pd.to_datetime('2018-01-01')
end_date = pd.to_datetime('2020-12-31')
instrument_type = 'Convertible'
factor_name = '第二产业增加值占GDP比重(现价)'
remaining_time_to_mature ='短期 (1-3年)'
rolling_window = 90'''


"start_date = pd.to_datetime('2018-01-01')\nend_date = pd.to_datetime('2020-12-31')\ninstrument_type = 'Convertible'\nfactor_name = '第二产业增加值占GDP比重(现价)'\nremaining_time_to_mature ='短期 (1-3年)'\nrolling_window = 90"