# 因子填充

## 导入模块

In [1]:
import numpy as np
import pandas as pd
import feather
import sunlandsdatasdk as sd
import os

## 读入交易日和公司列表

In [2]:
# Sample window (both bounds inclusive) used to restrict the daily price panel.
start_date = '2019-01-01'
end_date = '2024-06-30'

price_1d = feather.read_dataframe('../data/StockPriceK1d_20240630.feather')

# Keep only rows whose date falls inside [start_date, end_date].
in_window = (price_1d['date'] >= start_date) & (price_1d['date'] <= end_date)
price_1d = price_1d[in_window]

# Index the panel by (date, issue); this index is the grid that factors are
# later aligned to.
price_1d = price_1d.sort_values(['date', 'issue']).set_index(['date', 'issue'])
price_1d

Unnamed: 0_level_0,Unnamed: 1_level_0,preclose,open,high,low,close,numTrades,volume,value,adj,ret,is_limit_buy,is_limit_sell
date,issue,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2019-01-02,000001,9.38,9.39,9.42,9.16,9.19,25140,53938632,4.986951e+08,108.031388,-0.020256,0,0
2019-01-02,000002,23.82,23.83,24.09,23.67,23.90,26541,24701028,5.893846e+08,142.667999,0.003359,0,0
2019-01-02,000004,16.03,16.05,16.24,16.01,16.06,241,142400,2.290041e+06,4.063862,0.001871,0,0
2019-01-02,000005,2.68,2.69,2.70,2.66,2.67,1187,2909600,7.788443e+06,9.267603,-0.003731,0,0
2019-01-02,000006,5.18,5.18,5.25,5.10,5.15,2643,6322964,3.273364e+07,34.226151,-0.005792,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-28,688799,42.14,42.18,43.16,41.75,42.43,1473,502586,2.143038e+07,1.046135,0.006882,0,0
2024-06-28,688800,27.17,27.32,28.99,26.81,28.08,7151,2362741,6.678026e+07,1.419317,0.033493,0,0
2024-06-28,688819,23.85,23.83,24.28,23.83,23.86,3652,1014134,2.441640e+07,1.076469,0.000419,0,0
2024-06-28,688981,45.71,45.52,46.42,45.51,46.10,39878,21801968,1.004720e+09,1.000000,0.008532,0,0


## 因子填充

### 因子填充函数

In [3]:
def factor_filling(factor, target_index=None):
    """Align a factor table to a (date, issue) grid and forward-fill per issue.

    Each target row receives the most recent factor observation for the same
    issue dated on or before the target date (as-of semantics). Observations
    whose (date, issue) pair is absent from the target grid are still used as
    fill sources; they are only dropped from the returned rows.

    Parameters
    ----------
    factor : pandas.DataFrame
        Must contain 'date' and 'issue' columns; every other column is
        treated as a value column to be filled. (date, issue) pairs must be
        unique, otherwise ``reindex`` raises ``ValueError``.
    target_index : pandas.MultiIndex, optional
        Grid to align to, with levels named 'date' and 'issue'. Defaults to
        the index of the module-level ``price_1d`` frame.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``target_index`` (in that order), with 'date'
        and 'issue' restored as ordinary columns. Rows with no earlier
        observation for their issue stay NaN.
    """
    if target_index is None:
        target_index = price_1d.index

    factor_fill = (
        factor
            .sort_values(['date', 'issue'])
            .set_index(['date', 'issue'])
    )

    # Forward-fill on the union of both indexes. Reindexing directly onto the
    # target grid first would discard observations dated off the grid (e.g. a
    # non-trading day) before they could be carried forward.
    full_index = target_index.union(factor_fill.index)
    factor_fill = factor_fill.reindex(index=full_index)
    factor_fill = factor_fill.groupby('issue').ffill()

    # Keep only the requested grid rows, in the grid's own order.
    factor_fill = factor_fill.reindex(index=target_index)
    factor_fill = factor_fill.reset_index()
    return factor_fill

In [4]:
# Directories holding raw factor files; a filled copy of each file is written
# next to it with a 'fill_' prefix.
dirs = ['../data/N_connect_0_1/', '../data/N_connect_1_both/', '../data/ret_jump/']
for dirname in dirs:
    # sorted() makes the processing order deterministic across runs.
    for filename in sorted(os.listdir(dirname)):
        # Skip outputs of a previous run, and anything that is not a feather
        # file (sub-directories, checkpoints, OS metadata), which would make
        # read_dataframe fail.
        if filename.startswith('fill_') or not filename.endswith('.feather'):
            continue
        factor = feather.read_dataframe(os.path.join(dirname, filename))
        factor_fill = factor_filling(factor)
        feather.write_dataframe(factor_fill, os.path.join(dirname, 'fill_' + filename))

## 检查

In [5]:
# Spot-check one filled output: rows for issue 000001 from 2019-01-30 onward.
ret_jump = feather.read_dataframe('../data/ret_jump/fill_neutral_ret_jump.feather')
is_target_issue = ret_jump['issue'] == '000001'
on_or_after_start = ret_jump['date'] >= '2019-01-30'
ret_jump[is_target_issue & on_or_after_start]

Unnamed: 0,date,issue,level_1,ret_jump,indus_factor,neutral_factor
71466,2019-01-30,000001,,,,
75048,2019-01-31,000001,0.0,0.066792,0.042402,0.013745
78630,2019-02-01,000001,0.0,0.066792,0.042402,0.013745
82213,2019-02-11,000001,0.0,0.066792,0.042402,0.013745
85796,2019-02-12,000001,0.0,0.066792,0.042402,0.013745
...,...,...,...,...,...,...
5839312,2024-06-24,000001,0.0,0.014435,0.006274,0.034035
5844428,2024-06-25,000001,0.0,0.014435,0.006274,0.034035
5849544,2024-06-26,000001,0.0,0.014435,0.006274,0.034035
5854661,2024-06-27,000001,0.0,0.014435,0.006274,0.034035
