In [1]:
import pandas as pd 
import numpy as np
import pickle
from tqdm import *
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from scipy import stats
import statsmodels.api as sm
import os

# Date/time
import datetime

# RiceQuant data and factor APIs
import rqdatac as rq
from rqdatac import *
init(timeout=300)
from rqfactor import *
from rqfactor_utils.universe_filter import *
from rqfactor import *
from rqfactor.extension import rolling_window, CombinedRollingWindowFactor, CombinedFactor, UserDefinedLeafFactor,UnaryCrossSectionalFactor
from rqfactor import CS_REGRESSION_RESIDUAL,MA,STD,PCT_CHANGE,REF,LOG,RANK,IF,ABS,TS_FILLNA,TS_ZSCORE,SUM,DELTA,TS_MAX,CS_ZSCORE,QUANTILE
from rqfactor_utils import *
# Silence warnings and log output below ERROR
import warnings
warnings.filterwarnings("ignore")
import logging
logging.getLogger().setLevel(logging.ERROR)

import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('default')
plt.rcParams['figure.facecolor'] = 'white'
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus']=False
from nn_factor_analysis import *

In [2]:
def _fn(factor):
    """Wrap ``factor`` in ``TS_FILLNA(factor, 0)`` and return the result.

    The aggregation helpers in this cell route every operand through this
    function before combining it.

    :param factor: factor expression to wrap
    :return: the ``TS_FILLNA(factor, 0)`` expression
    """
    # NOTE(review): presumably TS_FILLNA(x, 0) replaces missing values with 0 -
    # confirm against the rqfactor operator reference.
    filled = TS_FILLNA(factor, 0)
    return filled

def f_sum(factors):
    """Add up a sequence of factors, passing each one through ``_fn`` first.

    :param factors: non-empty sequence of factor expressions (must support
        ``factors[0]`` and ``factors[1:]``)
    :return: ``_fn(factors[0]) + _fn(factors[1]) + ...``
    """
    first, rest = factors[0], factors[1:]
    total = _fn(first)
    for item in rest:
        total += _fn(item)
    return total

def f_mean(factors):
    """Equal-weight average of a sequence of factors.

    The total comes from ``f_sum`` and is divided by ``len(factors)``, i.e. by
    the number of inputs regardless of how many of them actually had data.

    :param factors: non-empty sequence of factor expressions
    :return: ``f_sum(factors) / len(factors)``
    """
    total = f_sum(factors)
    count = len(factors)
    return total / count


def f_std(factors):
    """Dispersion of ``factors`` around their ``f_mean``.

    Computed as ``f_mean([(f - mean) ** 2 for f in factors]) ** 0.5``, so the
    squared deviations are averaged over ``len(factors)`` (no ``n - 1``
    correction).

    :param factors: non-empty sequence of factor expressions
    :return: square root of the mean squared deviation
    """
    center = f_mean(factors)
    squared_deviations = []
    for item in factors:
        squared_deviations.append((item - center) ** 2)
    return f_mean(squared_deviations) ** 0.5
  
def f_z(factors):
    """Standardise the first factor against the whole sequence.

    :param factors: non-empty sequence of factor expressions; ``factors[0]``
        is the value being standardised
    :return: ``(factors[0] - f_mean(factors)) / f_std(factors)``
    """
    head = factors[0]
    center, spread = f_mean(factors), f_std(factors)
    deviation = head - center
    # No guard for a zero spread: the division is performed as-is.
    return deviation / spread

def prev_deal(ftr_name, i=0):
    """Build the ``Factor`` named ``"<ftr_name>_mrq_<i>"``.

    :param ftr_name: base field name -> str
    :param i: value appended after ``_mrq_`` (default 0)
    :return: ``Factor(ftr_name + "_mrq_<i>")``
    """
    # NOTE(review): "_mrq_<i>" presumably selects the report i quarters before
    # the most recent one - confirm against the rqfactor field naming docs.
    suffix = f"_mrq_{i}"
    return Factor(ftr_name + suffix)

def get_yoy(ftr_name):
    """Growth of a field versus its ``_mrq_4`` value, safe for negative bases.

    Returns ``(current - base) / ABS(base)`` where ``current`` is
    ``prev_deal(ftr_name)`` and ``base`` is ``prev_deal(ftr_name, 4)``.

    The earlier form ``current / ABS(base) - 1`` only equals the growth rate
    when ``base`` is positive. With a negative base (e.g. a loss shrinking
    from -100 to -50) it returned -1.5 instead of +0.5, because subtracting 1
    assumes ``base / ABS(base) == 1``.

    :param ftr_name: base field name -> str
    :return: factor expression for the growth rate
    """
    current = prev_deal(ftr_name)
    base = prev_deal(ftr_name, 4)
    # Take the difference first so the sign of the base is handled correctly;
    # ABS in the denominator keeps "improvement" positive for negative bases.
    return (current - base) / ABS(base)

def get_ratio_ftr(name1, name2, i):
    """Ratio of two fields taken at the same ``_mrq_`` offset.

    The denominator is wrapped in ``ABS``, so the sign of the result follows
    the numerator only.

    :param name1: numerator field name -> str
    :param name2: denominator field name -> str
    :param i: ``_mrq_`` offset passed to ``prev_deal`` for both fields
    :return: ``prev_deal(name1, i) / ABS(prev_deal(name2, i))``
    """
    numerator = prev_deal(name1, i)
    denominator = ABS(prev_deal(name2, i))
    return numerator / denominator

def get_ema(factors, decay):
    """Decay-weighted combination of a sequence of factors.

    Starting from ``_fn(factors[0])``, each later element updates the running
    value as ``_fn(f) + decay * value``. The last element therefore carries
    weight 1 and ``factors[0]`` carries ``decay ** (len(factors) - 1)``. The
    result is divided by ``len(factors)``, not by the sum of the weights.

    :param factors: non-empty sequence of factor expressions
    :param decay: multiplier applied to the running value at every step
    :return: the accumulated value divided by ``len(factors)``
    """
    head, tail = factors[0], factors[1:]
    running = _fn(head)
    for item in tail:
        running = _fn(item) + decay * running
    count = len(factors)
    return running / count

def get_ema_cs(factors, decay):
    """Decay-weighted combination of ``CS_ZSCORE``-wrapped factors.

    Same recursion as ``value = term + decay * value``, but every term is
    ``CS_ZSCORE(_fn(f))`` rather than ``_fn(f)``. The last element carries
    weight 1, ``factors[0]`` carries ``decay ** (len(factors) - 1)``, and the
    result is divided by ``len(factors)``.

    :param factors: non-empty sequence of factor expressions
    :param decay: multiplier applied to the running value at every step
    :return: the accumulated value divided by ``len(factors)``
    """
    head, tail = factors[0], factors[1:]
    running = CS_ZSCORE(_fn(head))
    for item in tail:
        running = CS_ZSCORE(_fn(item)) + decay * running
    count = len(factors)
    return running / count

In [3]:
def INDEX_FIX(start_date = '2016-02-01',end_date = '2023-08-01',index_item = '000906.XSHG'):
    """
    Build a date x stock membership matrix for an index.

    :param start_date: first date of the window -> str
    :param end_date: last date of the window -> str
    :param index_item: index code -> str
    :return: tuple ``(index_fix, stock_list)``
        index_fix: bool DataFrame, rows = dates (DatetimeIndex named 'date'),
                   columns = stock codes (named 'stock'); True where the stock
                   is listed as a component on that date
        stock_list: list of the column labels of ``index_fix``, i.e. every
                    stock that appears at least once in the window
    """
    # Expected shape: a mapping {date: iterable of stock codes}
    components = index_components(index_item, start_date=start_date, end_date=end_date)

    # One (date, stock) record per membership entry; building the records
    # directly avoids padding ragged component lists with NaN.
    records = [
        (date, stock)
        for date, stocks in components.items()
        for stock in stocks
    ]
    membership = (
        pd.DataFrame(records, columns=['date', 'stock'])
        .dropna()
        .assign(date=lambda df: pd.to_datetime(df['date']), level=True)
    )

    # Pivot to the dynamic stock pool. Missing (date, stock) pairs become
    # False, and the explicit bool cast keeps the dtype from depending on
    # pandas' fillna downcasting rules.
    index_fix = (
        membership.set_index(['date', 'stock'])['level']
        .unstack(fill_value=False)
        .astype(bool)
    )
    stock_list = index_fix.columns.tolist()

    return index_fix, stock_list
# Analysis window, defined once and reused for the universe lookup here and
# for the factor runs in the later cells.
start_date = "2019-01-01"
end_date = "2024-03-30"
# Second element of INDEX_FIX's return value: the list of stock codes.
stk_names = INDEX_FIX(start_date=start_date, end_date=end_date)[1]

In [4]:
# Twelve CS_ZSCORE-wrapped ratios of operating cash flow to |net profit|,
# one per `_mrq_` offset 0..11.
cfoa_div_profit_list = [
    CS_ZSCORE(get_ratio_ftr("cash_flow_from_operating_activities", "net_profit", lag))
    for lag in range(12)
]

# Selling + G&A + financing expense at `_mrq_0`; f_sum passes every term
# through TS_FILLNA(..., 0) before adding.
expense_ftr = f_sum([prev_deal(j) for j in ["selling_expense","ga_expense","financing_expense"]])
# Expense / |operating revenue|, scaled by the CS_ZSCORE of gross profit.
expense_div_op = expense_ftr / ABS(prev_deal("operating_revenue")) * CS_ZSCORE(prev_deal("gross_profit"))

# Operating cash flow / operating revenue at `_mrq_0` (denominator keeps its sign).
cfoa_div_operating_revenue = prev_deal("cash_flow_from_operating_activities") / prev_deal("operating_revenue")
# Equal-weight average of the twelve z-scored CFO / |net profit| ratios.
cfoa_div_profit_mean = f_mean(cfoa_div_profit_list)
# (short-term loans + short-term debt) / operating revenue.
# Only the numerator terms are zero-filled, so a company missing one of the two
# debt items still gets a value. The denominator is deliberately NOT filled:
# replacing a missing revenue with 0 would turn the ratio into a division by
# zero instead of leaving it missing.
short_debt_div_op = (TS_FILLNA(Factor("short_term_loans"),0) + TS_FILLNA(Factor("short_term_debt"),0)) / Factor("operating_revenue")

In [5]:
# Evaluate each factor expression over the same stock list and date window.
expense_div_op_ftr = execute_factor(expense_div_op, stk_names, start_date, end_date)
cfoa_div_operating_revenue_ftr = execute_factor(cfoa_div_operating_revenue, stk_names, start_date, end_date)
cfoa_div_profit_mean_ftr = execute_factor(cfoa_div_profit_mean, stk_names, start_date, end_date)
short_debt_div_op_ftr = execute_factor(short_debt_div_op, stk_names, start_date, end_date)

In [6]:
# Persist each computed factor table as parquet. Create the output directory
# first so the writes do not fail on a fresh checkout where it is absent.
os.makedirs("./ftr_data", exist_ok=True)
expense_div_op_ftr.to_parquet("./ftr_data/expense_div_op_ftr.parquet")
cfoa_div_operating_revenue_ftr.to_parquet("./ftr_data/cfoa_div_operating_revenue_ftr.parquet")
cfoa_div_profit_mean_ftr.to_parquet("./ftr_data/cfoa_div_profit_mean_ftr.parquet")
short_debt_div_op_ftr.to_parquet("./ftr_data/short_debt_div_op_ftr.parquet")