In [1]:
# Load IPython's autoreload extension and switch it to mode 2 so that edits to
# imported modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

# Project root = two directory levels above the current working directory.
PATH = os.path.dirname(os.path.dirname(os.getcwd()))

# Register the root on the import path (only once) — presumably so that the
# `FactorAnalysis` package imported in the next cell can be resolved.
if PATH not in sys.path:
    sys.path.append(PATH)

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

from FactorAnalysis.week1.helpers.data_processing import read_from_db, read_table_names
from FactorAnalysis.week2.helpers.information_analysis import (
    factor_analysis_all,
    read_in_weights,
    combine_factors
)

In [4]:
# Apply seaborn's default theme to plots drawn after this cell.
sns.set_theme()
# Suppress every warning category for the remainder of the session.
warnings.simplefilter("ignore")

## Load Factors

In [5]:
# Inputs for this section. The database path is relative, so the kernel's
# working directory must contain the `data/` folder.
# NOTE(review): `start_date` is not referenced in the cells shown here —
# confirm it is used further down, otherwise drop it.
start_date = "20190101"
db_factors = "data/factors.db"

# Read the "factors_all_stocks" table from the factors database. The frame
# displayed in the next cell is indexed by (trade_date, ts_code) with one
# column per factor.
factors = read_from_db("factors_all_stocks", db_factors)

In [6]:
# Show the loaded factor table; the rendering is truncated to the first and
# last rows.
factors

Unnamed: 0_level_0,Unnamed: 1_level_0,total_mv,pe,pb,turnover_rate,reversal_rate,volatility,roe,netprofit_yoy,or_yoy,assets_yoy,equity_yoy,gross_profit_margin,operating_profit_margin,net_profit_margin,operating_cash_flow_to_net_income,operating_cash_flow_to_revenue,current_ratio,cash_current_liability_ratio,cash_liability_ratio,long_term_liability_operating_cash_flow_ratio
trade_date,ts_code,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-02,000001.SZ,0.083321,-0.325763,-0.798117,-0.304775,-0.220369,-0.228010,-0.114297,-0.176164,-0.229718,-0.195412,-0.480238,-0.452793,-0.459658,-0.418976,-0.339621,-0.377077,-0.193252,-0.256768,-0.355400,-0.193091
2019-01-02,000002.SZ,0.083321,-0.206204,-0.480028,-0.135223,-0.212351,-0.267856,0.082697,-0.013247,-0.157485,0.395986,-0.234699,0.000424,-0.013598,-0.093235,-0.266039,-0.316678,-0.320439,-0.270601,-0.276109,-0.303068
2019-01-02,000006.SZ,0.702998,0.950617,0.348110,0.968434,0.935827,0.895779,0.971955,0.924936,1.020208,0.952265,0.835304,0.935789,0.927483,0.917090,0.949519,0.921057,1.088554,0.719391,0.651322,0.955307
2019-01-02,000008.SZ,0.581733,2.102146,1.447103,1.740295,1.717541,1.623287,1.257846,1.665371,1.823129,1.882059,1.221329,0.970596,0.962309,0.960389,1.601891,1.721343,0.998632,1.205718,1.141040,1.788690
2019-01-02,000009.SZ,-1.733993,-1.530981,-0.199528,-0.993448,-1.231527,-1.224059,-1.038154,-1.369969,-1.496202,-1.467010,-0.804477,-1.445616,-1.485266,-0.988157,-0.492675,-0.183347,-1.768321,-0.446984,-0.285851,-1.502775
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-29,688777.SH,-0.222965,-0.200384,-0.401124,-0.377340,-0.401381,-0.601752,-0.462780,-0.513021,-0.516439,-0.453111,-0.517085,-0.501424,-0.498649,-0.493062,-0.430655,-0.424723,-0.018425,-0.376723,-0.374207,-0.414121
2023-12-29,688778.SH,-0.771222,-0.461310,-0.824449,-0.793190,-1.053647,-0.790082,-0.731853,-0.252151,-0.394839,-0.397358,-0.006730,-0.377000,-0.384726,-0.243073,-0.630119,-0.758501,-0.535848,-0.354148,-0.432185,-1.037556
2023-12-29,688779.SH,-2.473606,-1.008549,-0.815579,-2.495824,-2.495088,-2.391307,-1.870585,-1.014213,-1.034186,-1.091583,-1.868308,-1.058373,-1.072313,-1.065926,-2.389988,-2.209301,-2.016050,-2.214228,-2.421893,-2.172181
2023-12-29,688819.SH,-0.702957,-0.805802,-0.532022,-0.969003,-0.932444,-0.917142,-1.146047,-1.004502,-1.003178,-1.048111,-0.887566,-1.157365,-1.153338,-1.143297,-0.895822,-0.967259,-0.610897,-0.943926,-0.938920,-0.987390


## Combine Factors

In [7]:
# Load the per-factor weights. In the displayed frame each row is an IC_*/IR_*
# label and each column is one of the factor columns of `factors`.
weights_df = read_in_weights()
weights_df

Unnamed: 0,total_mv,pe,pb,turnover_rate,reversal_rate,volatility,roe,netprofit_yoy,or_yoy,assets_yoy,equity_yoy,gross_profit_margin,operating_profit_margin,net_profit_margin,operating_cash_flow_to_net_income,operating_cash_flow_to_revenue,current_ratio,cash_current_liability_ratio,cash_liability_ratio,long_term_liability_operating_cash_flow_ratio
IC_10D,0.009776,-0.051812,-0.044281,-0.051301,-0.050766,-0.05328,-0.051306,-0.059359,-0.057962,-0.049469,-0.052405,-0.054424,-0.054296,-0.054258,-0.050817,-0.051871,-0.051079,-0.050721,-0.05063,-0.050187
IC_1D,-0.034544,0.01596,-0.027533,-0.001884,0.01393,0.031507,0.064962,0.042942,0.050698,0.063379,0.075235,0.072201,0.072734,0.072649,0.056136,0.057112,0.073284,0.057406,0.058035,0.057869
IC_5D,0.009992,-0.053345,-0.047652,-0.053597,-0.051911,-0.054109,-0.050562,-0.059921,-0.05797,-0.049046,-0.051495,-0.053387,-0.053221,-0.05323,-0.050254,-0.051198,-0.049645,-0.05,-0.049914,-0.049551
IC_rank_10D,0.00284,-0.052204,-0.037167,-0.050214,-0.051809,-0.050973,-0.051992,-0.059046,-0.057271,-0.050459,-0.054318,-0.057049,-0.056888,-0.056735,-0.052073,-0.052747,-0.052716,-0.051121,-0.051106,-0.051274
IC_rank_1D,-0.054004,-0.01514,-0.037045,-0.037198,-0.048657,0.056118,0.058804,0.026329,0.039092,0.056153,0.07373,0.062461,0.06285,0.064868,0.045737,0.045888,0.077475,0.046603,0.047204,0.044642
IC_rank_5D,0.000176,-0.053822,-0.038029,-0.051686,-0.053875,-0.05005,-0.0516,-0.060027,-0.057734,-0.050333,-0.054324,-0.056734,-0.056615,-0.056413,-0.051807,-0.052404,-0.051754,-0.050641,-0.050703,-0.051272
IR_1D,-0.047885,0.017805,-0.035626,-0.001742,0.012235,0.031099,0.061497,0.043727,0.051684,0.061734,0.079614,0.071372,0.071881,0.072245,0.053052,0.054026,0.071479,0.05405,0.05471,0.052537
IR_5D,0.011885,-0.060314,-0.060179,-0.050225,-0.046205,-0.053442,-0.047926,-0.060683,-0.058947,-0.048613,-0.055147,-0.053312,-0.05318,-0.053489,-0.047802,-0.048779,-0.049213,-0.047765,-0.047717,-0.045177
IR_10D,0.00969,-0.059052,-0.055229,-0.048546,-0.045835,-0.053469,-0.047651,-0.060908,-0.058582,-0.048174,-0.056067,-0.054877,-0.05482,-0.054963,-0.048457,-0.049916,-0.050117,-0.048746,-0.048626,-0.046273
IR_rank_1D,-0.079035,-0.016747,-0.046068,-0.033551,-0.042234,0.053487,0.055259,0.025996,0.038317,0.053777,0.076778,0.062761,0.063126,0.065431,0.042381,0.042628,0.075782,0.042941,0.043555,0.040147


In [8]:
# Combine the individual factor columns using `weights_df`; the displayed
# result has one column per IC_*/IR_* label, indexed by (trade_date, ts_code).
# NOTE(review): presumably combine_factors also persists the result to
# `db_out_filename` — the next cell reads a "combined_factors" table from that
# file; confirm against the helper.
db_out_filename = "data/combined_factors.db"
combined_factors = combine_factors(factors, weights_df, db_out_filename)
combined_factors

Unnamed: 0_level_0,Unnamed: 1_level_0,IC_10D,IC_1D,IC_5D,IC_rank_10D,IC_rank_1D,IC_rank_5D,IR_1D,IR_5D,IR_10D,IR_rank_1D,IR_rank_5D,IR_rank_10D
trade_date,ts_code,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2019-01-02,000001.SZ,0.316205,-0.268799,0.317371,0.315399,-0.196979,0.316223,-0.258811,0.325115,0.323956,-0.188321,0.323839,0.322782
2019-01-02,000002.SZ,0.158128,-0.113168,0.159362,0.155529,-0.077551,0.155852,-0.106590,0.162463,0.161536,-0.072177,0.158095,0.157922
2019-01-02,000006.SZ,-0.877584,0.820225,-0.875525,-0.892974,0.593089,-0.897035,0.789079,-0.865639,-0.871567,0.557461,-0.892046,-0.887210
2019-01-02,000008.SZ,-1.444015,1.212432,-1.448062,-1.453559,0.813689,-1.462231,1.163067,-1.444954,-1.445167,0.760419,-1.460244,-1.451426
2019-01-02,000009.SZ,1.036315,-0.949642,1.033794,1.062858,-0.662007,1.071049,-0.906732,1.024029,1.033029,-0.603039,1.071496,1.061855
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-29,688777.SH,0.416214,-0.379571,0.416015,0.420395,-0.283712,0.421933,-0.365246,0.413921,0.415758,-0.267945,0.420455,0.418707
2023-12-29,688778.SH,0.529804,-0.396453,0.532321,0.534836,-0.224171,0.539466,-0.361628,0.523818,0.523908,-0.187682,0.532140,0.527770
2023-12-29,688779.SH,1.670957,-1.474135,1.668533,1.702817,-1.001081,1.712639,-1.386152,1.628355,1.641645,-0.900213,1.680079,1.670020
2023-12-29,688819.SH,0.938793,-0.882261,0.936565,0.954246,-0.641553,0.958226,-0.847408,0.926697,0.933033,-0.602803,0.952799,0.948213


In [9]:
# Read the "combined_factors" table back from the output database, presumably
# as a round-trip check: the displayed rows match the in-memory result shown
# by the previous cell.
# NOTE(review): the check is visual only — consider an explicit comparison
# (e.g. pd.testing.assert_frame_equal) once the index/dtype behaviour of the
# database round trip has been confirmed.
combined_factors_reloaded = read_from_db("combined_factors", db_out_filename)
combined_factors_reloaded

Unnamed: 0_level_0,Unnamed: 1_level_0,IC_10D,IC_1D,IC_5D,IC_rank_10D,IC_rank_1D,IC_rank_5D,IR_1D,IR_5D,IR_10D,IR_rank_1D,IR_rank_5D,IR_rank_10D
trade_date,ts_code,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2019-01-02,000001.SZ,0.316205,-0.268799,0.317371,0.315399,-0.196979,0.316223,-0.258811,0.325115,0.323956,-0.188321,0.323839,0.322782
2019-01-02,000002.SZ,0.158128,-0.113168,0.159362,0.155529,-0.077551,0.155852,-0.106590,0.162463,0.161536,-0.072177,0.158095,0.157922
2019-01-02,000006.SZ,-0.877584,0.820225,-0.875525,-0.892974,0.593089,-0.897035,0.789079,-0.865639,-0.871567,0.557461,-0.892046,-0.887210
2019-01-02,000008.SZ,-1.444015,1.212432,-1.448062,-1.453559,0.813689,-1.462231,1.163067,-1.444954,-1.445167,0.760419,-1.460244,-1.451426
2019-01-02,000009.SZ,1.036315,-0.949642,1.033794,1.062858,-0.662007,1.071049,-0.906732,1.024029,1.033029,-0.603039,1.071496,1.061855
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-29,688777.SH,0.416214,-0.379571,0.416015,0.420395,-0.283712,0.421933,-0.365246,0.413921,0.415758,-0.267945,0.420455,0.418707
2023-12-29,688778.SH,0.529804,-0.396453,0.532321,0.534836,-0.224171,0.539466,-0.361628,0.523818,0.523908,-0.187682,0.532140,0.527770
2023-12-29,688779.SH,1.670957,-1.474135,1.668533,1.702817,-1.001081,1.712639,-1.386152,1.628355,1.641645,-0.900213,1.680079,1.670020
2023-12-29,688819.SH,0.938793,-0.882261,0.936565,0.954246,-0.641553,0.958226,-0.847408,0.926697,0.933033,-0.602803,0.952799,0.948213
