In [None]:
import jqfactor_analyzer as fa
import numpy as np 
import pandas as pd 
import random
from matplotlib import pyplot as plt
import def_factor_repo as factor_repo
import warnings
warnings.filterwarnings('ignore')
idx = pd.IndexSlice

In [None]:
def get_stk_list(date_bar):
    """Return the stock codes found in level 0 of ``date_bar``'s index.

    Codes outside the closed string range ``'sh600000' <= code <= 'sz399000'``
    are treated as index codes and dropped. The remaining codes are returned
    as a list, de-duplicated, in order of first appearance.

    Parameters
    ----------
    date_bar : pandas object whose index has the security code as level 0.

    Returns
    -------
    list
        Security codes that fall inside the range above.
    """
    unique_codes = date_bar.index.get_level_values(0).unique()
    # Single pass with the inverted "index code" condition; avoids the
    # quadratic `code not in list` membership test.
    return [code for code in unique_codes if 'sh600000' <= code <= 'sz399000']

def get_prepare_data(date_bar,fields,stk_list,del_paused=False,every_date_stk_nums=None,start_date=None,end_date=None,random_state=None)->pd.DataFrame:
    """Filter a (stock, date)-indexed bar table down to the requested slice.

    Parameters
    ----------
    date_bar : DataFrame with a two-level index (security code, date). The
        input is copied; the copy's index levels are named ``'stk'``/``'date'``.
    fields : column label(s) to keep. A list yields a DataFrame; a single
        label yields a Series (standard ``.loc`` behaviour).
    stk_list : optional collection of security codes to keep. ``None`` or an
        empty collection means "keep all".
    del_paused : when True, keep only rows whose ``'paused'`` column equals 0.
        This is applied whether or not sampling is requested.
    every_date_stk_nums : when truthy, randomly keep at most this many rows
        per date; dates with fewer rows are left untouched.
    start_date, end_date : optional inclusive bounds on the ``'date'`` level.
    random_state : forwarded to ``DataFrame.sample`` so the per-date sampling
        can be made reproducible. ``None`` keeps it unseeded.

    Returns
    -------
    The filtered data restricted to ``fields``.

    Notes
    -----
    Date slicing uses pandas MultiIndex label slicing, which pandas only
    supports on a lexsorted index.
    """
    data = date_bar.copy()
    data.index.names = ['stk','date']

    # Explicit emptiness test: `if stk_list:` raises for numpy arrays / pandas Index.
    if stk_list is not None and len(stk_list) > 0:
        data = data.loc[pd.IndexSlice[list(stk_list), :], :]

    # Previously nested under the sampling branch, so del_paused was silently
    # ignored unless every_date_stk_nums was also given.
    if del_paused:
        data = data[data['paused'] == 0]

    if every_date_stk_nums:
        data = data.groupby('date', group_keys=False).apply(
            lambda s: s.sample(every_date_stk_nums, random_state=random_state)
            if len(s) > every_date_stk_nums else s
        )
        data = data.sort_index()

    # One slice covers start-only, end-only and both; a missing bound is an open end.
    if start_date is not None or end_date is not None:
        data = data.loc[pd.IndexSlice[:, start_date:end_date], :]

    data = data.loc[:, fields]
    return data


In [None]:
def get_prepare_data_of_analyze(prepare_price_series,prepare_factor_df):
    """Reshape long-format price and factor data into date-indexed wide tables.

    Each input is processed the same way: missing values are dropped, the
    innermost index level is unstacked and the result transposed so that
    rows are dates and columns are the remaining index level. The date
    labels are then parsed from ``YYYYMMDD`` into a ``DatetimeIndex``.

    Parameters
    ----------
    prepare_price_series : long-format prices, indexed by (stock, date).
    prepare_factor_df : long-format factor values with the same index layout.

    Returns
    -------
    tuple
        ``(price_wide, factor_wide)``.

    Raises
    ------
    ValueError
        If a date label does not match the ``%Y%m%d`` format.
    """
    def _to_date_indexed_wide(long_data):
        # Pivot long -> wide with dates on the rows.
        wide = long_data.dropna().unstack().T
        # Vectorized parse of YYYYMMDD labels; the index name is cleared to
        # match the unnamed index produced by assigning a plain list.
        wide.index = pd.to_datetime(wide.index.astype(str), format='%Y%m%d').rename(None)
        return wide

    prepare_price_unstack_df = _to_date_indexed_wide(prepare_price_series)
    prepare_factor_unstack_df = _to_date_indexed_wide(prepare_factor_df)
    return prepare_price_unstack_df, prepare_factor_unstack_df

In [None]:
def get_prepare_date_bar(date_bar_file_path=r'D:\QUANT_GAME\python_game\factor\factor_lab\get_date_bar\date_bar_post.pkl'):
    """Load the pickled daily-bar table and return its ``'close'`` column for stocks only.

    Parameters
    ----------
    date_bar_file_path : path of the pickle to read. Defaults to the location
        this notebook was written against; pass another path to run the
        notebook on a different machine.

    Returns
    -------
    The result of ``get_prepare_data`` restricted to the ``['close']`` field
    and to the codes returned by ``get_stk_list``, with no paused-row filtering.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files you trust.
    a_date_bar = pd.read_pickle(date_bar_file_path)
    stks_list = get_stk_list(a_date_bar)
    fields = ['close']
    return get_prepare_data(a_date_bar, fields, stks_list, del_paused=False)


In [None]:
# Shared inputs for every factor cell below: the sorted price table, the
# `periods` tuple handed to the analyzer, and the date window.
prepare_date_bar = get_prepare_date_bar().sort_index()
periods = (1,)
start_date = 20200101
end_date = 20240101

In [None]:
def calc_and_analyze_factor(factor_class, prepare_date_bar,periods,start_date,end_date,zero_elevation_type=None,quantiles=10,max_loss=0.5,hist_bins=200):
    """Compute one factor, cut it to a date window and plot its diagnostics.

    Parameters
    ----------
    factor_class : object exposing ``calc``. It is called as
        ``calc(zero_elevation_type)`` when ``zero_elevation_type`` is truthy,
        otherwise as ``calc()``.
    prepare_date_bar : table whose ``'close'`` column supplies the prices.
    periods : forwarded to ``fa.FactorAnalyzer`` as ``periods``.
    start_date, end_date : inclusive bounds applied to the second index level
        of the computed factor.
    zero_elevation_type : optional argument forwarded to ``calc``.
    quantiles, max_loss : forwarded to ``fa.FactorAnalyzer``; the defaults are
        the values previously hard-coded here.
    hist_bins : number of bins for the factor-value histogram.

    Returns
    -------
    None. The function only produces plots and the information table.
    """
    # factor
    if zero_elevation_type:
        factor_data_calc_res = factor_class.calc(zero_elevation_type)
    else:
        factor_data_calc_res = factor_class.calc()
    factor_data = factor_data_calc_res.loc[idx[:,start_date:end_date]]
    # price
    prepare_price_series = prepare_date_bar['close']
    # Reshape both inputs into date-indexed wide tables for the analyzer.
    prepare_price,prepare_factor = get_prepare_data_of_analyze(prepare_price_series,factor_data)
    # Plot the factor distribution to check for outliers / an uneven distribution.
    hist_ax = prepare_factor.stack().hist(bins=hist_bins)
    hist_ax.set_title('Factor value distribution')
    hist_ax.set_xlabel('factor value')
    hist_ax.set_ylabel('count')
    # Factor analysis
    factor_analyzer = fa.FactorAnalyzer(prices=prepare_price,factor=prepare_factor,quantiles=quantiles,periods=periods,max_loss=max_loss)
    factor_analyzer.plot_information_table(method='normal')
    factor_analyzer.plot_ic_hist(method='normal')
    factor_analyzer.plot_ic_ts(method='normal')
    

### 1.price_speed_factor

In [None]:
# Build the PriceSpeed factor with the ['close'] field list and run the shared analysis.
price_speed_factor = factor_repo.PriceSpeed(['close'], prepare_date_bar)
calc_and_analyze_factor(
    factor_class=price_speed_factor,
    prepare_date_bar=prepare_date_bar,
    periods=periods,
    start_date=start_date,
    end_date=end_date,
)

### 2.price_speed_change_factor

In [None]:
# Build the PriceSpeedChange factor with the ['close'] field list and run the shared analysis.
price_speed_change_factor = factor_repo.PriceSpeedChange(['close'], prepare_date_bar)
calc_and_analyze_factor(
    factor_class=price_speed_change_factor,
    prepare_date_bar=prepare_date_bar,
    periods=periods,
    start_date=start_date,
    end_date=end_date,
)

### 3.force_factor

In [None]:
# Build the Force factor with the ['close'] field list and run the shared analysis.
force_factor = factor_repo.Force(['close'], prepare_date_bar)
calc_and_analyze_factor(
    factor_class=force_factor,
    prepare_date_bar=prepare_date_bar,
    periods=periods,
    start_date=start_date,
    end_date=end_date,
)

### 4.impulse_factor (`impluse_factor` / `factor_repo.Impluse` in code)

In [None]:
# Build the Impluse factor (spelling as defined in factor_repo) with the
# ['close'] field list and run the shared analysis.
impluse_factor = factor_repo.Impluse(['close'], prepare_date_bar)
calc_and_analyze_factor(
    factor_class=impluse_factor,
    prepare_date_bar=prepare_date_bar,
    periods=periods,
    start_date=start_date,
    end_date=end_date,
)

### 5.max_min_price_average_factor

In [None]:
# Build the MaxMinPriceAverage factor with the ['close'] field list and run the shared analysis.
max_min_price_average_factor = factor_repo.MaxMinPriceAverage(['close'], prepare_date_bar)
calc_and_analyze_factor(
    factor_class=max_min_price_average_factor,
    prepare_date_bar=prepare_date_bar,
    periods=periods,
    start_date=start_date,
    end_date=end_date,
)

### 6.period_price_average_factor

In [None]:
# Build the PeriodPriceAverage factor with the ['close'] field list and run the shared analysis.
period_price_average_factor = factor_repo.PeriodPriceAverage(['close'], prepare_date_bar)
calc_and_analyze_factor(
    factor_class=period_price_average_factor,
    prepare_date_bar=prepare_date_bar,
    periods=periods,
    start_date=start_date,
    end_date=end_date,
)

### 7.elevation_factor

In [None]:
# Build the Elevation factor with the ['close'] field list; its calc() is
# invoked with zero_elevation_type='mean' by the shared analysis.
elevation_factor = factor_repo.Elevation(['close'], prepare_date_bar)
calc_and_analyze_factor(
    factor_class=elevation_factor,
    prepare_date_bar=prepare_date_bar,
    periods=periods,
    start_date=start_date,
    end_date=end_date,
    zero_elevation_type='mean',
)

### 8.voltage_factor

In [None]:
# Build the Voltage factor with the ['close'] field list and run the shared analysis.
voltage_factor = factor_repo.Voltage(['close'], prepare_date_bar)
calc_and_analyze_factor(
    factor_class=voltage_factor,
    prepare_date_bar=prepare_date_bar,
    periods=periods,
    start_date=start_date,
    end_date=end_date,
)

### 9.current_factor

In [None]:
# Build the Current factor with the ['close'] field list; its calc() is
# invoked with zero_elevation_type='mean' by the shared analysis.
current_factor = factor_repo.Current(['close'], prepare_date_bar)
calc_and_analyze_factor(
    factor_class=current_factor,
    prepare_date_bar=prepare_date_bar,
    periods=periods,
    start_date=start_date,
    end_date=end_date,
    zero_elevation_type='mean',
)

### 10.return_accumulation_factor

In [16]:
# Build the ReturnAccumulation factor with the ['close'] field list and run the shared analysis.
return_accumulation_factor = factor_repo.ReturnAccumulation(['close'], prepare_date_bar)
calc_and_analyze_factor(
    factor_class=return_accumulation_factor,
    prepare_date_bar=prepare_date_bar,
    periods=periods,
    start_date=start_date,
    end_date=end_date,
)

KeyboardInterrupt: 