In [1]:
import pandas as pd
import numpy as np
from datetime import date
from data_provider.nestlib.trading_cal import TradeCal
from data_provider.datafeed.quote_feed import QuoteFeed
from collections import OrderedDict
import matplotlib.pyplot as plt
from smartbeta.smartfactor import SmartFactor
from data_provider.datafeed.universe import Universe
from data_provider.nestlib.market_info import Frequency
import collections
from tqdm import tqdm
import multiprocessing
from multiprocessing import Pool
import datetime
from datetime import date
import pdb

import os
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'

import matplotlib.pyplot as plt
from pathos.multiprocessing import ProcessingPool as newPool

plt.rcParams['font.sans-serif']=['Microsoft YaHei'] # font used so that Chinese labels render correctly
plt.rcParams['axes.unicode_minus']=False # render the minus sign correctly

# Module-level handles (trading calendar and stock universe) used by the functions defined in this file.
tc_handle = TradeCal()
uni_handle = Universe()

# Exclude newly listed stocks

In [2]:
def get_new_ipo_stocks(date):
    """Return the tickers whose IPO date falls after the lookback cutoff.

    The cutoff is ``tc_handle.shift_date(date, 30, direction='backward')``
    (presumably 30 trading days before ``date`` -- confirm against the
    calendar helper). A ticker is selected when its ``ipo_date`` is strictly
    greater than ``int(cutoff)``; there is no upper bound, so IPO dates later
    than ``date`` are selected as well.

    Args:
        date: Reference day handed to the trading-calendar helper.

    Returns:
        list: Values of the ``ticker`` column for the selected rows.
    """
    listings = uni_handle.get_all_ipo_info()
    cutoff = tc_handle.shift_date(date, 30, direction='backward')
    listed_after_cutoff = listings['ipo_date'] > int(cutoff)
    return listings.loc[listed_after_cutoff, 'ticker'].tolist()

In [3]:
def get_number_of_all_stocks(date):
    """Return the A-share tickers for ``date`` with recent IPOs removed.

    Despite the name, the result is a list of tickers rather than a count.
    It is produced from a set difference, so it contains no duplicates and
    its order is not defined.

    Args:
        date: Day passed to ``uni_handle.get_a_share_by_date`` and to
            ``get_new_ipo_stocks``.

    Returns:
        list: Tickers in the universe that are not in ``get_new_ipo_stocks(date)``.
    """
    listed = uni_handle.get_a_share_by_date(date)
    recent_ipos = get_new_ipo_stocks(date)
    remaining = set(listed) - set(recent_ipos)
    return list(remaining)

In [4]:
def fillna_axis(x):
    """Normalise a single industry label.

    An empty string is replaced by '申万通信设备' and '申万券商' is replaced by
    '申万证券'. Any other value is returned unchanged. Only the empty string is
    treated as blank; NaN/None are not handled specially.

    Args:
        x: One industry label (callers in this file map it over the
            ``swIndustryLv2`` column).

    Returns:
        The normalised label, or ``x`` itself when no rule applies.
    """
    if x == '':
        return '申万通信设备'
    if x == '申万券商':
        return '申万证券'
    return x

In [5]:
def get_everyday_bias_df(date):
    """Build one row of per-industry amount-bias and daily-return means for ``date``.

    Steps:
      1. Load forward-adjusted quotes for the universe returned by
         ``get_number_of_all_stocks(date)`` over the window that starts at
         ``tc_handle.shift_date(date, 4, direction='backward')`` and ends at
         ``date``.
      2. Discard rows whose ``amount`` is 0, compute ``daily_ret`` as the
         per-ticker percentage change of ``close`` and drop rows containing
         NaN (which removes each ticker's first remaining row).
      3. Per ticker, ``amount_bias`` = last remaining ``amount`` divided by
         the mean of the remaining ``amount`` values.
      4. Keep the rows whose ``datetime_str`` equals ``date``, attach the
         industry from ``uni_handle.get_sw_industry(date)`` and average
         ``amount_bias`` and ``daily_ret`` per ``swIndustryLv2``.

    Args:
        date: Day to evaluate. It is compared against ``datetime_str`` and
            passed to the calendar/universe helpers (presumably a 'YYYYMMDD'
            string -- confirm against the data provider).

    Returns:
        None. A one-row DataFrame (index ``[date]``, columns
        ``<industry>_amount_bias`` followed by ``<industry>_daily_ret``) is
        appended to the module-level ``shared_df_list``, which must already
        be defined when this function runs.
    """
    end_day = date
    begin_day = tc_handle.shift_date(end_day, 4, direction='backward')
    universe = get_number_of_all_stocks(date)

    # Fetch quotes for every stock in the universe
    # (tracking_freq=86400 presumably selects daily bars -- TODO confirm).
    qfq_quote = QuoteFeed(
        universe_ticker=universe,
        begin_day=begin_day,
        end_day=end_day,
        tracking_freq=86400,
        adjust_method="forward",
    )
    stock_quotes = qfq_quote.get_stock_quote().loc[:, ['close', 'ticker', 'datetime_str', 'amount']]

    # Filter with a boolean mask instead of drop(<index labels>): a label-based
    # drop would also remove unrelated rows if the index had duplicate labels.
    traded = stock_quotes.loc[stock_quotes['amount'] != 0].copy()
    # NOTE(review): pct_change and iloc[-1] below follow the row order of the
    # feed; this assumes rows are chronological within a ticker -- confirm.
    traded['daily_ret'] = traded.groupby('ticker')['close'].pct_change()
    traded = traded.dropna()

    amount_bias_se = traded.groupby('ticker')['amount'].apply(
        lambda amounts: amounts.iloc[-1] / amounts.mean()
    )
    amount_bias_df = pd.DataFrame({'ticker': amount_bias_se.index, 'amount_bias': amount_bias_se.values})
    today_quote = traded[traded.datetime_str == date]
    all_quote = today_quote.merge(amount_bias_df, on='ticker')

    # Fill in / correct blank industry names coming from the source data
    industry_df = uni_handle.get_sw_industry(date)
    industry_df['swIndustryLv2'] = industry_df['swIndustryLv2'].map(fillna_axis)
    industry_df = industry_df.rename(columns={'securityId': "ticker"})
    # No explicit key: the merge joins on whatever columns the two frames share.
    merge_df = industry_df.merge(all_quote)
    merge_df = merge_df.sort_values(by='amount_bias', ascending=False)

    records = []
    for industry, members in merge_df.groupby('swIndustryLv2'):
        records.append([industry, members['amount_bias'].mean(), members['daily_ret'].mean()])
    industry_stats = pd.DataFrame(
        records, columns=['industry', 'amount_bias_mean', 'daily_ret_mean']
    ).set_index('industry')

    # Series.append was removed in pandas 2.0; pd.concat is the supported way
    # to stack the two suffixed series (amount-bias labels first, then returns).
    combined = pd.concat([
        industry_stats['amount_bias_mean'].add_suffix('_amount_bias'),
        industry_stats['daily_ret_mean'].add_suffix('_daily_ret'),
    ])
    row_df = combined.to_frame()
    row_df.columns = [date]
    shared_df_list.append(row_df.T)

In [6]:
# Driver: compute the per-day industry rows in parallel, rank industries by the
# most recent amount bias and save the top of the table.
if __name__=='__main__':
    # Manager-backed list; get_everyday_bias_df appends one single-row frame per date to it.
    # NOTE(review): the workers read shared_df_list as a module global, which
    # presumably relies on child processes inheriting the parent's globals
    # (fork start method) -- confirm on the target platform.
    manager = multiprocessing.Manager()
    shared_df_list = manager.list()
    # Today's date formatted as YYYYMMDD.
    today = date.today().strftime('%Y%m%d')
    # Start of the window: today shifted back by 20 via the trading calendar
    # (presumably 20 trading days -- confirm).
    past_day = tc_handle.shift_date(today,20,direction="backward")

    # 10 worker processes, one task per day returned by get_trading_day_list.
    # Results are collected through shared_df_list, not through map's return value.
    with Pool(10) as p:
        p.map(get_everyday_bias_df,tc_handle.get_trading_day_list(past_day,today))
    # Stack the per-date rows: one row per date, one column per '<industry>_<metric>' label.
    final_bias_df = pd.concat(shared_df_list)
    
    # Number of securityId entries per level-2 industry as of today; used only for the labels below.
    industry_df_today = uni_handle.get_sw_industry(today)
    industry_df_today['swIndustryLv2'] = industry_df_today['swIndustryLv2'].map(fillna_axis)
    industry_length_se = industry_df_today.groupby('swIndustryLv2').count()['securityId']

    # Transpose so rows are '<industry>_<metric>' labels and columns are dates.
    fin_df = final_bias_df.T.sort_index(ascending=False)

    # Rewrite each label as '<industry without the 申万 prefix>(<count>)_<metric>'.
    # NOTE(review): the lookup raises KeyError if an industry seen on an earlier
    # day is missing from today's industry table.
    index_li =[]
    for index in fin_df.index:
        industry_label = index.split('_')[0]
        stocks_number = industry_length_se[industry_label]
        real_index = industry_label.replace('申万','')+'('+stocks_number.astype(str)+')_'+('_').join(index.split('_')[1:])
        index_li.append(real_index)
    fin_df.index = index_li
    
    fin_df_copy = fin_df.copy()

    # Back to dates as rows, sorted descending so the latest date label comes first.
    fin_df_copy = fin_df_copy.T.sort_index(ascending=False)

    # Order the columns by their value on the first row (the latest date), descending.
    fin_df_copy = fin_df_copy.sort_values(by=fin_df_copy.index[0],axis=1,ascending=False)

    # Industries in the order their '_amount_bias' column appears after that sort,
    # i.e. ranked by the latest amount bias.
    col_li = fin_df_copy.columns
    industry_rank_li = []
    for number in range(len(col_li)):
        if col_li[number].split('_')[-1] == 'bias':
            industry_rank_li.append(col_li[number].split('_')[0])
    # Emit both metric columns for each industry, keeping the rank order.
    final_ranked_col_name_li = []
    for industry in industry_rank_li:
        final_ranked_col_name_li.append(industry+'_amount_bias')
        final_ranked_col_name_li.append(industry+'_daily_ret')

    fin_df_copy = fin_df_copy[final_ranked_col_name_li]
    # First 10 label rows (5 industries x 2 metrics) and first 10 date columns.
    result = fin_df_copy.T.iloc[:10,:10]
    # Saved with a relative path, i.e. into the current working directory.
    result.to_pickle('industry_amount_bias.pkl')

正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！
正在使用hdf5行情！


In [7]:
# Display the table built in the previous cell: rows are '<industry>(<count>)_<metric>' labels, columns are dates.
result

Unnamed: 0,20210512,20210511,20210510,20210507,20210506,20210430,20210429,20210428,20210427,20210426
农业综合(1)_amount_bias,1.673246,0.900593,1.059066,0.824072,1.025934,0.866463,0.758685,0.520369,0.578761,0.579762
农业综合(1)_daily_ret,0.018367,0.004098,-0.00611,-0.002033,0.008197,0.0,-0.010142,0.0,-0.027613,-0.019342
种植业(20)_amount_bias,1.420861,0.975947,0.980969,1.179287,1.495399,0.93507,0.820036,1.008602,0.988557,1.069971
种植业(20)_daily_ret,0.034846,0.001659,0.008826,-0.001052,0.039349,0.008876,-0.010794,-0.019076,-0.008309,-0.01152
农产品加工(20)_amount_bias,1.338776,1.112511,1.135785,1.20064,1.043897,0.86736,0.901034,0.922633,1.084635,1.26409
农产品加工(20)_daily_ret,0.035754,0.021907,0.001296,0.00616,0.012882,-0.001697,-0.011513,-0.001836,0.010803,-0.001633
汽车整车(22)_amount_bias,1.330718,1.172653,0.986946,0.960765,0.962297,1.061332,0.888383,0.863452,0.876588,0.928304
汽车整车(22)_daily_ret,0.041054,0.000163,0.025062,-0.011798,0.009718,-0.001098,-0.012302,0.014077,-0.022885,-0.005964
其他采掘(11)_amount_bias,1.306059,1.063773,1.205561,1.437305,1.493371,0.959171,0.964599,0.819738,0.969961,1.245482
其他采掘(11)_daily_ret,0.035487,-0.008216,0.06682,0.05108,0.062873,-0.016583,0.003933,0.001644,-0.014547,0.018165
