# In [58]:
import pandas as pd
from openpyxl import Workbook

# In [61]:
# Look-back windows, expressed in trading days (rows of the price series).
_LOOKBACK_1M = 22
_LOOKBACK_3M = 66
_LOOKBACK_6M = 125
_LOOKBACK_1Y = 250

# Reporting dates this module supports, mapped to the trading day actually
# looked up in the price index.
_REPORT_TO_TRADE_DATE = {
    '2018-06-30': '2018-06-29',
    '2018-12-31': '2018-12-28',
}


def _to_cell(value):
    """Return *value* as a plain float, or None when it is missing/NaN."""
    if value is None or pd.isna(value):
        return None
    return float(value)


def _momentum(prices, end_pos, lookback):
    """Return the simple return over *lookback* rows ending at *end_pos*.

    Returns None when the series does not reach back far enough; a negative
    position must never be used because it would index from the end of the
    series and silently pick up unrelated prices.
    """
    start_pos = end_pos - lookback
    if start_pos < 0:
        return None
    start_price = prices.iloc[start_pos]
    return (prices.iloc[end_pos] - start_price) / start_price


def _beta(stock_returns, index_prices):
    """Return cov(stock, index) / var(index) on the dates both series share.

    Returns None when fewer than two common observations exist or the index
    returns have no variance.
    """
    index_returns = index_prices.pct_change().dropna()
    paired = pd.concat([stock_returns, index_returns], axis=1, join='inner').dropna()
    if len(paired) < 2:
        return None
    index_var = paired.iloc[:, 1].var()
    if not index_var > 0:
        return None
    return paired.iloc[:, 0].cov(paired.iloc[:, 1]) / index_var


def cal(stock_code, end_date, ws, stock_data, beta_data):
    """Compute volatility, momentum and beta factors and append them to *ws*.

    Args:
        stock_code: Identifier written in the first cell of the row.
        end_date: Reporting date; only '2018-06-30' and '2018-12-31' are
            handled (mapped to the trading days '2018-06-29' and
            '2018-12-28'). Any other value makes the function return
            without writing anything.
        ws: Object with an ``append(list)`` method (e.g. an openpyxl
            worksheet) that receives one row:
            [code, volatility, mom_1m, mom_3m, mom_6m, beta_hs300, beta_zz500].
        stock_data: Price Series indexed by trading date. Assumed to be
            sorted with unique dates so that ``get_loc`` yields one integer.
        beta_data: DataFrame indexed by date with the columns '沪深300' and
            '中证500' holding index price levels.

    Returns:
        None. The result is the row appended to *ws*.

    Raises:
        KeyError: If the trading day is absent from ``stock_data`` (or from
            ``beta_data`` when the one-year window is available).

    Factors whose look-back window reaches before the first available row,
    or whose value is NaN, are written as None (an empty cell).
    """
    trade_date = _REPORT_TO_TRADE_DATE.get(end_date)
    if trade_date is None:
        return

    # Position of the trading day; all windows are counted back from here.
    end_index = stock_data.index.get_loc(trade_date)

    volatility = beta_hs300 = beta_zz500 = None
    if end_index >= _LOOKBACK_1Y:
        stock_data_1y = stock_data.iloc[end_index - _LOOKBACK_1Y:end_index + 1]

        # Volatility factor: annualised standard deviation of weekly returns.
        weekly_prices = stock_data_1y.resample('1W').last().dropna()
        weekly_returns = weekly_prices.pct_change().dropna()
        volatility = weekly_returns.std() * (52 ** 0.5)

        # Beta factor: regression slope of stock returns on index returns
        # (covariance / index variance), not a correlation coefficient.
        stock_returns = stock_data_1y.pct_change().dropna()
        beta_end_index = beta_data.index.get_loc(trade_date)
        beta_start = max(beta_end_index - _LOOKBACK_1Y, 0)
        beta_data_1y = beta_data.iloc[beta_start:beta_end_index + 1]
        beta_hs300 = _beta(stock_returns, beta_data_1y['沪深300'])
        beta_zz500 = _beta(stock_returns, beta_data_1y['中证500'])

    # Momentum factors over roughly one, three and six months.
    mom_1m = _momentum(stock_data, end_index, _LOOKBACK_1M)
    mom_3m = _momentum(stock_data, end_index, _LOOKBACK_3M)
    mom_6m = _momentum(stock_data, end_index, _LOOKBACK_6M)

    # Write the row to the worksheet.
    ws.append([
        stock_code,
        _to_cell(volatility),
        _to_cell(mom_1m),
        _to_cell(mom_3m),
        _to_cell(mom_6m),
        _to_cell(beta_hs300),
        _to_cell(beta_zz500),
    ])

    return


# In [62]:
def main():
    """Compute the factor rows for every listed stock and save them to Excel.

    Reads end-of-day prices from 'eodprices.h5', index levels from
    '指数数据.xlsx' and the stock universe from 'stock_list.xlsx', calls
    ``cal`` once per stock and reporting date, and writes one sheet per
    reporting date to 'sample_resceshi_final.xlsx'.

    A stock that is absent from the price data is skipped entirely. A
    ``KeyError`` raised while computing one reporting date skips only that
    date, so the other reporting date is still attempted.
    """
    header = ['S_INFO_WINDCODE', 'volatility', 'mom_1m', 'mom_3m', 'mom_6m', 'beta_hs300', 'beta_zz500']

    all_data = pd.read_hdf('eodprices.h5')
    beta_data = pd.read_excel('指数数据.xlsx')

    # Index both tables by date so rows can be located by trading day.
    all_data['TRADE_DT'] = pd.to_datetime(all_data['TRADE_DT'], format='%Y%m%d')
    all_data.set_index('TRADE_DT', inplace=True)
    beta_data.set_index("日期", inplace=True)

    # Load the list of stocks to process.
    all_stocks = pd.read_excel("stock_list.xlsx")
    all_stock_lst = all_stocks["S_INFO_WINDCODE"].tolist()

    # Group the HDF5 price data by stock code.
    all_stock_data = all_data.groupby('S_INFO_WINDCODE')

    # One worksheet per reporting date, each starting with the header row.
    wb = Workbook()
    ws1 = wb.active
    ws1.title = '2018-06-30'
    ws2 = wb.create_sheet(title='2018-12-31')
    sheets = {'2018-06-30': ws1, '2018-12-31': ws2}
    for sheet in sheets.values():
        sheet.append(list(header))

    # Run the calculation for every stock.
    for stock_code in all_stock_lst:
        try:
            # Pull this stock's adjusted closes and order them by date.
            stock_data = all_stock_data.get_group(stock_code)['S_DQ_ADJCLOSE'].sort_index()
        except KeyError:
            # Stock has no rows in the price data: skip it without failing.
            continue

        for report_date, sheet in sheets.items():
            try:
                cal(stock_code, report_date, sheet, stock_data, beta_data)
            except KeyError:
                # No data for this trading day (e.g. not yet listed); the
                # other reporting date must still be attempted.
                continue

    wb.save("sample_resceshi_final.xlsx")

# Run the full pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()