In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tushare as ts

# Read the Tushare API token from a local text file (first line only).
# readline() keeps the trailing line terminator, so strip surrounding
# whitespace before the token is handed to the client.
with open('../../tushare_token.txt', 'r') as f:
    token = f.readline().strip()

# Register the token and create the API client used by later cells.
ts.set_token(token)
tushare_api = ts.pro_api()

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Remove pandas' row/column display limits so frames are shown in full.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# 股票信息表（包含上市信息）

In [2]:
# Load the stock basic-information sheet from disk and preview the first rows.
df_basic = pd.read_csv(
    '../../data/financial_statements/stock_basic_sheet.csv'
)
df_basic.head(n=5)

Unnamed: 0,ts_code,symbol,name,area,industry,fullname,enname,market,exchange,curr_type,list_status,list_date,delist_date,is_hs
0,000001.SZ,1,平安银行,深圳,银行,平安银行股份有限公司,"Ping An Bank Co., Ltd.",主板,SZSE,CNY,L,19910403,,S
1,000002.SZ,2,万科A,深圳,全国地产,万科企业股份有限公司,"China Vanke Co.,Ltd.",主板,SZSE,CNY,L,19910129,,S
2,000004.SZ,4,国农科技,深圳,互联网,深圳中国农大科技股份有限公司,"Shenzhen Cau Technology Co.,Ltd.",主板,SZSE,CNY,L,19910114,,N
3,000005.SZ,5,世纪星源,深圳,环境保护,深圳世纪星源股份有限公司,Shenzhen Fountain Corporation,主板,SZSE,CNY,L,19901210,,N
4,000006.SZ,6,深振业A,深圳,区域地产,深圳市振业(集团)股份有限公司,"Shenzhen Zhenye(Group) Co.,Ltd.",主板,SZSE,CNY,L,19920427,,S


# 最新停牌状态表

In [3]:
# Stock list, restricted to entries with list_status='P'.
# NOTE(review): 'P' presumably marks listings that are currently suspended
# (this matches the section heading) — confirm against the Tushare docs.
df_suspended = tushare_api.stock_basic(
    exchange='',
    list_status='P',
    # The adjacent string literals below are concatenated into a single
    # comma-separated list of requested fields.
    fields='ts_code, symbol, name, area, industry, fullname, enname,'
    'market, exchange, curr_type, list_status, list_date,'
    'delist_date, is_hs')
df_suspended.head()

Unnamed: 0,ts_code,symbol,name,area,industry,fullname,enname,market,exchange,curr_type,list_status,list_date,delist_date,is_hs
0,000670.SZ,670,*ST盈方,湖北,元器件,盈方微电子股份有限公司,"Infotmic Co.,Ltd.",主板,SZSE,CNY,P,19961217,,N
1,000760.SZ,760,*ST斯太,湖北,汽车配件,斯太尔动力股份有限公司,Steyr Motors Corp.,主板,SZSE,CNY,P,19970627,,N
2,000792.SZ,792,*ST盐湖,青海,化工原料,青海盐湖工业股份有限公司,"Qinghai Salt Lake Industry Co.,Ltd.",主板,SZSE,CNY,P,19970904,,N
3,000939.SZ,939,*ST凯迪,湖北,新型电力,凯迪生态环境科技股份有限公司,Kaidi Ecological and Environmental Technology ...,主板,SZSE,CNY,P,19990923,,N
4,000995.SZ,995,*ST皇台,甘肃,白酒,甘肃皇台酒业股份有限公司,"Gansu Huangtai Wine-Marketing Industry Co.,Ltd.",主板,SZSE,CNY,P,20000807,,N


In [4]:
# Save the suspended-stock table as CSV; the row index is not written.
df_suspended.to_csv(
    path_or_buf='../../data/financial_statements/suspended_sheet.csv',
    index=False,
)

# 上市日期表

In [5]:
# Keep only the identifier columns plus the listing date of each stock.
df_list_date = df_basic.loc[:, ['ts_code', 'symbol', 'name', 'list_date']]
df_list_date.head(n=5)

Unnamed: 0,ts_code,symbol,name,list_date
0,000001.SZ,1,平安银行,19910403
1,000002.SZ,2,万科A,19910129
2,000004.SZ,4,国农科技,19910114
3,000005.SZ,5,世纪星源,19901210
4,000006.SZ,6,深振业A,19920427


In [6]:
# Save the listing-date table as CSV; the row index is not written.
df_list_date.to_csv(
    path_or_buf='../../data/financial_statements/list_date_sheet.csv',
    index=False,
)

# 利润表

In [7]:
# Load the income-statement sheet from disk and preview the first rows.
df_income = pd.read_csv(
    '../../data/financial_statements/income_sheet.csv'
)
df_income.head(n=5)

Unnamed: 0,code,ts_code,ann_date,f_ann_date,end_date,report_type,comp_type,basic_eps,diluted_eps,total_revenue,revenue,int_income,prem_earned,comm_income,n_commis_income,n_oth_income,n_oth_b_income,prem_income,out_prem,une_prem_reser,reins_income,n_sec_tb_income,n_sec_uw_income,n_asset_mg_income,oth_b_income,fv_value_chg_gain,invest_income,ass_invest_income,forex_gain,total_cogs,oper_cost,int_exp,comm_exp,biz_tax_surchg,sell_exp,admin_exp,fin_exp,assets_impair_loss,prem_refund,compens_payout,reser_insur_liab,div_payt,reins_exp,oper_exp,compens_payout_refu,insur_reser_refu,reins_cost_refund,other_bus_cost,operate_profit,non_oper_income,non_oper_exp,nca_disploss,total_profit,income_tax,n_income,n_income_attr_p,minority_gain,oth_compr_income,t_compr_income,compr_inc_attr_p,compr_inc_attr_m_s,ebit,ebitda,insurance_exp,undist_profit,distable_profit,update_flag
0,1,000001.SZ,19970828.0,19970828.0,19970630,1,2.0,,,1314008000.0,1314008000.0,,,10243958.09,,,,,,,,,,,,,171747400.0,,10961663.44,,,,5213519.46,84795770.0,208876400.0,6839735.0,2243028.36,0.0,,,,,,,,,,,541242500.0,1295892.19,6425328.62,,524024100.0,80377680.0,443646400.0,443738100.0,-91666.89,,,,,1008283000.0,1008283000.0,,,,0
1,1,000001.SZ,19980312.0,19980312.0,19971231,1,2.0,,,1869233000.0,1869233000.0,2657035000.0,,39610404.48,27230402.38,219446600.0,-142315500.0,,,,,,,,31046570.0,,339348400.0,,22413734.45,907148000.0,,1207841000.0,12380002.1,185402900.0,533808600.0,548383000.0,13267320.76,,,,,,,907148000.0,,,,173362100.0,962084700.0,8005756.21,9501351.41,,1002425000.0,140197300.0,862227900.0,862316100.0,-88247.73,,,,,-618583000.0,-618583000.0,,,,0
2,1,000001.SZ,19980826.0,19980826.0,19980630,1,2.0,,,1294420000.0,1294420000.0,,,13854435.68,,,,,,,,,,,,,70725410.0,,7507331.38,,,,4713625.7,85807170.0,238638000.0,6079972.0,177251.49,0.0,,,,,,,,,,,435265300.0,2325747.9,1720196.9,,435870800.0,57965920.0,378013100.0,378013100.0,,,,,,959181700.0,1016280000.0,,,,0
3,1,000001.SZ,19990423.0,19990423.0,19981231,1,2.0,,,1963738000.0,1963738000.0,2532461000.0,,30262407.24,18745419.34,72328960.0,-133297900.0,,,,,,,,208493700.0,,184826900.0,,20799898.59,1113156000.0,,1001588000.0,11516987.9,191582800.0,561085800.0,574281200.0,4098638.66,5500000.0,,,,,,1113156000.0,,,,341791600.0,850582900.0,14599067.7,8145256.62,,857036700.0,92796600.0,764240100.0,764339200.0,-99076.4,,,,,-376316600.0,-221003500.0,,476794360.4,1241063000.0,0
4,1,000001.SZ,19990717.0,19990717.0,19990630,1,2.0,,,1134698000.0,1134698000.0,,,7346929.2,,,,,,,,,,,,,108852600.0,,328567.25,,,,3241363.34,84020250.0,239035900.0,4796524.0,717579.34,0.0,,,,,,,,,,,278978000.0,1874095.03,4378753.18,,276473300.0,35554730.0,240918600.0,240918600.0,,,,,,803604200.0,865224400.0,,,,0


In [8]:
# For every comp_type group, flag each column whose values are all null
# within that group (True = the column is never populated for that type).
df_income_null = (
    df_income
    .groupby('comp_type')
    .apply(lambda group: group.isna().all())
)
df_income_null

Unnamed: 0_level_0,code,ts_code,ann_date,f_ann_date,end_date,report_type,comp_type,basic_eps,diluted_eps,total_revenue,revenue,int_income,prem_earned,comm_income,n_commis_income,n_oth_income,n_oth_b_income,prem_income,out_prem,une_prem_reser,reins_income,n_sec_tb_income,n_sec_uw_income,n_asset_mg_income,oth_b_income,fv_value_chg_gain,invest_income,ass_invest_income,forex_gain,total_cogs,oper_cost,int_exp,comm_exp,biz_tax_surchg,sell_exp,admin_exp,fin_exp,assets_impair_loss,prem_refund,compens_payout,reser_insur_liab,div_payt,reins_exp,oper_exp,compens_payout_refu,insur_reser_refu,reins_cost_refund,other_bus_cost,operate_profit,non_oper_income,non_oper_exp,nca_disploss,total_profit,income_tax,n_income,n_income_attr_p,minority_gain,oth_compr_income,t_compr_income,compr_inc_attr_p,compr_inc_attr_m_s,ebit,ebitda,insurance_exp,undist_profit,distable_profit,update_flag
comp_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1
1.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False
2.0,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,True,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,True,True,False,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False
4.0,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,True,False,False,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False


In [9]:
# Save the per-comp_type all-null flag matrix; the index is written too,
# since index=False is not passed.
df_income_null.to_csv(
    path_or_buf='../../data/null_features/df_income_null_features.csv'
)

In [10]:
# Collapse the boolean all-null matrix into one row per comp_type, listing
# the names of the columns flagged True (entirely null for that type).
# Rows are collected first and the frame is built once: DataFrame.append was
# removed in pandas 2.0 and re-copies the whole frame on every iteration.
null_rows = []
for i_comp, r in df_income_null.iterrows():
    null_list = [k for k in r.keys() if r[k]]
    null_rows.append({'comp_type': i_comp, 'null_features': null_list})

# Passing the columns explicitly keeps both headers even with zero rows.
df_income_null_list = pd.DataFrame(null_rows, columns=['comp_type', 'null_features'])

df_income_null_list

Unnamed: 0,comp_type,null_features
0,1.0,"[prem_income, out_prem, reins_income, n_sec_tb..."
1,2.0,"[prem_earned, out_prem, une_prem_reser, reins_..."
2,3.0,"[n_sec_tb_income, n_sec_uw_income, n_asset_mg_..."
3,4.0,"[prem_earned, prem_income, out_prem, une_prem_..."


In [11]:
# Save the per-comp_type list of all-null columns; the row index is not written.
df_income_null_list.to_csv(
    path_or_buf='../../data/null_features/income_null_features_list.csv',
    index=False,
)

# 资产负债表

In [12]:
# Load the balance sheet from disk and preview the first rows.
df_balance = pd.read_csv(
    '../../data/financial_statements/balance_sheet.csv'
)
df_balance.head(n=5)

Unnamed: 0,code,ts_code,ann_date,f_ann_date,end_date,report_type,comp_type,total_share,cap_rese,undistr_porfit,surplus_rese,special_rese,money_cap,trad_asset,notes_receiv,accounts_receiv,oth_receiv,prepayment,div_receiv,int_receiv,inventories,amor_exp,nca_within_1y,sett_rsrv,loanto_oth_bank_fi,premium_receiv,reinsur_receiv,reinsur_res_receiv,pur_resale_fa,oth_cur_assets,total_cur_assets,fa_avail_for_sale,htm_invest,lt_eqt_invest,invest_real_estate,time_deposits,oth_assets,lt_rec,fix_assets,cip,const_materials,fixed_assets_disp,produc_bio_assets,oil_and_gas_assets,intan_assets,r_and_d,goodwill,lt_amor_exp,defer_tax_assets,decr_in_disbur,oth_nca,total_nca,cash_reser_cb,depos_in_oth_bfi,prec_metals,deriv_assets,rr_reins_une_prem,rr_reins_outstd_cla,rr_reins_lins_liab,rr_reins_lthins_liab,refund_depos,ph_pledge_loans,refund_cap_depos,indep_acct_assets,client_depos,client_prov,transac_seat_fee,invest_as_receiv,total_assets,lt_borr,st_borr,cb_borr,depos_ib_deposits,loan_oth_bank,trading_fl,notes_payable,acct_payable,adv_receipts,sold_for_repur_fa,comm_payable,payroll_payable,taxes_payable,int_payable,div_payable,oth_payable,acc_exp,deferred_inc,st_bonds_payable,payable_to_reinsurer,rsrv_insur_cont,acting_trading_sec,acting_uw_sec,non_cur_liab_due_1y,oth_cur_liab,total_cur_liab,bond_payable,lt_payable,specific_payables,estimated_liab,defer_tax_liab,defer_inc_non_cur_liab,oth_ncl,total_ncl,depos_oth_bfi,deriv_liab,depos,agency_bus_liab,oth_liab,prem_receiv_adva,depos_received,ph_invest,reser_une_prem,reser_outstd_claims,reser_lins_liab,reser_lthins_liab,indept_acc_liab,pledge_borr,indem_payable,policy_div_payable,total_liab,treasury_share,ordin_risk_reser,forex_differ,invest_loss_unconf,minority_int,total_hldr_eqy_exc_min_int,total_hldr_eqy_inc_min_int,total_liab_hldr_eqy,lt_payroll_payable,oth_comp_income,oth_eqt_tools,oth_eqt_tools_p_shr,lending_funds,acc_receivable,st_fin_payable,payables,hfs_assets,hfs_sales,update_flag
0,1,000001.SZ,19940830.0,19940830.0,19940630,1,2.0,269435600.0,497723500.0,502091500.0,243662900.0,,139552700.0,,,39538150.0,734983600.0,,,,,,,,,,,,,,10666230000.0,,,,,,,,,,,-241835.0,,,,,,,,,,272147300.0,,,,,,,,,,,,,,,,,12465950000.0,,,,,,,,119344600.0,,,,5890948.81,50381130.0,,,103842100.0,,,,,,,,,,9325414000.0,,,,,,,,1627626000.0,,,,,,,,,,,,,,,,,10953040000.0,,,,,,1512913000.0,1512913000.0,12465950000.0,,,,,,,,,,,0
1,1,000001.SZ,19950310.0,19950310.0,19941231,1,2.0,431068600.0,549976900.0,375219500.0,299343100.0,,238290600.0,19383436.23,,59513640.0,185312900.0,250000.0,,,481662900.0,6812085.36,,,,,,,,,13106120000.0,,,,,,,,,,,,,,,,,,,,,1209959000.0,,,,,,,,,,,,,,,,,15488410000.0,,18150000.0,,,,,,172355300.0,,,,39155973.28,93159880.0,,433.95,47297390.0,3731340.21,,,,,,,,180864.13,11623490000.0,,,,,,,,2205090000.0,,,,,,,,,,,,,,,,,13828580000.0,,,,,4224750.74,1655608000.0,1659833000.0,15488410000.0,,,,,,,,,,,0
2,1,000001.SZ,19950811.0,19950811.0,19950630,1,2.0,431068600.0,549976900.0,588042100.0,295013900.0,,325499000.0,14663436.23,,169851600.0,,310000.0,,,493984200.0,2943435.45,,,,,,,,,14420120000.0,,,,,,,,,,,3396.37,,,,,,,,,,1385428000.0,,,,,,,,,,,,,,,,,17439930000.0,,19350000.0,,,,,,204047400.0,35148248.67,,,15686583.89,67711560.0,,,88879810.0,2142000.0,,,,,,,,180864.13,12512340000.0,,,,,,,,3059269000.0,,,,,,,,,,,,,,,,,15571600000.0,,,,,4224750.74,1864102000.0,1868326000.0,17439930000.0,,,,,,,,,,,0
3,1,000001.SZ,19960209.0,19960209.0,19951231,1,2.0,775923500.0,549976900.0,153288400.0,476272100.0,,386261200.0,11044717.46,,121160000.0,188086000.0,150000.0,,,478051800.0,5946364.83,,,,,,,,,17225140000.0,,,,,,,,,,,,,,,,,,,,,1762864000.0,,,,,,,,,,,,,,,,,20312480000.0,,19850000.0,,,,,,287786500.0,12221086.74,,,50388548.96,77944450.0,,,100685700.0,,,,,,,,,,14087180000.0,,,,,,,,4264760000.0,,,,,,,,,,,,,,,,,18351940000.0,,,,,5077922.55,1955461000.0,1960539000.0,20312480000.0,,,,,,,,,,,0
4,1,000001.SZ,19960829.0,19960829.0,19960630,1,2.0,1034565000.0,291335800.0,517579600.0,476440200.0,,441466300.0,9542466.57,,185837100.0,341844600.0,172996.0,,,482552100.0,6248857.05,,,,,,,,,19368310000.0,,,,,,,,,,,,,,,,,,,,,2124532000.0,,,,,,,,,,,,,,,,,24296770000.0,,21550000.0,,,,,,416883900.0,10744332.18,,,73223148.25,134376900.0,,,152363800.0,216314.9,,,,,,,,,16424400000.0,,,,,,,,5547476000.0,,,,,,,,,,,,,,,,,21971870000.0,,,,,4974517.86,2319920000.0,2324895000.0,24296770000.0,,,,,,,,,,,0


In [13]:
# For every comp_type group, flag each column whose values are all null
# within that group (True = the column is never populated for that type).
df_balance_null = (
    df_balance
    .groupby('comp_type')
    .apply(lambda group: group.isna().all())
)
df_balance_null

Unnamed: 0_level_0,code,ts_code,ann_date,f_ann_date,end_date,report_type,comp_type,total_share,cap_rese,undistr_porfit,surplus_rese,special_rese,money_cap,trad_asset,notes_receiv,accounts_receiv,oth_receiv,prepayment,div_receiv,int_receiv,inventories,amor_exp,nca_within_1y,sett_rsrv,loanto_oth_bank_fi,premium_receiv,reinsur_receiv,reinsur_res_receiv,pur_resale_fa,oth_cur_assets,total_cur_assets,fa_avail_for_sale,htm_invest,lt_eqt_invest,invest_real_estate,time_deposits,oth_assets,lt_rec,fix_assets,cip,const_materials,fixed_assets_disp,produc_bio_assets,oil_and_gas_assets,intan_assets,r_and_d,goodwill,lt_amor_exp,defer_tax_assets,decr_in_disbur,oth_nca,total_nca,cash_reser_cb,depos_in_oth_bfi,prec_metals,deriv_assets,rr_reins_une_prem,rr_reins_outstd_cla,rr_reins_lins_liab,rr_reins_lthins_liab,refund_depos,ph_pledge_loans,refund_cap_depos,indep_acct_assets,client_depos,client_prov,transac_seat_fee,invest_as_receiv,total_assets,lt_borr,st_borr,cb_borr,depos_ib_deposits,loan_oth_bank,trading_fl,notes_payable,acct_payable,adv_receipts,sold_for_repur_fa,comm_payable,payroll_payable,taxes_payable,int_payable,div_payable,oth_payable,acc_exp,deferred_inc,st_bonds_payable,payable_to_reinsurer,rsrv_insur_cont,acting_trading_sec,acting_uw_sec,non_cur_liab_due_1y,oth_cur_liab,total_cur_liab,bond_payable,lt_payable,specific_payables,estimated_liab,defer_tax_liab,defer_inc_non_cur_liab,oth_ncl,total_ncl,depos_oth_bfi,deriv_liab,depos,agency_bus_liab,oth_liab,prem_receiv_adva,depos_received,ph_invest,reser_une_prem,reser_outstd_claims,reser_lins_liab,reser_lthins_liab,indept_acc_liab,pledge_borr,indem_payable,policy_div_payable,total_liab,treasury_share,ordin_risk_reser,forex_differ,invest_loss_unconf,minority_int,total_hldr_eqy_exc_min_int,total_hldr_eqy_inc_min_int,total_liab_hldr_eqy,lt_payroll_payable,oth_comp_income,oth_eqt_tools,oth_eqt_tools_p_shr,lending_funds,acc_receivable,st_fin_payable,payables,hfs_assets,hfs_sales,update_flag
comp_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1
1.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2.0,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,False,True,False,False,False,False,False,False,False,False,False,False,True,True,True,True,False,True,True,True,False,False,True,False,False,False,False,False,True,False,False,True,False,False,False,True,False,False,False,False,False,False,False,True,True,True,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,True,False,False,True,False
3.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,True,False,False,True,True,False,True,False,False,True,False
4.0,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,True,False,True,True,False,False,False,False,False,True,False,False,False,True,False,False,True,True,True,True,False,True,True,True,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,True,True,True,True,True,True,True,True,False,True,True,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False


In [14]:
# Save the per-comp_type all-null flag matrix; the index is written too,
# since index=False is not passed.
df_balance_null.to_csv(
    path_or_buf='../../data/null_features/df_balance_null_features.csv'
)

In [15]:
# Collapse the boolean all-null matrix into one row per comp_type, listing
# the names of the columns flagged True (entirely null for that type).
# Rows are collected first and the frame is built once: DataFrame.append was
# removed in pandas 2.0 and re-copies the whole frame on every iteration.
null_rows = []
for i_comp, r in df_balance_null.iterrows():
    null_list = [k for k in r.keys() if r[k]]
    null_rows.append({'comp_type': i_comp, 'null_features': null_list})

# Passing the columns explicitly keeps both headers even with zero rows.
df_balance_null_list = pd.DataFrame(null_rows, columns=['comp_type', 'null_features'])
df_balance_null_list

Unnamed: 0,comp_type,null_features
0,1.0,"[prec_metals, rr_reins_une_prem, rr_reins_outs..."
1,2.0,"[special_rese, premium_receiv, reinsur_receiv,..."
2,3.0,"[const_materials, produc_bio_assets, oil_and_g..."
3,4.0,"[special_rese, nca_within_1y, reinsur_res_rece..."


In [16]:
# Save the per-comp_type list of all-null columns; the row index is not written.
df_balance_null_list.to_csv(
    path_or_buf='../../data/null_features/balance_null_features_list.csv',
    index=False,
)

# 现金流量表

In [17]:
# Load the cash-flow statement sheet from disk and preview the first rows.
df_cashflow = pd.read_csv(
    '../../data/financial_statements/cashflow_sheet.csv'
)
df_cashflow.head(n=5)

Unnamed: 0,code,ts_code,ann_date,f_ann_date,end_date,comp_type,report_type,net_profit,finan_exp,c_fr_sale_sg,recp_tax_rends,n_depos_incr_fi,n_incr_loans_cb,n_inc_borr_oth_fi,prem_fr_orig_contr,n_incr_insured_dep,n_reinsur_prem,n_incr_disp_tfa,ifc_cash_incr,n_incr_disp_faas,n_incr_loans_oth_bank,n_cap_incr_repur,c_fr_oth_operate_a,c_inf_fr_operate_a,c_paid_goods_s,c_paid_to_for_empl,c_paid_for_taxes,n_incr_clt_loan_adv,n_incr_dep_cbob,c_pay_claims_orig_inco,pay_handling_chrg,pay_comm_insur_plcy,oth_cash_pay_oper_act,st_cash_out_act,n_cashflow_act,oth_recp_ral_inv_act,c_disp_withdrwl_invest,c_recp_return_invest,n_recp_disp_fiolta,n_recp_disp_sobu,stot_inflows_inv_act,c_pay_acq_const_fiolta,c_paid_invest,n_disp_subs_oth_biz,oth_pay_ral_inv_act,n_incr_pledge_loan,stot_out_inv_act,n_cashflow_inv_act,c_recp_borrow,proc_issue_bonds,oth_cash_recp_ral_fnc_act,stot_cash_in_fnc_act,free_cashflow,c_prepay_amt_borr,c_pay_dist_dpcp_int_exp,incl_dvd_profit_paid_sc_ms,oth_cashpay_ral_fnc_act,stot_cashout_fnc_act,n_cash_flows_fnc_act,eff_fx_flu_cash,n_incr_cash_cash_equ,c_cash_equ_beg_period,c_cash_equ_end_period,c_recp_cap_contrib,incl_cash_rec_saims,uncon_invest_loss,prov_depr_assets,depr_fa_coga_dpba,amort_intang_assets,lt_amort_deferred_exp,decr_deferred_exp,incr_acc_exp,loss_disp_fiolta,loss_scr_fa,loss_fv_chg,invest_loss,decr_def_inc_tax_assets,incr_def_inc_tax_liab,decr_inventories,decr_oper_payable,incr_oper_payable,others,im_net_cashflow_oper_act,conv_debt_into_cap,conv_copbonds_due_within_1y,fa_fnc_leases,end_bal_cash,beg_bal_cash,end_bal_cash_equ,beg_bal_cash_equ,im_n_incr_cash_equ,update_flag
0,1,000001.SZ,20061026.0,20061026.0,20060930,2,1,890295600.0,,,,21286910000.0,,18100000.0,,,,,7505273000.0,,,,1961730000.0,30843820000.0,,872332900.0,,19951480000.0,468502400.0,,3619684000.0,,1785413000.0,24590290000.0,6253525000.0,,,,304908947.0,,1589356000.0,63568691.0,,,-150647074.0,,9950768000.0,-8361412000.0,,,,,,,,,,,,-26724697.0,-2134612000.0,,,,,,993140436.0,220597733.0,15167126.0,13239923.0,-17840462.0,32942923.0,-56611288.0,,-2694666.0,-690617600.0,,,,-17477510000.0,22774260000.0,,6166820000.0,,,,966632100.0,787992400.0,16346660000.0,18659910000.0,-2134612000.0,0
1,1,000001.SZ,20070322.0,20070322.0,20061231,2,1,1302907000.0,,,,36990540000.0,,,,,,,10347630000.0,,,,1009763000.0,49486560000.0,,1230566000.0,,,2675420000.0,,4947326000.0,,1790982000.0,42076170000.0,7410385000.0,,,,363958757.0,,51122290000.0,249983343.0,,,,,59971830000.0,-8849543000.0,,,,,,,,,,1227269.0,-1227269.0,,-1440385000.0,19447900000.0,18007520000.0,,,,796414700.0,281505445.0,33567623.0,25047600.0,,,-59927542.0,,4928960.0,-990846089.0,-1754653.0,-14787193.0,,-30537740000.0,36296130000.0,2900107.0,7138344000.0,,,,13562690000.0,12216440000.0,4444825000.0,7231455000.0,-1440385000.0,0
2,1,000001.SZ,20070426.0,20070426.0,20070331,2,1,,,,,20174910000.0,,-3736559000.0,,,,,3146183000.0,,,,2051423000.0,28463520000.0,,609183400.0,344862500.0,,3638747000.0,,1302907000.0,,910002100.0,24744160000.0,3719364000.0,,,,67299343.0,,26756640000.0,746270777.0,,,,,30429150000.0,-3672515000.0,,,,,,,,,,,,,46848460.0,18007520000.0,18054360000.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0
3,1,000001.SZ,20070816.0,20070816.0,20070630,2,1,1123983000.0,,,,41624610000.0,86410000.0,2175380000.0,,,,,7003993000.0,,,,6632956000.0,57523350000.0,,922537000.0,916784000.0,25957190000.0,6330303000.0,,2802006000.0,,6707986000.0,44352640000.0,13170710000.0,16887000.0,56600450000.0,546532000.0,,,57163870000.0,97045000.0,59640010000.0,,,,59737060000.0,-2573190000.0,,,,,,,12641000.0,,,12641000.0,-12641000.0,,10584880000.0,22133230000.0,32718110000.0,,,,997486000.0,102701000.0,6146000.0,37285000.0,,,294000.0,,-12993000.0,-605002000.0,124332000.0,-55424000.0,,-39922600000.0,51360280000.0,,13170710000.0,,,,843813000.0,909080000.0,31874290000.0,21224150000.0,10584880000.0,0
4,1,000001.SZ,20071023.0,20071023.0,20070930,2,1,,,,,64518630000.0,,-687773000.0,,,,,11369470000.0,,,,608187000.0,85063360000.0,,1292840000.0,1423209000.0,32969110000.0,9984175000.0,,4811096000.0,,1206172000.0,64479120000.0,20584240000.0,,874320000.0,72225830000.0,262909000.0,,73363060000.0,636772000.0,82205470000.0,,,,82842240000.0,-9479182000.0,,,,,,,12666000.0,,,12666000.0,-12666000.0,,11092390000.0,22133230000.0,33225620000.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0


In [18]:
# For every comp_type group, flag each column whose values are all null
# within that group (True = the column is never populated for that type).
df_cashflow_null = (
    df_cashflow
    .groupby('comp_type')
    .apply(lambda group: group.isna().all())
)
df_cashflow_null

Unnamed: 0_level_0,code,ts_code,ann_date,f_ann_date,end_date,comp_type,report_type,net_profit,finan_exp,c_fr_sale_sg,recp_tax_rends,n_depos_incr_fi,n_incr_loans_cb,n_inc_borr_oth_fi,prem_fr_orig_contr,n_incr_insured_dep,n_reinsur_prem,n_incr_disp_tfa,ifc_cash_incr,n_incr_disp_faas,n_incr_loans_oth_bank,n_cap_incr_repur,c_fr_oth_operate_a,c_inf_fr_operate_a,c_paid_goods_s,c_paid_to_for_empl,c_paid_for_taxes,n_incr_clt_loan_adv,n_incr_dep_cbob,c_pay_claims_orig_inco,pay_handling_chrg,pay_comm_insur_plcy,oth_cash_pay_oper_act,st_cash_out_act,n_cashflow_act,oth_recp_ral_inv_act,c_disp_withdrwl_invest,c_recp_return_invest,n_recp_disp_fiolta,n_recp_disp_sobu,stot_inflows_inv_act,c_pay_acq_const_fiolta,c_paid_invest,n_disp_subs_oth_biz,oth_pay_ral_inv_act,n_incr_pledge_loan,stot_out_inv_act,n_cashflow_inv_act,c_recp_borrow,proc_issue_bonds,oth_cash_recp_ral_fnc_act,stot_cash_in_fnc_act,free_cashflow,c_prepay_amt_borr,c_pay_dist_dpcp_int_exp,incl_dvd_profit_paid_sc_ms,oth_cashpay_ral_fnc_act,stot_cashout_fnc_act,n_cash_flows_fnc_act,eff_fx_flu_cash,n_incr_cash_cash_equ,c_cash_equ_beg_period,c_cash_equ_end_period,c_recp_cap_contrib,incl_cash_rec_saims,uncon_invest_loss,prov_depr_assets,depr_fa_coga_dpba,amort_intang_assets,lt_amort_deferred_exp,decr_deferred_exp,incr_acc_exp,loss_disp_fiolta,loss_scr_fa,loss_fv_chg,invest_loss,decr_def_inc_tax_assets,incr_def_inc_tax_liab,decr_inventories,decr_oper_payable,incr_oper_payable,others,im_net_cashflow_oper_act,conv_debt_into_cap,conv_copbonds_due_within_1y,fa_fnc_leases,end_bal_cash,beg_bal_cash,end_bal_cash_equ,beg_bal_cash_equ,im_n_incr_cash_equ,update_flag
comp_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,True,True,False,True,True,True,False,False,False,False,False,False,False,False,False,False,False,True,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [19]:
# Persist the per-company-type null-feature mask. No `index` argument is given,
# so the frame's index is written as the first CSV column (pandas default).
df_cashflow_null.to_csv(
    path_or_buf='../../data/null_features/df_cashflow_null_features.csv'
)

In [20]:
# Collapse the boolean mask in `df_cashflow_null` into one row per index entry
# (comp_type), listing the column labels whose flag is truthy for that row.
#
# The records are collected first and the frame is built in a single call:
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0,
# and appending inside a loop re-copies the whole frame on every iteration.
null_feature_records = [
    {
        'comp_type': comp_type,
        'null_features': [feature for feature, is_null in flags.items() if is_null],
    }
    for comp_type, flags in df_cashflow_null.iterrows()
]

# Passing `columns` explicitly keeps both columns (in this order) even when
# `df_cashflow_null` has no rows and the record list is empty.
df_cashflow_null_list = pd.DataFrame(
    null_feature_records, columns=['comp_type', 'null_features'])
df_cashflow_null_list

Unnamed: 0,comp_type,null_features
0,1,[]
1,2,"[n_incr_disp_faas, uncon_invest_loss, conv_cop..."
2,3,"[n_incr_disp_faas, n_incr_loans_oth_bank, n_ca..."
3,4,"[n_depos_incr_fi, n_incr_loans_cb, prem_fr_ori..."


In [21]:
# Save the per-company-type list of null features; the positional row index is
# dropped from the CSV (index=False).
df_cashflow_null_list.to_csv(
    path_or_buf='../../data/null_features/cashflow_null_features_list.csv',
    index=False,
)

# 概念股分类表

In [22]:
# Load the concept classification sheet from CSV and preview its first five rows.
df_concept = pd.read_csv(
    filepath_or_buffer='../../data/financial_statements/concept_sheet.csv'
)
df_concept.head(n=5)

Unnamed: 0,code,name,src
0,TS0,密集调研,ts
1,TS1,南北船合并,ts
2,TS2,5G,ts
3,TS3,机场,ts
4,TS4,高价股,ts


# 概念股明细表-按概念排列

In [23]:
# Load the concept detail sheet (the "by_concept" file) and preview its first
# five rows.
df_concept_c = pd.read_csv(
    filepath_or_buffer='../../data/financial_statements/concept_details_sheet_by_concept.csv'
)
df_concept_c.head(n=5)

Unnamed: 0,code,id,concept_name,ts_code,name,in_date,out_date
0,301,TS0,密集调研,000301.SZ,东方盛虹,,
1,401,TS0,密集调研,000401.SZ,冀东水泥,,
2,932,TS0,密集调研,000932.SZ,华菱钢铁,,
3,2013,TS0,密集调研,002013.SZ,中航机电,,
4,2106,TS0,密集调研,002106.SZ,莱宝高科,,


# 概念股明细表-按股票排列

In [24]:
# Load the concept detail sheet (the "by_stocks" file) and preview its first
# five rows.
# NOTE(review): the file name ends in a doubled '.csv.csv'; it is kept as-is
# here — confirm it matches the file on disk before renaming anything.
df_concept_s = pd.read_csv(
    filepath_or_buffer='../../data/financial_statements/concept_details_sheet_by_stocks.csv.csv'
)
df_concept_s.head(n=5)

Unnamed: 0,code,id,concept_name,ts_code,name,in_date,out_date
0,1,TS11,银行,000001.SZ,平安银行,,
1,1,TS25,MSCI,000001.SZ,平安银行,,
2,2,TS13,安邦系,000002.SZ,万科A,,
3,2,TS22,白马股,000002.SZ,万科A,,
4,2,TS25,MSCI,000002.SZ,万科A,,


# 财务指标表

In [25]:
# Load the financial indicator sheet from CSV and preview its first five rows.
df_indicator = pd.read_csv(
    filepath_or_buffer='../../data/financial_statements/financial_indicator_sheet.csv'
)
df_indicator.head(n=5)

Unnamed: 0,code,ts_code,ann_date,end_date,eps,dt_eps,total_revenue_ps,revenue_ps,capital_rese_ps,surplus_rese_ps,undist_profit_ps,extra_item,profit_dedt,gross_margin,current_ratio,quick_ratio,cash_ratio,invturn_days,arturn_days,inv_turn,ar_turn,ca_turn,fa_turn,assets_turn,op_income,valuechange_income,interst_income,daa,ebit,ebitda,fcff,fcfe,current_exint,noncurrent_exint,interestdebt,netdebt,tangible_asset,working_capital,networking_capital,invest_capital,retained_earnings,diluted2_eps,bps,ocfps,retainedps,cfps,ebit_ps,fcff_ps,fcfe_ps,netprofit_margin,grossprofit_margin,cogs_of_sales,expense_of_sales,profit_to_gr,saleexp_to_gr,adminexp_of_gr,finaexp_of_gr,impai_ttm,gc_of_gr,op_of_gr,ebit_of_gr,roe,roe_waa,roe_dt,roa,npta,roic,roe_yearly,roa2_yearly,roe_avg,opincome_of_ebt,investincome_of_ebt,n_op_profit_of_ebt,tax_to_ebt,dtprofit_to_profit,salescash_to_or,ocf_to_or,ocf_to_opincome,capitalized_to_da,debt_to_assets,assets_to_eqt,dp_assets_to_eqt,ca_to_assets,nca_to_assets,tbassets_to_totalassets,int_to_talcap,eqt_to_talcapital,currentdebt_to_debt,longdeb_to_debt,ocf_to_shortdebt,debt_to_eqt,eqt_to_debt,eqt_to_interestdebt,tangibleasset_to_debt,tangasset_to_intdebt,tangibleasset_to_netdebt,ocf_to_debt,ocf_to_interestdebt,ocf_to_netdebt,ebit_to_interest,longdebt_to_workingcapital,ebitda_to_debt,turn_days,roa_yearly,roa_dp,fixed_assets,profit_prefin_exp,non_op_profit,op_to_ebt,nop_to_ebt,ocf_to_profit,cash_to_liqdebt,cash_to_liqdebt_withinterest,op_to_liqdebt,op_to_debt,roic_yearly,total_fa_trun,profit_to_op,q_opincome,q_investincome,q_dtprofit,q_eps,q_netprofit_margin,q_gsprofit_margin,q_exp_to_sales,q_profit_to_gr,q_saleexp_to_gr,q_adminexp_to_gr,q_finaexp_to_gr,q_impair_to_gr_ttm,q_gc_to_gr,q_op_to_gr,q_roe,q_dt_roe,q_npta,q_opincome_to_ebt,q_investincome_to_ebt,q_dtprofit_to_profit,q_salescash_to_or,q_ocf_to_sales,q_ocf_to_or,basic_eps_yoy,dt_eps_yoy,cfps_yoy,op_yoy,ebt_yoy,netprofit_yoy,dt_netprofit_yoy,ocf_yoy,roe_yoy,bps_yoy,assets_yoy,eqt_yoy,tr_yoy,or_yoy,q_gr_y
oy,q_gr_qoq,q_sales_yoy,q_sales_qoq,q_op_yoy,q_op_qoq,q_profit_yoy,q_profit_qoq,q_netprofit_yoy,q_netprofit_qoq,equity_yoy,rd_exp,update_flag
0,1,000001.SZ,19911231.0,19911231,,,3.7097,3.7097,2.9271,0.8145,1.6646,,,,,,,,,,,,,0.092,146460000.0,3720000.0,,,,,,,,,,,,,,,223660000.0,,,,2.4791,,,,,33.658,,,,33.658,,,,,,44.8714,,27.6622,,,,3.0975,,27.6622,,,97.523,2.477,,,,,,,,1.7736,1.0181,8.9305,,,,,,,,,0.1336,7.4836,,,,,,,,,,,,3.0975,3.0975,82800000.0,,0.0,100.0,0.0,,,,,1.9446,,5.784,44.8714,,,,,,,,,,,,,,,,,,,,,,,,,,,,48.2674,58.9418,,,-34.9586,,49.1667,144.3702,,,,,,,,,,,,,144.3702,,0
1,1,000001.SZ,19921231.0,19921231,,,3.5271,3.5271,1.9611,,1.0863,,,,,,,,,,,,,0.08,220898000.0,4845018.85,,,,,,,,,,,,,,,146454700.0,,,,1.0863,,,,,36.1948,,,,36.1948,,,,,,47.4738,,30.6349,,,,2.8981,,30.6349,,,97.8537,2.1463,,,,,,,,2.604,1.0267,10.5706,,,,,,,,,0.359,2.7855,,,,,,,,,,,,2.8981,2.8981,187359500.0,,0.0,100.0,0.0,,,,,1.1524,,3.5202,47.4738,,,,,,,,,,,,,,,,,,,,,,,,,,,50.315,50.315,52.783,,,61.8262,,72.7619,-5.5882,42.075,42.075,,,,,,,,,,,-5.5882,,0
2,1,000001.SZ,19940416.0,19931231,1.06,,2.2969,2.2969,1.8473,0.6492,0.9744,,,,,,,,,,,,,0.073,295149400.0,7120959.95,,,,,,,,,,,,,,,437439300.0,1.01,4.47,,1.6236,,,,,44.1643,,,,44.1643,,,,,,48.844,,31.2311,,,,3.2448,,31.2311,,,97.6442,2.3558,,,,,,,,87.0343,7.7127,9.625,,,,,,,,,6.7363,0.1484,,,,,,,,,,,,3.2448,3.2448,351722400.0,,0.0,100.0,0.0,,,,,0.0373,,2.2959,48.844,,,,,,,,,,,,,,,,,,,,,,,,,,,33.9002,33.9002,58.7996,,,-28.0654,,23.9321,120.7556,30.144,30.144,,,,,,,,,,,120.7556,,0
3,1,000001.SZ,19940830.0,19940630,,,1.7982,1.7982,1.8473,0.9043,1.8635,,,,,,,,,,,,,0.044,125192500.0,2435946.1,,,,,,,,,,,,,,,745754400.0,0.77,5.62,,2.7678,,,,,42.912,,,,42.912,,,,,,42.3422,,15.3013,,,,1.9083,,30.6026,,,60.9673,1.1863,0.0963,,,,,,,87.8636,8.2397,8.0181,,,,,,,,,7.2397,0.1381,,,,,,,,,,,,3.8166,1.9083,,,197831.0,99.9037,0.0963,,,,,0.0187,,,42.3831,,,,,,,,,,,,,,,,,,,,,,,,,,,,26.9709,51.2416,,,,25.7271,33.7086,25.5968,81.8022,81.8022,,,,,,,,,,,,,0
4,1,000001.SZ,19950310.0,19941231,0.86,,3.2252,3.2252,1.2758,0.6944,0.8704,,,,,,,,,,,,,0.112,-203239600.0,63115202.6,,,,,,,,,,,,,,,674562600.0,0.83,3.84,,1.5649,,,,,25.6299,,,,25.6299,,0.207,,,,29.9326,,24.9164,,,,2.8723,,24.9164,,,-49.0415,15.2296,-0.3742,14.0183,,,,,,89.2834,9.3313,8.6748,,,,,,,,,8.3526,0.1197,,,,,,,,,,,,2.8723,2.8723,616119400.0,,-1723557.4,100.4159,-0.4159,,,,,0.0301,,2.8729,29.8086,,,,,,,,,,,,,,,,,,,,,,,,-18.8679,,,37.6201,37.1035,30.375,,,-5.1423,-14.094,66.1272,37.4428,19.6529,19.6529,,,,,,,,,,,37.4428,,0


# ST状态

In [26]:
# Read the three ST-status CSV files (one per date range named in the file
# name) in order and bind them to df_st_1 / df_st_2 / df_st_3.
df_st_1, df_st_2, df_st_3 = (
    pd.read_csv(st_sheet_path)
    for st_sheet_path in (
        '../../data/ST_sheets/ST_sheet_20000101-20091231.csv',
        '../../data/ST_sheets/ST_sheet_20100101-20191231.csv',
        '../../data/ST_sheets/ST_sheet_20200101-20200905.csv',
    )
)

In [27]:
# Preview the first five rows of the second ST-status frame.
df_st_2.head(n=5)

Unnamed: 0.1,Unnamed: 0,tradeDate,ticker,tradeAbbrName,STflg
0,0,2010-01-04,4,*ST国农,*ST
1,1,2010-01-05,4,*ST国农,*ST
2,2,2010-01-06,4,*ST国农,*ST
3,3,2010-01-07,4,*ST国农,*ST
4,4,2010-01-08,4,*ST国农,*ST


# 特征词典

In [28]:
import xlrd

In [29]:
# Open the data-dictionary workbook and keep a handle to each worksheet:
# the combined feature sheet plus one sheet for every pairing of a statement
# prefix (bs / is / cf) with a company suffix (indu / bank / secu / insu).
# NOTE(review): xlrd 2.0+ dropped .xlsx support — confirm the installed xlrd
# version can still open this file.
wb = xlrd.open_workbook('../../data/data_dict.xlsx')
ws_all = wb.sheet_by_name('全特征')

# Sheets are looked up in the same order as their target names on the left.
ws_bs_indu, ws_bs_bank, ws_bs_secu, ws_bs_insu = map(
    wb.sheet_by_name, ('bs_indu', 'bs_bank', 'bs_secu', 'bs_insu'))
ws_is_indu, ws_is_bank, ws_is_secu, ws_is_insu = map(
    wb.sheet_by_name, ('is_indu', 'is_bank', 'is_secu', 'is_insu'))
ws_cf_indu, ws_cf_bank, ws_cf_secu, ws_cf_insu = map(
    wb.sheet_by_name, ('cf_indu', 'cf_bank', 'cf_secu', 'cf_insu'))


In [30]:
# Show the values of column 0 of the combined feature sheet, starting at row
# index 1 (i.e. skipping row 0).
ws_all.col_values(0, start_rowx=1)

['cap_rese',
 'undistr_porfit',
 'surplus_rese',
 'special_rese',
 'money_cap',
 'trad_asset',
 'notes_receiv',
 'accounts_receiv',
 'oth_receiv',
 'prepayment',
 'div_receiv',
 'int_receiv',
 'inventories',
 'amor_exp',
 'nca_within_1y',
 'sett_rsrv',
 'loanto_oth_bank_fi',
 'premium_receiv',
 'reinsur_receiv',
 'reinsur_res_receiv',
 'pur_resale_fa',
 'oth_cur_assets',
 'total_cur_assets',
 'fa_avail_for_sale',
 'htm_invest',
 'lt_eqt_invest',
 'invest_real_estate',
 'time_deposits',
 'oth_assets',
 'lt_rec',
 'fix_assets',
 'cip',
 'const_materials',
 'fixed_assets_disp',
 'produc_bio_assets',
 'oil_and_gas_assets',
 'intan_assets',
 'r_and_d',
 'goodwill',
 'lt_amor_exp',
 'defer_tax_assets',
 'decr_in_disbur',
 'oth_nca',
 'total_nca',
 'cash_reser_cb',
 'depos_in_oth_bfi',
 'prec_metals',
 'deriv_assets',
 'rr_reins_une_prem',
 'rr_reins_outstd_cla',
 'rr_reins_lins_liab',
 'rr_reins_lthins_liab',
 'refund_depos',
 'ph_pledge_loans',
 'refund_cap_depos',
 'indep_acct_assets',
 'c

In [31]:
# Show the values of column 2 of the 'bs_indu' sheet, starting at row index 3
# (i.e. skipping rows 0-2).
ws_bs_indu.col_values(2, start_rowx=3)

['货币资金',
 '结算备付金',
 '拆出资金',
 '衍生金融资产',
 '交易性金融资产',
 '应收票据',
 '应收账款',
 '预付款项',
 '应收保费',
 '应收分保账款',
 '应收分保合同准备金',
 '应收利息',
 '应收股利',
 '其他应收款',
 '买入返售金融资产',
 '存货',
 '划分为持有待售的资产',
 '一年内到期的非流动资产',
 '其他流动资产',
 '流动资产的特殊项目',
 '流动资产的调整金额',
 '流动资产合计',
 '发放委托贷款及垫款',
 '可供出售金融资产',
 '持有至到期投资',
 '长期应收款',
 '长期股权投资',
 '投资性房地产',
 '固定资产',
 '在建工程',
 '工程物资',
 '固定资产清理',
 '生产性生物资产',
 '油气资产',
 '无形资产',
 '研发支出',
 '商誉',
 '长期待摊费用',
 '递延所得税资产',
 '其他非流动资产',
 '非流动资产的特殊项目',
 '非流动资产的调整金额',
 '非流动资产合计',
 '资产的特殊项目',
 '资产的调整金额',
 '资产总计',
 '短期借款',
 '向中央银行借款',
 '吸收存款',
 '拆入资金',
 '交易性金融负债',
 '衍生金融负债',
 '应付票据',
 '应付账款',
 '预收款项',
 '卖出回购金融资产款',
 '应付手续费及佣金',
 '应付职工薪酬',
 '应交税费',
 '应付利息',
 '应付股利',
 '其他应付款',
 '应付分保账款',
 '保险合同准备金',
 '代理买卖证券款',
 '代理承销证券款',
 '划分为持有待售的负债',
 '一年内到期的非流动负债',
 '预提费用',
 '其他流动负债',
 '流动负债的特殊项目',
 '流动负债的调整项目',
 '流动负债合计',
 '长期借款',
 '应付债券',
 '其中：优先股',
 '永续债',
 '长期应付款',
 '长期应付职工薪酬',
 '专项应付款',
 '预计负债',
 '递延收益',
 '递延所得税负债',
 '其他非流动负债',
 '非流动负债的特殊项目',
 '非流动负债的调整项目',
 '非流动负债合计',
 '负债的特殊项目',
 '负债的调整金额',
 '负债合计',
 '实收资本