In [1]:
from pathlib import Path
import sys
import os
# The project root is taken to be the parent of the current working directory,
# so the notebook must be launched from a direct sub-folder of the project.
# Appending it to sys.path is presumably what lets the `report` package
# imported below resolve -- TODO confirm the package location.
root_dir = str(Path(os.getcwd()).parent)
sys.path.append(root_dir)

from report.load.load_report_time import load_yysj_data, get_trade_date_lag
from report.util import get_trade_date

import multiprocessing as mp
import pandas as pd
import numpy as np
import pyarrow.parquet as pq
from functools import partial
import datetime 

import warnings
warnings.filterwarnings('ignore')


In [2]:
# ---- Load the disclosure-appointment (yysj) data; cached as yysj.parquet ----
yysj_file = f'{root_dir}/data/intermediate_results/yysj.parquet'
if not os.path.isfile(yysj_file):
	# Cache miss: call load_yysj_data once for every integer in
	# range(2007, 2025) (presumably calendar years) on a worker pool,
	# stack the pieces into one frame and persist it.
	with mp.Pool() as pool:
		yysj_by_year = pool.map(load_yysj_data, range(2007,2025))
	yysj = pd.concat(yysj_by_year).reset_index(drop=True)
	yysj.to_parquet(yysj_file)
else:
	# Cache hit: reuse the table written by an earlier run.
	yysj = pq.read_table(yysj_file).to_pandas()
yysj

Unnamed: 0,stock_symbol,首次预约时间,一次变更日期,二次变更日期,三次变更日期,实际披露时间,report_type,start_day,deadline
0,000001,2007-03-22,,,,2007-03-22,20061231,2007-01-01,2007-04-30
1,000002,2007-03-20,,,,2007-03-20,20061231,2007-01-01,2007-04-30
2,000004,2007-04-27,,,,2007-04-27,20061231,2007-01-01,2007-04-30
3,000006,2007-03-24,,,,2007-03-24,20061231,2007-01-01,2007-04-30
4,000007,2007-04-20,,,,2007-04-20,20061231,2007-01-01,2007-04-30
...,...,...,...,...,...,...,...,...,...
203742,688799,2024-08-31,,,,,20240630,2024-07-01,2024-08-30
203743,688800,2024-08-22,,,,,20240630,2024-07-01,2024-08-30
203744,688819,2024-08-29,,,,,20240630,2024-07-01,2024-08-30
203745,688981,2024-08-30,,,,,20240630,2024-07-01,2024-08-30


In [3]:
# ---- Derive appointment-timing indicators; cached as yysj_pro.parquet ----
yysj_pro_file = f'{root_dir}/data/intermediate_results/yysj_pro.parquet'
if not os.path.isfile(yysj_pro_file):
	# Only rows with a symbol, a first appointment date and an actual
	# disclosure date are kept.
	required_cols = ['stock_symbol', '首次预约时间', '实际披露时间']
	yysj_pro = yysj.dropna(subset=required_cols).reset_index(drop=True).copy()

	# Latest appointment date: start from an all-NaN column and fill it from
	# the third, second and first change dates, then the first appointment,
	# so the most recent available revision wins.
	yysj_pro['latest_book_date'] = np.nan
	for date_col in ['三次变更日期', '二次变更日期', '一次变更日期', '首次预约时间']:
		yysj_pro['latest_book_date'] = yysj_pro['latest_book_date'].fillna(yysj_pro[date_col])

	# Percentile rank of each date within its report_type group
	# (ascending, ties receive the minimum rank).
	rank_sources = {
		'book_time_rank_first': '首次预约时间',
		'book_time_rank_last': 'latest_book_date',
		'report_time_rank_last': '实际披露时间',
	}
	for rank_col, source_col in rank_sources.items():
		yysj_pro[rank_col] = yysj_pro.groupby('report_type')[source_col].rank(ascending=True, pct=True, method='min')

	# Whole days between the first appointment date and the deadline;
	# negative when the appointment falls after the deadline.
	first_to_deadline = yysj_pro['deadline'] - yysj_pro['首次预约时间']
	yysj_pro['days_to_deadline'] = first_to_deadline.apply(lambda gap: gap.days)

	output_cols = ['stock_symbol', 'report_type', 'book_time_rank_first', 'book_time_rank_last', 'report_time_rank_last', 'days_to_deadline']
	yysj_pro = yysj_pro[output_cols]
	yysj_pro.to_parquet(yysj_pro_file)
else:
	yysj_pro = pq.read_table(yysj_pro_file).to_pandas()
yysj_pro

Unnamed: 0,stock_symbol,report_type,book_time_rank_first,book_time_rank_last,report_time_rank_last,days_to_deadline
0,000001,20061231,0.280156,0.280156,0.280934,39
1,000002,20061231,0.248249,0.248249,0.248249,41
2,000004,20061231,0.874708,0.874708,0.875486,3
3,000006,20061231,0.309728,0.309728,0.309728,37
4,000007,20061231,0.734630,0.734630,0.737743,10
...,...,...,...,...,...,...
198680,688018,20240630,0.480769,0.557692,0.557692,31
198681,688027,20240630,0.980769,0.557692,0.557692,-1
198682,688100,20240630,0.480769,0.557692,0.557692,31
198683,688510,20240630,0.153846,0.096154,0.096154,36


In [4]:
# ---- Trade dates for each lag value; cached as trade_date_lag.parquet ----
trade_date_lag_file = f'{root_dir}/data/intermediate_results/trade_date_lag.parquet'
if not os.path.isfile(trade_date_lag_file):
	# Bind yysj as the first positional argument so the pool only supplies
	# the varying integer (every value in range(-30, 31)).
	partial_func = partial(get_trade_date_lag, yysj)
	with mp.Pool() as pool:
		lag_frames = pool.map(partial_func, range(-30,31))
	trade_date_lag = pd.concat(lag_frames).reset_index(drop=True)
	trade_date_lag.to_parquet(trade_date_lag_file)
else:
	trade_date_lag = pq.read_table(trade_date_lag_file).to_pandas()
trade_date_lag

Unnamed: 0,stock_symbol,report_type,trade_date,lag
0,000001,20061231,2007-02-01,-30
1,000002,20061231,2007-01-30,-30
2,000004,20061231,2007-03-16,-30
3,000006,20061231,2007-02-05,-30
4,000007,20061231,2007-03-09,-30
...,...,...,...,...
12428196,688799,20240630,2024-10-23,30
12428197,688800,20240630,2024-10-14,30
12428198,688819,20240630,2024-10-21,30
12428199,688981,20240630,2024-10-22,30


In [5]:
# ---- Clean the stock-info table; cached as stk_info.parquet ----
stk_info_file = f'{root_dir}/data/intermediate_results/stk_info.parquet'
if not os.path.isfile(stk_info_file):
	stk_info = (
		pd.read_csv(f'{root_dir}/data/raw_data/stk_info.csv')
		.assign(
			# The symbol is the first six characters of order_book_id.
			stock_symbol=lambda raw: raw['order_book_id'].str[:6],
			# errors='coerce' turns unparseable dates into missing values.
			listed_date=lambda raw: pd.to_datetime(raw['listed_date'], errors='coerce').dt.date,
		)
		.drop(columns='order_book_id')
	)
	stk_info.to_parquet(stk_info_file)
else:
	stk_info = pq.read_table(stk_info_file).to_pandas()
stk_info

Unnamed: 0,industry_code,market_tplus,symbol,special_type,exchange,status,type,de_listed_date,listed_date,sector_code_name,...,round_lot,trading_hours,board_type,industry_name,issue_price,trading_code,office_address,province,purchasedate,stock_symbol
0,J66,1,平安银行,Normal,XSHE,Active,CS,0000-00-00,1991-04-03,金融,...,100.0,"09:31-11:30,13:01-15:00",MainBoard,货币金融服务,40.00,1,中国广东省深圳市深南东路5047号;中国广东省深圳市福田区益田路5023号平安金融中心B座,广东省,,000001
1,K70,1,万科A,Normal,XSHE,Active,CS,0000-00-00,1991-01-29,房地产,...,100.0,"09:31-11:30,13:01-15:00",MainBoard,房地产业,1.00,2,深圳市盐田区大梅沙环梅路33号万科中心,广东省,,000002
2,Unknown,1,PT金田A,PT,XSHE,Delisted,CS,2002-06-14,1991-07-03,未知,...,100.0,"09:31-11:30,13:01-15:00",MainBoard,未知,10.00,3,深圳市罗湖区嘉宾路深华商业大厦19楼1909,广东省,,000003
3,I65,1,国华网安,Normal,XSHE,Active,CS,0000-00-00,1990-12-01,医疗保健,...,100.0,"09:31-11:30,13:01-15:00",MainBoard,软件和信息技术服务业,1.00,4,深圳市福田区梅林街道孖岭社区凯丰路10号翠林大厦12层,广东省,,000004
4,Unknown,1,ST星源,Other,XSHE,Delisted,CS,2024-04-26,1990-12-10,未知,...,100.0,"09:31-11:30,13:01-15:00",MainBoard,未知,10.00,5,深圳市罗湖区深南东路2017号华乐大厦3F,广东省,,000005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5387,C39,1,瑞可达,Normal,XSHG,Active,CS,0000-00-00,2021-07-22,信息技术,...,200.0,"09:31-11:30,13:01-15:00",KSH,计算机、通信和其他电子设备制造业,15.02,688800,江苏省苏州市吴中区吴淞江科技产业园淞葭路998号,江苏省,2021-07-12 周一,688800
5388,C38,1,天能股份,Normal,XSHG,Active,CS,0000-00-00,2021-01-18,工业,...,200.0,"09:31-11:30,13:01-15:00",KSH,电气机械和器材制造业,41.79,688819,浙江省长兴县画溪工业园包桥路18号,浙江省,2021-01-05 周二,688819
5389,C39,1,中芯国际,Normal,XSHG,Active,CS,0000-00-00,2020-07-16,信息技术,...,200.0,"09:31-11:30,13:01-15:00",KSH,计算机、通信和其他电子设备制造业,27.46,688981,中国上海市浦东新区张江路18号,开曼群岛,,688981
5390,C39,1,九号公司,Normal,XSHG,Active,CS,0000-00-00,2020-10-29,非必须消费品,...,500.0,"09:31-11:30,13:01-15:00",KSH,计算机、通信和其他电子设备制造业,18.94,689009,,,,689009


In [6]:
# ---- Process financial-report data; cached as report_data.parquet ----
report_data_file = f'{root_dir}/data/intermediate_results/report_data.parquet'
if not os.path.isfile(report_data_file):
	report_data = pd.read_csv(f'{root_dir}/data/raw_data/report_data.csv')
	report_data['stock_symbol'] = report_data['order_book_id'].str[:6]

	# Rewrite quarter labels "<yyyy>q<n>" as the quarter-end string
	# "<yyyy><mmdd>", applying one substitution per quarter in turn.
	quarter_end_rules = [
		(r'(\d{4})q1', r'\g<1>0331'),
		(r'(\d{4})q2', r'\g<1>0630'),
		(r'(\d{4})q3', r'\g<1>0930'),
		(r'(\d{4})q4', r'\g<1>1231'),
	]
	report_type = report_data['quarter']
	for pattern, replacement in quarter_end_rules:
		report_type = report_type.str.replace(pattern, replacement, regex=True)
	report_data['report_type'] = report_type

	# Order rows by symbol then period so the per-symbol shift/pct_change
	# below compare each row with the preceding period present in the data.
	report_data = report_data.sort_values(by=['stock_symbol', 'report_type'])
	keep_cols = ['stock_symbol', 'report_type', 'total_liabilities', 'net_profit', 'total_assets', 'total_equity', 'basic_earnings_per_share']
	report_data = report_data[keep_cols]

	# Profitability ratios.
	report_data['roa'] = report_data['net_profit'] / report_data['total_assets']
	report_data['roe'] = report_data['net_profit'] / report_data['total_equity']

	# Per-symbol changes relative to the previous row.
	by_stock = report_data.groupby('stock_symbol')
	report_data['ia'] = by_stock['total_assets'].pct_change(fill_method=None)
	report_data['eps_lag1'] = by_stock['basic_earnings_per_share'].shift(1)
	report_data['ue'] = report_data['basic_earnings_per_share'] - report_data['eps_lag1']
	report_data['report_type_lag1'] = by_stock['report_type'].shift(1)
	report_data.to_parquet(report_data_file)
else:
	report_data = pq.read_table(report_data_file).to_pandas()
report_data

Unnamed: 0,stock_symbol,report_type,total_liabilities,net_profit,total_assets,total_equity,basic_earnings_per_share,roa,roe,ia,eps_lag1,ue,report_type_lag1
0,000001,20060331,2.342934e+11,2.331835e+08,2.395576e+11,5.264133e+09,0.12,0.000973,0.044297,,,,
1,000001,20060630,2.372318e+11,5.178560e+08,2.428239e+11,5.592040e+09,0.25,0.002133,0.092606,0.013635,0.12,0.13,20060331
2,000001,20060930,2.401763e+11,9.723380e+08,2.462303e+11,6.053955e+09,0.47,0.003949,0.160612,0.014028,0.25,0.22,20060630
3,000001,20061231,2.541637e+11,1.411947e+09,2.607607e+11,6.597040e+09,0.68,0.005415,0.214027,0.059011,0.47,0.21,20060930
4,000001,20070331,2.805525e+11,5.350840e+08,2.876586e+11,7.106094e+09,0.26,0.001860,0.075299,0.103152,0.68,-0.42,20061231
...,...,...,...,...,...,...,...,...,...,...,...,...,...
255460,689009,20230331,4.161949e+09,1.734555e+07,9.204271e+09,5.042322e+09,0.24,0.001885,0.003440,-0.020163,6.35,-6.11,20221231
255461,689009,20230630,4.719132e+09,2.220691e+08,1.002299e+10,5.303860e+09,3.11,0.022156,0.041869,0.088950,0.24,2.87,20230331
255462,689009,20230930,5.449535e+09,3.783601e+08,1.081371e+10,5.364172e+09,5.29,0.034989,0.070535,0.078890,3.11,2.18,20230630
255463,689009,20231231,5.315924e+09,5.962034e+08,1.084963e+10,5.533706e+09,8.37,0.054952,0.107740,0.003322,5.29,3.08,20230930


In [7]:
# ---- Build the daily factor table; cached as factor_data.parquet ----
# On a cache miss the five raw CSV parts are read, normalised, stacked,
# joined with each stock's listing date, sorted by (stock_symbol, trade_date)
# and written to parquet. On a cache hit the stored table is loaded as-is.
factor_data_file = f'{root_dir}/data/intermediate_results/factor_data.parquet'
if os.path.isfile(factor_data_file):
	factor_data = pq.read_table(factor_data_file).to_pandas()
else:
	factor_data_file_list = [f'{root_dir}/data/raw_data/factor_data1.csv',
							f'{root_dir}/data/raw_data/factor_data2.csv',
							f'{root_dir}/data/raw_data/factor_data3.csv',
							f'{root_dir}/data/raw_data/factor_data4.csv',
							f'{root_dir}/data/raw_data/factor_data5.csv',]
	factor_data_list = []
	for factor_csv_path in factor_data_file_list:
		# Use a dedicated name for the per-file frame so the final
		# `factor_data` is only ever bound to the combined table.
		factor_part = pd.read_csv(factor_csv_path)
		# The symbol is the first six characters of order_book_id.
		factor_part['stock_symbol'] = factor_part['order_book_id'].str[:6]
		factor_part['trade_date'] = pd.to_datetime(factor_part['date']).dt.date
		factor_part = factor_part.drop(columns=['order_book_id', 'date'])
		factor_data_list.append(factor_part)
	factor_data = pd.concat(factor_data_list)
	# Attach the listing date of each stock.
	factor_data = pd.merge(left=factor_data, right=stk_info[['stock_symbol', 'listed_date']], on='stock_symbol', how='left')
	# Natural log of market_cap_3 (column name kept for downstream users).
	factor_data['ln_assets'] = np.log(factor_data['market_cap_3'])
	# sort_values(inplace=True) returns None, so chaining .reset_index() on it
	# raised AttributeError before the cache was ever written. Sort
	# out-of-place and rebind the result instead.
	factor_data = factor_data.sort_values(by=['stock_symbol', 'trade_date']).reset_index(drop=True)
	factor_data.to_parquet(factor_data_file)
factor_data

Unnamed: 0,market_cap_3,book_to_market_ratio_lf,du_return_on_equity_ttm,VOL5,MTM,VOLT20,VOLT10,total_assets_mrq_0,return_on_asset_ttm,DAVOL5,...,circulation_a,non_circulation_a,total_a,free_circulation,preferred_shares,total,stock_symbol,trade_date,listed_date,ln_assets
0,1.179360e+11,0.118041,,,,,,9.880178e+10,,,...,549193000.0,5.050807e+09,5.600000e+09,549193000.0,0.0,5.600000e+09,601688,2010-02-26,2010-02-26,25.493408
1,1.184400e+11,0.117539,,,,,,9.880178e+10,,,...,549193000.0,5.050807e+09,5.600000e+09,549193000.0,0.0,5.600000e+09,601688,2010-03-01,2010-02-26,25.497672
2,1.164800e+11,0.119517,,,,,,9.880178e+10,,,...,549193000.0,5.050807e+09,5.600000e+09,549193000.0,0.0,5.600000e+09,601688,2010-03-02,2010-02-26,25.480985
3,1.200080e+11,0.116003,,,,,,9.880178e+10,,,...,549193000.0,5.050807e+09,5.600000e+09,549193000.0,0.0,5.600000e+09,601688,2010-03-03,2010-02-26,25.510824
4,1.165920e+11,0.119402,,,,,,9.880178e+10,,,...,549193000.0,5.050807e+09,5.600000e+09,549193000.0,0.0,5.600000e+09,601688,2010-03-04,2010-02-26,25.481946
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13278604,3.050105e+09,0.757138,0.56093,0.871771,0.06,0.130645,,3.627149e+09,0.036145,0.582940,...,646208651.0,0.000000e+00,6.462087e+08,260052211.0,0.0,6.462087e+08,603166,2024-07-30,2014-11-27,21.838442
13278605,3.153498e+09,0.732314,0.56093,0.872548,0.01,0.133156,,3.627149e+09,0.036145,0.583041,...,646208651.0,0.000000e+00,6.462087e+08,260052211.0,0.0,6.462087e+08,603166,2024-07-31,2014-11-27,21.871778
13278606,3.159960e+09,0.730816,0.56093,0.793969,0.02,0.142736,,3.627149e+09,0.036145,0.530827,...,646208651.0,0.000000e+00,6.462087e+08,260052211.0,0.0,6.462087e+08,603166,2024-08-01,2014-11-27,21.873825
13278607,3.108264e+09,0.742971,0.56093,0.748091,0.07,0.146320,,3.627149e+09,0.036145,0.501310,...,646208651.0,0.000000e+00,6.462087e+08,260052211.0,0.0,6.462087e+08,603166,2024-08-02,2014-11-27,21.857330


In [8]:
# ---- Process index price data; cached as index_data.parquet ----
index_data_file = f'{root_dir}/data/intermediate_results/index_data.parquet'
if not os.path.isfile(index_data_file):
	index_data = (
		pd.read_csv(f'{root_dir}/data/raw_data/index_data.csv')
		.assign(
			# The symbol is the first six characters of order_book_id.
			stock_symbol=lambda raw: raw['order_book_id'].str[:6],
			# Keep only the calendar date of each observation.
			trade_date=lambda raw: pd.to_datetime(raw['date']).dt.date,
		)
		.drop(columns=['order_book_id', 'date'])
	)
	index_data.to_parquet(index_data_file)
else:
	index_data = pq.read_table(index_data_file).to_pandas()
index_data

Unnamed: 0,open,close,high,total_turnover,volume,low,prev_close,stock_symbol,trade_date
0,3081.0657,3117.7787,3123.6094,7.500649e+09,1.184454e+09,3075.8072,,000869,2017-04-25
1,3130.6137,3128.3387,3138.0979,6.114450e+09,9.041084e+08,3119.6652,3117.7787,000869,2017-04-26
2,3137.0743,3133.0217,3139.6993,6.296439e+09,8.666304e+08,3102.3068,3128.3387,000869,2017-04-27
3,3121.3363,3110.4341,3124.7721,3.866797e+09,5.453917e+08,3104.8405,3133.0217,000869,2017-04-28
4,3123.8744,3095.8492,3128.4513,4.270389e+09,6.781997e+08,3089.8568,3110.4341,000869,2017-05-02
...,...,...,...,...,...,...,...,...,...
18660801,3092.4547,3063.6285,3125.4495,2.953934e+10,8.251090e+08,3053.5547,3104.6262,399293,2024-07-24
18660802,3044.4968,3047.6705,3072.5205,3.279069e+10,8.056904e+08,3037.4862,3063.6285,399293,2024-07-25
18660803,3055.2459,3074.4698,3092.5807,2.936868e+10,7.634770e+08,3050.6233,3047.6705,399293,2024-07-26
18660804,3063.3387,3029.6265,3063.3387,2.936733e+10,6.725334e+08,3028.1764,3074.4698,399293,2024-07-29


In [9]:
# ---- Analyst forecast data; cached as forecast_data.parquet ----
forecast_data_file = f'{root_dir}/data/intermediate_results/forecast_data.parquet'
if not os.path.isfile(forecast_data_file):
	forecast_data_file_list = [f'{root_dir}/data/raw_data/forecast1.xlsx',
							f'{root_dir}/data/raw_data/forecast2.xlsx',
							f'{root_dir}/data/raw_data/forecast3.xlsx',
							f'{root_dir}/data/raw_data/forecast4.xlsx']
	forecast_cols = ['stock_symbol', 'report_type', 'AnanmID', 'declare_date', 'Feps', 'FROA', 'FROE','TotalProfit']
	merge_keys = ['stock_symbol', 'report_type']

	forecast_data_list = []
	for forecast_path in forecast_data_file_list:
		# The first two data rows of every workbook are discarded
		# (presumably descriptive header rows -- verify against the files).
		forecast_part = pd.read_excel(forecast_path, ).iloc[2:, :]
		forecast_part = forecast_part.assign(
			stock_symbol=forecast_part['Stkcd'],
			declare_date=pd.to_datetime(forecast_part['DeclareDate']).dt.date,
			# Strip the dashes from the forecast period end date.
			report_type=forecast_part['Fenddt'].str.replace('-',''),
		)
		forecast_data_list.append(forecast_part[forecast_cols])
	forecast_data = pd.concat(forecast_data_list)

	# Attach the realised EPS for the forecast period.
	realised_eps = report_data[['stock_symbol', 'report_type', 'basic_earnings_per_share']]
	forecast_data = pd.merge(left=forecast_data, right=realised_eps, on=merge_keys, how='left')
	# Attach the actual disclosure date of the report.
	disclosure_dates = yysj[['stock_symbol', 'report_type', '实际披露时间']]
	forecast_data = pd.merge(left=forecast_data, right=disclosure_dates, on=merge_keys, how='left')
	forecast_data = forecast_data.sort_values(by=['stock_symbol', 'report_type', 'declare_date']).reset_index(drop=True)

	# Forecast error: realised EPS minus forecast EPS.
	forecast_data['bias'] = forecast_data['basic_earnings_per_share'] - forecast_data['Feps']
	# Whole days from the forecast date to the actual disclosure date;
	# 999 marks rows where that gap is missing.
	forecast_data['days2ann'] = (
		(forecast_data['实际披露时间'] - forecast_data['declare_date'])
		.fillna(datetime.timedelta(days=999))
		.apply(lambda gap: gap.days)
	)
	forecast_data.to_parquet(forecast_data_file)
else:
	forecast_data = pq.read_table(forecast_data_file).to_pandas()
forecast_data

Unnamed: 0,stock_symbol,report_type,AnanmID,declare_date,Feps,FROA,FROE,TotalProfit,basic_earnings_per_share,实际披露时间,bias,days2ann
0,000001,20021231,30264494,2002-06-28,0.2,,,,,,,999
1,000001,20021231,30248981,2002-07-18,0.301,,,,,,,999
2,000001,20031231,30266317,2003-04-04,0.29,,,,,,,999
3,000001,20031231,30265866,2003-06-04,0.28,,,,,,,999
4,000001,20041231,30245529,2004-03-29,0.2,,,,,,,999
...,...,...,...,...,...,...,...,...,...,...,...,...
2060734,900950,20131231,30243251,2012-12-27,1.771,,,,,,,999
2060735,900950,20141231,302432993024329230245083,2012-03-28,2.46,,,,,,,999
2060736,920002,20241231,30655100,2024-05-28,1.77,,8.3,69000000,,,,999
2060737,920002,20251231,30655100,2024-05-28,2.16,,9.1,82000000,,,,999


In [13]:
# ---- Aggregate analyst forecasts per (stock, period); cached as forecast_data_pro.parquet ----
forecast_data_pro_file = f'{root_dir}/data/intermediate_results/forecast_data_pro.parquet'
if not os.path.isfile(forecast_data_pro_file):
	# Every aggregate below comes from the same grouping, so the positional
	# `.values` assignments line up with the rows of forecast_data_pro.
	per_report = forecast_data.groupby(['stock_symbol', 'report_type'])

	forecast_data_pro = per_report['bias'].mean().reset_index()
	forecast_data_pro['count'] = per_report['Feps'].count().values
	forecast_data_pro['fprofit'] = per_report['TotalProfit'].mean().values

	# Realised EPS; zeros become NaN so they cannot be used as a divisor.
	forecast_data_pro['eps'] = per_report['basic_earnings_per_share'].mean().values
	forecast_data_pro['eps'] = forecast_data_pro['eps'].replace(0,np.nan)

	# Absolute mean forecast error scaled by realised EPS; infinities are blanked.
	scaled_bias = (forecast_data_pro['bias'] / forecast_data_pro['eps']).abs()
	forecast_data_pro['bias'] = scaled_bias.replace(np.inf, np.nan)

	# Dispersion: standard deviation of forecast EPS over its mean
	# (a zero mean is treated as missing).
	forecast_data_pro['feps_std'] = per_report['Feps'].std().values
	forecast_data_pro['feps_mean'] = per_report['Feps'].mean().values
	forecast_data_pro['feps_mean'] = forecast_data_pro['feps_mean'].replace(0,np.nan)
	forecast_data_pro['disp'] = forecast_data_pro['feps_std'] / forecast_data_pro['feps_mean']
	forecast_data_pro.to_parquet(forecast_data_pro_file)
else:
	forecast_data_pro = pq.read_table(forecast_data_pro_file).to_pandas()
forecast_data_pro

Unnamed: 0,stock_symbol,report_type,bias,count,fprofit,eps,feps_std,feps_mean,disp
0,000001,20021231,,2,,,0.071418,0.250500,0.285101
1,000001,20031231,,2,,,0.007071,0.285000,0.024811
2,000001,20041231,,5,,,0.027179,0.216800,0.125365
3,000001,20051231,,9,,,0.071673,0.191000,0.375251
4,000001,20061231,0.52402,21,,0.68,0.157422,0.323667,0.486371
...,...,...,...,...,...,...,...,...,...
59731,900950,20131231,,5,,,0.394439,1.529600,0.257871
59732,900950,20141231,,1,,,,2.460000,
59733,920002,20241231,,1,69000000.0,,,1.770000,
59734,920002,20251231,,1,82000000.0,,,2.160000,
