# 获得分红信息

## 步骤1：获得分红信息

In [46]:
import datetime
import pandas as pd
from jqdata import *

In [47]:
# Upper bound of the analysis: only dividend records whose report date falls
# before January 1 of this year are queried.
query_yr = "2024"

# Earliest report year that is kept in the analysis.
query_yr_bgn = "2020"

# Date on which the index constituents are looked up.
end_date = "2024-3-24"

# Benchmark index whose constituents form the stock pool.
# The original notebook also kept '000852.XSHG' (noted there as CSI 1000)
# as a commented-out alternative.
bench_index = "000300.XSHG"

stock_pool = get_index_stocks(bench_index, end_date)

In [48]:
# finance是jqdata里面的库，包含分红等信息
f = finance.STK_XR_XD

q =query(
    f.code,f.company_name,
    f.report_date,
    f.bonus_amount_rmb
).filter(
    finance.STK_XR_XD.code.in_(stock_pool),
    f.bonus_amount_rmb!='NaN',
    f.report_date<str(query_yr)+'-01-01'
).order_by(
    finance.STK_XR_XD.report_date.desc()
)

df = finance.run_query(q)

  cursor.execute(statement, parameters)


In [49]:
# Index the records by report date; resample() below needs a DatetimeIndex.
df = df.set_index(pd.to_datetime(df['report_date']))

# A stock can pay several dividends within one year, so add them up per stock
# and calendar year. Only the numeric amount column is aggregated: summing the
# whole frame would also try to "sum" the string columns (company_name,
# report_date), which newer pandas versions no longer drop silently.
df = df.groupby('code')['bonus_amount_rmb'].resample('Y').sum()

# Turn the (code, report_date) index back into ordinary columns.
df = df.reset_index()

# Calendar year of each aggregated row.
df['report_year'] = df['report_date'].dt.year

# Keep only the years from query_yr_bgn onward.
df = df[df['report_year'] >= int(query_yr_bgn)]


In [50]:
# One market-cap frame per report year; they are concatenated once after the
# loop instead of growing a DataFrame on every iteration.
yearly_caps = []

# Look up the market cap of the dividend-paying stocks once per report year.
# sorted() makes the query order deterministic.
for date_i in sorted(set(df['report_year'])):

    # Earlier of the last two trading days up to January 1 of the next year.
    query_date = get_trade_days(start_date=None, end_date=str(int(date_i) + 1) + '-01-01', count=2)[0]

    # Stocks that have a dividend row for this report year.
    sec_list = list(df[df['report_year'] == date_i]['code'])

    q = query(
        valuation.code,
        valuation.market_cap,
    ).filter(
        # Filter on the table that is actually selected from. The original
        # filtered on balance.code, pulling a second table into the query.
        valuation.code.in_(sec_list)
    )

    query_pd = get_fundamentals(q, query_date)

    # Tag every row with its report year. A scalar assignment broadcasts to
    # all rows, so no explicit numpy array is needed.
    query_pd['query_date'] = date_i

    yearly_caps.append(query_pd)

if not yearly_caps:
    # Without any report year there is nothing to merge; fail with a clear
    # message instead of an opaque KeyError from the merge below.
    raise ValueError('no dividend records found for the requested year range')

market_pd = pd.concat(yearly_caps, axis=0)

# Attach each year's market cap to the matching (code, report_year) row.
df = pd.merge(df, market_pd, how='left', left_on=['code', 'report_year'], right_on=['code', 'query_date'])

# Dividend yield = yearly dividend amount / market cap.
# NOTE(review): the divisor 10000 presumably reconciles the units of
# bonus_amount_rmb and market_cap - confirm against the jqdata field docs.
df['div_ratio'] = df['bonus_amount_rmb'] / df['market_cap'] / 10000

# query_date duplicates report_year after the merge.
df = df.drop(columns='query_date')

In [51]:
# 调整格式
div_ret = df.pivot_table(index = ['code'], values = ['div_ratio'],columns = ['report_year'])
div_ret.fillna(0,inplace = True)

# 注意这里是固定3年，如果是其他年数需要调整
div_ret['avg_div'] = (div_ret.iloc[:,0] + div_ret.iloc[:,1] + div_ret.iloc[:,2])/3

# 筛选股票
# 是否大于0.05
div_ret = div_ret[div_ret['avg_div'] > 0.03]

In [52]:
# Preview the first rows of the filtered dividend-yield table.
div_ret.head()

Unnamed: 0_level_0,div_ratio,div_ratio,div_ratio,div_ratio,avg_div
report_year,2020,2021,2022,2023,Unnamed: 5_level_1
code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
000002.XSHE,0.036866,0.050732,0.031072,0.0,0.039557
000157.XSHE,0.055636,0.044818,0.059041,0.0,0.053165
000408.XSHE,0.0,0.0,0.109271,0.031434,0.036424
000425.XSHE,0.018657,0.0,0.075826,0.0,0.031494
000651.XSHE,0.061517,0.076252,0.061995,0.0,0.066588


In [53]:
# 名字信息
name_pd =pd.DataFrame(get_all_securities()['display_name'])


# 提取行业的编码信息
Sec_list = [[d[0],d[1]['industry_code'],d[1]['industry_name']] for d in pd.DataFrame.from_dict(get_industry(list(set(list(div_ret.index))), date=end_date), orient='index')['sw_l1'].items()]

# 重新生成新的pandas数据，准备进行合并
industry_pd = pd.DataFrame(data = Sec_list,columns = ['code','industry','industry_name'])
industry_pd = industry_pd.set_index(['code'])

In [54]:
# 增加名字
div_ret = pd.concat([div_ret,name_pd], axis =1,sort=False,join = 'inner')

# 增加行业
div_ret = pd.concat([div_ret,industry_pd], axis =1,sort=False,join = 'inner')

In [56]:
# Preview the table after the name and industry columns were attached.
div_ret.head()

Unnamed: 0_level_0,"(div_ratio, 2020)","(div_ratio, 2021)","(div_ratio, 2022)","(div_ratio, 2023)","(avg_div, )",display_name,industry,industry_name
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
000002.XSHE,0.036866,0.050732,0.031072,0.0,0.039557,万科A,801180,房地产I
000157.XSHE,0.055636,0.044818,0.059041,0.0,0.053165,中联重科,801890,机械设备I
000408.XSHE,0.0,0.0,0.109271,0.031434,0.036424,藏格矿业,801050,有色金属I
000425.XSHE,0.018657,0.0,0.075826,0.0,0.031494,徐工机械,801890,机械设备I
000651.XSHE,0.061517,0.076252,0.061995,0.0,0.066588,格力电器,801110,家用电器I


## 步骤2：输出到CSV文件


In [57]:
# Write the final table to a CSV file; the path is relative to the working directory.
div_ret.to_csv("dividend_information.csv")