In [1]:
import datetime as dt
from utility import (
    read_sql,
    fd_basicinfo,
    fd_typeclass,
    fd_hshkiport,
    nearest_report_date
)
import pandas as pd

In [2]:
# Determine the look-back window: the report date nearest to three years ago,
# plus today's date as a YYYYMMDD string for the queries below.

today = dt.datetime.today()
try:
    date_3yrs_ago = today.replace(year=today.year - 3)
except ValueError:
    # Only reachable on Feb 29: the year three years back is never a leap
    # year, so fall back to Feb 28 instead of crashing.
    date_3yrs_ago = today.replace(year=today.year - 3, day=28)
report_dates_begin = nearest_report_date(date_3yrs_ago.strftime("%Y%m%d"))
current_date = today.strftime("%Y%m%d")

In [3]:
# Fetch all target equity funds (L1CODE = 1) whose classification end date is
# on or after today. ENDDATE = '19000101' is also accepted -- presumably the
# "no end date" sentinel; TODO confirm against the table definition.
#
# The date filter uses `current_date` (YYYYMMDD), the same format as the
# '19000101' literal. Interpolating the raw `today` datetime would render as
# 'YYYY-MM-DD HH:MM:SS...' and compare incorrectly against YYYYMMDD values.

security_ids = read_sql(f"""
select SECURITYID from TQ_FD_TYPECLASS
WHERE
    ISVALID = 1 AND
    L1CODE = 1 AND
    (ENDDATE>='{current_date}' or ENDDATE = '19000101')
ORDER BY SECURITYID;
""")["SECURITYID"].unique().tolist()

In [7]:
# Load the three per-fund datasets for the selected securities as of today.
basic_info, type_info, hkport_info = (
    fd_basicinfo(security_ids, current_date),
    fd_typeclass(security_ids, current_date),
    fd_hshkiport(security_ids, current_date),
)
# Keep only the rows whose INDUSTRYNAME is "合计" ("total").
is_total_row = hkport_info["INDUSTRYNAME"] == "合计"
hkport_info = hkport_info.loc[is_total_row]

In [8]:
# Sum market value and net-asset share per fund and report date.
# The numeric columns are selected explicitly: older pandas silently dropped
# the text columns (INDCLASSCODE, INDUSTRYCODE, INDUSTRYNAME) during sum(),
# but that implicit dropping was removed in pandas 2.0.
hkport_info.groupby(["SECURITYID", "REPORTDATE"], as_index=False)[["MVALUE", "ACCNETMKTCAP"]].sum()

Unnamed: 0,SECURITYID,REPORTDATE,MVALUE,ACCNETMKTCAP
0,1030004030,20200930,5.729386e+07,9.13
1,1030004030,20201231,9.348727e+07,12.81
2,1030004030,20210331,1.735264e+08,23.05
3,1030004030,20210630,1.639447e+07,15.62
4,1030004030,20210930,3.454067e+06,32.85
...,...,...,...,...
359,1030009731,20200930,1.399008e+07,95.53
360,1030009731,20201231,9.791957e+06,95.17
361,1030009731,20210331,1.490032e+07,93.93
362,1030009731,20210630,2.953495e+07,92.27


In [9]:
# Spot-check one fund: keep its rows, then display the 20210930 report row(s).
sss = hkport_info.loc[hkport_info["SECURITYID"] == '1030009731']

sss.loc[sss["REPORTDATE"] == '20210930']

Unnamed: 0,SECURITYID,INDCLASSCODE,INDUSTRYCODE,INDUSTRYNAME,REPORTDATE,MVALUE,ACCNETMKTCAP
4306,1030009731,2102,ZZZ,合计,20210930,28863697.51,91.64


In [28]:
# Join classification info with basic fund info on the security identifier.
# NOTE(review): the displayed result repeats some SECURITYID rows
# (e.g. 1030000045) -- confirm whether the inputs should be de-duplicated.
df = type_info.merge(basic_info, on="SECURITYID", how="inner")

In [29]:
# List the distinct level-3 classification names present after the merge.
df["L3NAME"].unique()

array(['标准指数股票型基金', '增强指数股票型基金', 'ETF联接基金', '其他行业股票型基金',
       '偏股型基金（股票上下限60%-95%）', '标准股票型基金', '医药行业股票型基金', '偏债型基金',
       '股票型分级子基金(优先份额)', '股票型分级子基金(进取份额)', '封闭式标准股票型基金'], dtype=object)

In [30]:
# Classification: label each fund with a coarser equity sub-category based on
# its L3NAME. Rows whose L3NAME is not listed here receive no label.
subclass_to_l3names = {
    "普通股票型基金": [
        "其他行业股票型基金",
        "偏股型基金（股票上下限60%-95%）",
        "标准股票型基金",
        "医药行业股票型基金",
        "封闭式标准股票型基金",
    ],
    "被动指数型基金": ["标准指数股票型基金"],
    "增强指数型基金": ["增强指数股票型基金"],
}
for subclass, l3_names in subclass_to_l3names.items():
    df.loc[df["L3NAME"].isin(l3_names), "股票型（子类）"] = subclass

In [31]:
# Count the non-null entries of every column within each sub-category.
df.groupby(by="股票型（子类）").count()

Unnamed: 0_level_0,SECURITYID,L1CODE,L1NAME,L2CODE,L2NAME,L3CODE,L3NAME,FDNAME,SNAMECOMP,FSYMBOL,FDNATURE,INVESTSTYLE
股票型（子类）,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
增强指数型基金,63,63,63,63,63,63,63,63,45,63,63,63
普通股票型基金,217,217,217,217,217,217,217,217,66,217,217,217
被动指数型基金,344,344,344,344,344,344,344,344,215,344,344,344


In [32]:
# Display the merged frame, including the sub-category column, for inspection.
df

Unnamed: 0,SECURITYID,L1CODE,L1NAME,L2CODE,L2NAME,L3CODE,L3NAME,FDNAME,SNAMECOMP,FSYMBOL,FDNATURE,INVESTSTYLE,股票型（子类）
0,1030000030,1,股票基金,1.2,指数股票型基金,1.2.1,标准指数股票型基金,国泰沪深300指数证券投资基金,国泰沪深300指数,020011,证券投资基金,指数型,被动指数型基金
1,1030000034,1,股票基金,1.2,指数股票型基金,1.2.2,增强指数股票型基金,华安MSCI中国A股指数增强型证券投资基金,,040002,证券投资基金,指数型,增强指数型基金
2,1030000045,1,股票基金,1.2,指数股票型基金,1.2.1,标准指数股票型基金,博时裕富沪深300指数证券投资基金,博时沪深300指数,050002,证券投资基金,指数型,被动指数型基金
3,1030000045,1,股票基金,1.2,指数股票型基金,1.2.1,标准指数股票型基金,博时裕富沪深300指数证券投资基金,博时沪深300指数,050002,证券投资基金,指数型,被动指数型基金
4,1030000096,1,股票基金,1.2,指数股票型基金,1.2.2,增强指数股票型基金,易方达上证50指数增强型证券投资基金,易方达上证50增强,110003,证券投资基金,指数型,增强指数型基金
...,...,...,...,...,...,...,...,...,...,...,...,...,...
933,1030005090,1,股票基金,1.2,指数股票型基金,1.2.1,标准指数股票型基金,富国中证煤炭指数分级证券投资基金,富国中证煤炭指数分级,161032,证券投资基金,指数型,被动指数型基金
934,1030005095,1,股票基金,1.2,指数股票型基金,1.2.1,标准指数股票型基金,信诚中证信息安全指数分级证券投资基金,信诚中证信息安全指数分级,165523,证券投资基金,指数型,被动指数型基金
935,1030005100,1,股票基金,1.2,指数股票型基金,1.2.1,标准指数股票型基金,信诚中证智能家居指数分级证券投资基金,信诚中证智能家居指数分级,165524,证券投资基金,指数型,被动指数型基金
936,1030005248,1,股票基金,1.2,指数股票型基金,1.2.1,标准指数股票型基金,华安创业板50指数分级证券投资基金,华安创业板50指数分级,160420,证券投资基金,指数型,被动指数型基金
