In [1]:
import tushare as ts
import pandas as pd
import json

In [2]:
ts.set_token('53cd3b985c649c978160c6ec04bce24f4fbd2ebcb4673e8f2fba9a43')

pro = ts.pro_api()

In [3]:
key_map = {
    'hghy': '化工行业',
    'dzxx': '电子信息',
    'ysjs': '有色金属',
    'fdc': '房地产',
    'jrhy': '金融行业',
    'jdly': '酒店旅游'
}

In [4]:
with open('new_import/stock_codes.json', 'r') as f:
    js = json.load(f)
    code_dict = dict()
    for k, v in js.items():
        for s in v:
            s = s[2:] + '.' + s[:2]
            s = s.upper()
            code_dict[s] = key_map[k]

In [5]:
codes = list(code_dict.keys())

In [6]:
len(codes)

678

In [7]:
'002450.SZ' in codes

True

In [8]:
pro.stock_basic(ts_code='002450.SZ', exchange='', list_status='P', fields='ts_code,fullname,name')

Unnamed: 0,ts_code,name,fullname
0,002450.SZ,*ST康得,康得新复合材料集团股份有限公司


In [9]:
df_list = []
for status in ['L', 'D', 'P']:
    df_list.append(pro.stock_basic(exchange='', list_status=status, fields='ts_code,fullname,name'))
data = pd.concat(df_list)

In [10]:
len(data)

4202

In [11]:
data = data[data['ts_code'].isin(codes)].reset_index(drop=True)

In [12]:
data

Unnamed: 0,ts_code,name,fullname
0,000001.SZ,平安银行,平安银行股份有限公司
1,000002.SZ,万科A,万科企业股份有限公司
2,000005.SZ,世纪星源,深圳世纪星源股份有限公司
3,000006.SZ,深振业A,深圳市振业(集团)股份有限公司
4,000007.SZ,全新好,深圳市全新好股份有限公司
...,...,...,...
673,300104.SZ,乐视退(退),乐视网信息技术(北京)股份有限公司
674,300431.SZ,暴风退(退),暴风集团股份有限公司
675,600074.SH,退保千(退),江苏保千里视像科技集团股份有限公司
676,600485.SH,*ST信威,北京信威科技集团股份有限公司


In [13]:
data.columns = ['companyId:ID(Company)', 'name:string', 'fullname:string']

In [14]:
data.head()

Unnamed: 0,companyId:ID(Company),name:string,fullname:string
0,000001.SZ,平安银行,平安银行股份有限公司
1,000002.SZ,万科A,万科企业股份有限公司
2,000005.SZ,世纪星源,深圳世纪星源股份有限公司
3,000006.SZ,深振业A,深圳市振业(集团)股份有限公司
4,000007.SZ,全新好,深圳市全新好股份有限公司


In [15]:
'002450.SZ' in data['companyId:ID(Company)']

False

In [16]:
data[':LABEL'] = 'Company'

In [17]:
data.head()

Unnamed: 0,companyId:ID(Company),name:string,fullname:string,:LABEL
0,000001.SZ,平安银行,平安银行股份有限公司,Company
1,000002.SZ,万科A,万科企业股份有限公司,Company
2,000005.SZ,世纪星源,深圳世纪星源股份有限公司,Company
3,000006.SZ,深振业A,深圳市振业(集团)股份有限公司,Company
4,000007.SZ,全新好,深圳市全新好股份有限公司,Company


In [18]:
data.to_csv('new_import/company.csv', index=False)

# 制作industry表

In [19]:
industry_set = set(code_dict.values())

In [22]:
industry2idx = {
    ind: i
    for i, ind in enumerate(industry_set)
}

In [23]:
industry2idx

{'化工行业': 0, '酒店旅游': 1, '金融行业': 2, '电子信息': 3, '有色金属': 4, '房地产': 5}

In [25]:
tup_list = [(v, k) for k, v in industry2idx.items()]

In [27]:
industry_df = pd.DataFrame(tup_list, columns=['industryId:ID(Industry)', 'name:string'])

In [28]:
industry_df[':LABEL'] = 'Industry'

In [29]:
industry_df

Unnamed: 0,industryId:ID(Industry),name:string,:Label
0,0,化工行业,Industry
1,1,酒店旅游,Industry
2,2,金融行业,Industry
3,3,电子信息,Industry
4,4,有色金属,Industry
5,5,房地产,Industry


In [30]:
industry_df.to_csv('new_import/industry.csv', index=False)

# 制作company-industry的关系

In [31]:
code_ind_tups = [(industry2idx[i], c) for c, i in code_dict.items()]

In [32]:
code_ind_tups[0]

(0, '600075.SH')

In [33]:
ind_comp_df = pd.DataFrame(code_ind_tups, columns=[':START_ID(Industry)', ':END_ID(Company)'])

In [34]:
ind_comp_df[':TYPE'] = 'in_industry'

In [36]:
ind_comp_df.head()

Unnamed: 0,:START_ID(Industry),:END_ID(Company),:TYPE
0,0,600075.SH,in_industry
1,0,600078.SH,in_industry
2,0,600091.SH,in_industry
3,0,600135.SH,in_industry
4,0,600141.SH,in_industry


In [37]:
ind_comp_df.to_csv('new_import/industry_company.csv', index=False)