In [50]:
import os

import pandas as pd
import pymongo

def seccode2bsitem(str_code):
    """
    Transfer the code into b/s item of the account
    :param str_code: 6-digit code
    :return:
    str
        {
        'st': stock, 股票
        'ce': 现金及一般等价物,
        'unknown': others
        }
    :note:
    无B股股东账户， 在清洗持仓数据时，为考虑B股代码问题。

    """
    str_code = str_code.zfill(6)
    if str_code[:3] in ['600', '601', '603', '688', '689']:  # 未考虑B股
        return 'st'
    elif str_code[:2] in ['00', '30']:
        return 'st'
    elif str_code[:3] in ['204', '511', '159', '519', '521', '660']:
        return 'ce'
    elif str_code[:2] in ['13']:
        return 'ce'
    else:
        print(f'New security type found, please check {str_code} type and update the function "seccode2bsitem".')
        return 'unknown'

def process_raw_data_cash(__fpath_holding):
    str_ext = os.path.splitext(__fpath_holding)[1]
    if str_ext in ['.xlsx', '.xls']:
        __df_capital = pd.read_excel(__fpath_holding)
        __df_holding = pd.read_excel(__fpath_holding)
    elif str_ext == '.csv':
        __df_capital = pd.read_csv(__fpath_holding, nrows=1, encoding='gbk',
                                 dtype={'资产账户': str, '总资产': float, '总负债': float, '净资产': float,
                                        '资金可用金': float})
        __df_holding = pd.read_csv(__fpath_holding, skiprows=3, encoding='gbk',
                                 dtype={'证券代码': str, '市值': float})
    else:
        raise TypeError('Unknown file type!')
    return __df_capital, __df_holding
str_date = '20200414'

In [38]:

fpath = f'D:/data/A_trading_data/1500+/A_result/{str_date}/2hao_MS/holding_sw.csv'
df_capital, df_holding = process_raw_data_cash(fpath)
df_capital


Unnamed: 0,资产账户,总资产,市值,资金资产,资金可用金,资金冻结金,资金解冻金,货币代码,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12
0,2019029790,45520035.82,43408154.09,2111881.73,2111881.73,4336364.29,4330295.89,人民币,,,,,


In [24]:
# 此步骤之前，需要规范一下列名。
df_holding_draft = df_holding.loc[:, ['证券代码', '市值']]
df_holding_draft['bsitem'] = df_holding_draft['证券代码'].apply(seccode2bsitem)
holding_mv = df_holding_draft.groupby(by='bsitem').sum().T
dict_bs_mv = holding_mv.to_dict('record')
dict_bs_mv

Unnamed: 0,证券代码,市值,bsitem
0,603360,390474.0,st
1,603528,39150.0,st
2,603538,371175.0,st
3,603669,405440.0,st
4,603686,245106.0,st
...,...,...,...
57,603069,63375.0,st
58,002332,370761.0,st
59,002258,295230.0,st
60,300575,281770.0,st


In [31]:
df_capital.T

Unnamed: 0,0
资产账户,2019029790
总资产,4.53155e+07
市值,4.31975e+07
资金资产,2.11795e+06
资金可用金,2.11795e+06
资金冻结金,3.99109e+06
资金解冻金,4.27167e+06
货币代码,人民币
Unnamed: 8,
Unnamed: 9,


In [45]:
dirpath = f'D:/data/A_trading_data/1500+/A_result/{str_date}'
client = pymongo.MongoClient('mongodb://localhost:27017/')
db_basicinfo = client['basicinfo']
col_myacctsinfo = db_basicinfo['myacctsinfo']
for _ in col_myacctsinfo.find({'date': str_date, 'rptmark': '1'}):
    fpath_holding = _['fpath_holding']
    if '/' in fpath_holding:
        fpath_holding = dirpath + fpath_holding
        df_capital, df_holding = process_raw_data_cash(fpath_holding)
        print(df_capital[:2])
        print(df_holding[:2])





XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'="\xb1\xd2\xd6\xd6"\t'

In [59]:
fpath = f'D:/data/A_trading_data/1500+/A_result/20200414/1203hao/holding.xls'
df_capital, df_holding = process_raw_data_cash(fpath)
df_holding

XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'="\xb1\xd2\xd6\xd6"\t'

In [79]:
f = open(fpath, 'rb')
lines = f.readlines()
for line in lines:
    line = line.decode('gbk').replace('=', '').replace('"','')

    print(line)


币种	余额	可用	可取	参考市值	资产	盈亏	

人民币	3173692.72	3871328.10	3173692.72	50861237.75	54732565.85	-88361.90	



证券名称	证券数量	可卖数量	价格币种	成本价	浮动盈亏	盈亏比例(%)	折算汇率	最新市值	当前价	今买数量	今卖数量	证券代码	股东代码	成本金额	备注	

华宝添益	260519	260519	人民币	99.791	57788.57	0.22		26055286.74	100.013	0	0	511990	B882751428	25997498.17		

人福医药	38100	300	人民币	19.539	70159.84	9.42		814578.00	21.38	37800	37800	600079	B882751428	744418.16		

诺德股份	62	62	人民币	-436.369	27377.26	--		322.40	5.20	0	114500	600110	B882751428	-27054.86		

商赢环球	0	0	人民币	0.000	121087.88	--		0.00	13.320	0	1200	600146	B882751428	-121087.88		

长春经开	11100	11100	人民币	7.541	1428.50	1.71		85137.00	7.67	0	0	600215	B882751428	83708.50		

沧州大化	0	0	人民币	0.000	-5170.25	--		0.00	8.330	0	7600	600230	B882751428	5170.25		

天通股份	40	40	人民币	2468.794	-98450.17	-99.69		301.60	7.54	0	0	600330	B882751428	98751.77		

健康元	20900	0	人民币	12.370	11914.14	4.61		270446.00	12.94	20900	0	600380	B882751428	258531.86		

扬农化工	2000	0	人民币	74.733	2694.11	1.80		152160.00	76.08	2000	0	600486	B882751428	14946

In [70]:
import pandas as pd

a = pd.read_csv(fpath,nrows=1,sep='=',encoding='gbk')
a

Unnamed: 0.1,Unnamed: 0,币种\t,余额\t,可用\t,可取\t,参考市值\t,资产\t,盈亏\t
0,,人民币\t,3173692.72,3871328.1,3173692.72,50861237.75,54732565.85,-88361.9


In [73]:
b= pd.read_csv(fpath, skiprows=3, encoding='gbk', sep='\t=')
b


  """Entry point for launching an IPython kernel.


Unnamed: 0,"=""证券名称""","""证券数量""","""可卖数量""","""价格币种""","""成本价""","""浮动盈亏""","""盈亏比例(%)""","""折算汇率""","""最新市值""","""当前价""","""今买数量""","""今卖数量""","""证券代码""","""股东代码""","""成本金额""","""备注"""
0,"=""华宝添益""\t260519\t260519","""人民币""\t99.791\t57788.57\t0.22\t\t26055286.74\t...","""511990""","""B882751428""","""25997498.17""",,,,,,,,,,,
1,"=""人福医药""\t38100\t300","""人民币""\t19.539\t70159.84\t9.42\t\t814578.00\t21...","""600079""","""B882751428""","""744418.16""",,,,,,,,,,,
2,"=""诺德股份""\t62\t62","""人民币""\t-436.369\t27377.26\t--\t\t322.40\t5.20\...","""600110""","""B882751428""","""-27054.86""",,,,,,,,,,,
3,"=""商赢环球""\t0\t0","""人民币""\t0.000\t121087.88\t--\t\t0.00\t13.320\t0...","""600146""","""B882751428""","""-121087.88""",,,,,,,,,,,
4,"=""长春经开""\t11100\t11100","""人民币""\t7.541\t1428.50\t1.71\t\t85137.00\t7.67\...","""600215""","""B882751428""","""83708.50""",,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,"=""万兴科技""\t300\t300","""人民币""\t79.913\t500.21\t2.09\t\t24474.00\t81.58...","""300624""","""0899198036""","""23973.79""",,,,,,,,,,,
126,"=""金陵体育""\t0\t0","""人民币""\t0.000\t-3870.32\t--\t\t0.00\t25.920\t0\...","""300651""","""0899198036""","""3870.32""",,,,,,,,,,,
127,"=""沪宁股份""\t2100\t2100","""人民币""\t39.390\t3465.45\t4.19\t\t86184.00\t41.0...","""300669""","""0899198036""","""82718.55""",,,,,,,,,,,
128,"=""艾德生物""\t2000\t2000","""人民币""\t77.084\t5231.17\t3.39\t\t159400.00\t79....","""300685""","""0899198036""","""154168.83""",,,,,,,,,,,


In [80]:
def read_lines(fpath, skiprows=0, nrows=0):
    with open(fpath, 'rb') as f:
        list_list_datainline = []
        list_lines = f.readlines()
        if nrows:
            for line in list_lines[skiprows:skiprows+nrows]:
                line = line.decode('gbk').replace('=', '').replace('"','')
                list_datainline = line.split()
                list_list_datainline.apend(list_datainline)
                return list_list_datainline
        else:
            for line in list_lines[skiprows:]:
                line = line.decode('gbk').replace('=', '').replace('"','')
                list_datainline = line.split()
                list_list_datainline.apend(list_datainline)
                return list_list_datainline

print(read_lines(fpath, skiprows=0))

SyntaxError: invalid syntax (<ipython-input-80-7f261790d7bd>, line 2)

In [81]:
print(0 == False)








True
