In [182]:
from datetime import datetime
import os

import pandas as pd
import pymongo

def read_lines(fpath, skiprows=0, nrows=0):
    """Read a GBK-encoded, tab-separated text file into a DataFrame.

    The file is read as raw bytes and decoded line by line (undecodable
    bytes are replaced rather than raising). Every ``=`` and ``"`` character
    is removed from each line before it is split on tabs -- presumably to
    unwrap cells exported as ``="value"``; confirm against the data source.

    :param fpath: path of the file to read.
    :param skiprows: number of leading lines to discard.
    :param nrows: number of lines to keep after skipping, *including* the
        header line. ``0`` (the default) keeps every remaining line.
    :return: DataFrame whose column names come from the first kept line and
        whose rows are the following lines. All values are strings. An empty
        DataFrame is returned when no line is left after skipping.
    """
    # A falsy nrows means "no upper bound", which a None slice end expresses.
    stop = skiprows + nrows if nrows else None
    with open(fpath, 'rb') as f:
        raw_lines = f.readlines()[skiprows:stop]

    rows = [
        raw.decode(encoding='gbk', errors='replace')
        # Drop the line terminator so it does not leak into the last
        # column name and the last cell of every row.
        .rstrip('\r\n')
        .replace('=', '')
        .replace('"', '')
        .split('\t')
        for raw in raw_lines
    ]
    if not rows:
        # Nothing to use as a header: return an empty frame instead of
        # failing with IndexError.
        return pd.DataFrame()
    return pd.DataFrame(rows[1:], columns=rows[0])

def read_xlsx(fpath):
    """
    Business helper: one-stop reader for holding files in xlsx format.

    1. In the normal layout the capital summary is the first data row and
       the holdings table starts after skipping three rows.
    2. In the alternative layout (e.g. account 925) the first row of the
       sheet is a merged title cell, so the second column of the first read
       has an 'Unnamed...' header. The capital summary is then read one row
       lower and the holdings table after skipping ten rows.

    :param fpath: path of the xlsx file.
    :return: tuple ``(df_capital, df_holding)``.
    """
    capital = pd.read_excel(fpath, nrows=1)
    holding = pd.read_excel(fpath, skiprows=3)

    second_header = capital.columns.tolist()[1]
    if 'Unnamed' not in second_header:
        return capital, holding

    # Merged title row detected: re-read both tables at shifted offsets.
    capital = pd.read_excel(fpath, nrows=1, skiprows=1)
    holding = pd.read_excel(fpath, skiprows=10)
    return capital, holding


def seccode2bsitem(str_code):
    """
    Map a security code to the balance-sheet item of the account.

    The code is left-padded with zeros to six characters and classified by
    its leading digits.

    :param str_code: security code as a string (up to 6 digits).
    :return:
    str
        {
        'st': stock,
        'ce': cash and cash equivalents,
        'unknown': anything else (a notice is printed)
        }
    :note:
    There is no B-share shareholder account, so B-share codes are not
    considered when cleaning the holding data.

    """
    stock_prefixes = ('600', '601', '603', '688', '689', '00', '30')  # B shares not considered
    cash_prefixes = ('204', '511', '159', '519', '521', '660', '13')

    str_code = str_code.zfill(6)
    if str_code.startswith(stock_prefixes):
        return 'st'
    if str_code.startswith(cash_prefixes):
        return 'ce'

    print(f'New security type found, please check {str_code} type and update the function "seccode2bsitem".')
    return 'unknown'

def process_raw_data_cash(__fpath_holding):
    """Load the capital summary and the holdings table from a holding file.

    The reader is chosen from the file extension, compared
    case-insensitively so that e.g. ``HOLDING.XLSX`` is handled like
    ``holding.xlsx``:

    * ``.xlsx`` -- delegated to ``read_xlsx``.
    * ``.xls``  -- read as tab-separated text with ``read_lines``: the first
      two lines (header + one row) form the capital summary, and the
      holdings table starts at the fourth line.
    * ``.csv``  -- GBK-encoded CSV read with pandas: the first data row is
      the capital summary, and the holdings table starts after skipping
      three lines.

    :param __fpath_holding: path of the holding file.
    :return: tuple ``(df_capital, df_holding)``.
    :raises TypeError: if the extension is none of the above.
    """
    str_ext = os.path.splitext(__fpath_holding)[1].lower()

    if str_ext == '.xlsx':
        return read_xlsx(__fpath_holding)

    if str_ext == '.xls':
        df_capital = read_lines(__fpath_holding, nrows=2)
        df_holding = read_lines(__fpath_holding, skiprows=3)
        return df_capital, df_holding

    if str_ext == '.csv':
        # Account ids and security codes are forced to str so that leading
        # zeros survive parsing.
        df_capital = pd.read_csv(
            __fpath_holding, nrows=1, encoding='gbk',
            dtype={'资产账户': str, '总资产': float, '总负债': float, '净资产': float,
                   '资金可用金': float},
        )
        df_holding = pd.read_csv(
            __fpath_holding, skiprows=3, encoding='gbk',
            dtype={'证券代码': str, '市值': float},
        )
        return df_capital, df_holding

    raise TypeError('Unknown file type!')

# Effectively disable line wrapping and column elision when DataFrames are
# printed, so wide holding tables show every column.
pd.options.display.width = 9999
pd.options.display.max_columns = 9999


# str_today = datetime.strftime(datetime.today(), '%Y%m%d')

In [185]:
# Trading date to process, formatted YYYYMMDD (hard-coded for this run).
str_today = '20200415'
dirpath = f'D:/data/A_trading_data/1500+/A_result/{str_today}'

# Account metadata lives in the local MongoDB, collection basicinfo.myacctsinfo.
client = pymongo.MongoClient('mongodb://localhost:27017/')
db_basicinfo = client['basicinfo']
col_myacctsinfo = db_basicinfo['myacctsinfo']

# Select the accounts recorded for this date with rptmark == '1'.
acct_query = {'date': str_today, 'rptmark': '1'}
for acct_doc in col_myacctsinfo.find(acct_query):
    fpath_holding = acct_doc['fpath_holding']
    # Only entries that contain a path separator are processed.
    if '/' not in fpath_holding:
        continue
    print(fpath_holding)
    # The stored path is appended directly to the dated result directory.
    fpath_holding = f'{dirpath}{fpath_holding}'
    df_capital, df_holding = process_raw_data_cash(fpath_holding)
    # Preview the first two holdings rows (use df_capital[:2] to inspect
    # the capital summary instead).
    print(df_holding[:2])




/1203hao/holding.xls
   证券名称    证券数量    可卖数量 价格币种     成本价      浮动盈亏 盈亏比例(%) 折算汇率         最新市值      当前价 今买数量   今卖数量    证券代码        股东代码         成本金额 备注  \r\n
0  华宝添益  260529  260529  人民币  99.787  56443.94    0.22       26053942.11  100.004    0      0  511990  B882751428  25997498.17     \r\n
1  人福医药       0       0  人民币   0.000  65713.27      --              0.00   21.160    0  38100  600079  B882751428    -65713.27     \r\n
/925hao/holding.xlsx
   证券名称    证券代码  证券余额  证券可用  冻结数量     最新价      成本价  成本(价港币)       市值     浮动盈亏  当日参考盈亏  盈亏比例% 交易市场        股东账号
0  兆易创新  603986   300   300     0  255.13  241.096        0  76539.0  4210.10    2943   5.82   沪A  B881506846
1  晨光文具  603899  1200     0  1200   52.87   52.491        0  63444.0   455.14  455.14   0.72   沪A  B881506846
/1211hao_MS/holding_haitong.xlsx
  全选  序号      代码  证券名称    持股数量    可卖数量  冻结数量     成本价           市值      浮动盈亏    股份余额   可申赎数量  市场        股东代码
0      1  511990  华宝添益  329757  329757     0  99.808  32977019.03  64535.16  32