In [4]:
import baostock as bs
import pandas as pd
import datetime, time
import threading
import sys

STOCK_URL = '../static/data/'
_output = sys.stdout
class BaoStock(object):
    _instance_lock = threading.Lock()
    init_date = '1999-07-26'
    data_split = 10000

    def __init__(self):
        pass

    def __new__(cls, *args, **kwargs):
        if not hasattr(cls, '_instance'):
            with BaoStock._instance_lock:
                if not hasattr(cls, '_instance'):
                    BaoStock._instance = super().__new__(cls)

            return BaoStock._instance
        
    def login(self):
        lg = bs.login(user_id='anonymous', password='123456')
        if lg.error_code == '0':
            return
        else:
            _output.write('\rlogin respond error_code:' + lg.error_code)
            _output.write('\rlogin respond  error_msg:' + lg.error_msg)
        
    def logout(self):
        bs.logout

    def get_data(self, code='sh.600000', data_frequency='d', start_date='init_date', end_date='2006-02-01'):
        daily_query = 'date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST'
        min_query = 'date,time,code,open,high,low,close,volume,amount,adjustflag'
        dimension = daily_query
        frequency = 'd'
        print(f'尝试获取 {code} 数据')
        # 根据查询数据的频率，修正查询数据的维度与频率
        if data_frequency == 'd':
            dimension = daily_query
            frequency = 'd'
        elif data_frequency == '5':
            dimension = min_query
            frequency = '5'

        start = datetime.datetime.strptime(start_date, '%Y-%m-%d').date()
        end = datetime.datetime.strptime(end_date, '%Y-%m-%d').date()
        total_days = (end - start).days
        offset = 365
        data_list = []
        for i in range(int(total_days / offset) + 1):
            start_temp = (start + datetime.timedelta(days=offset * i)).strftime("%Y-%m-%d")
            if i == int(total_days / offset):
                end_temp = end_date
            else:
                end_temp = (start + datetime.timedelta(days=offset * (i + 1))).strftime("%Y-%m-%d")
            print(f'尝试获取 {code} 从 {start_temp} 至 {end_temp} 的数据')
            rs = bs.query_history_k_data_plus(code, dimension, start_date=start_temp, end_date=end_temp,
                                              frequency=frequency, adjustflag='3')
            if rs.error_code == '0':
                # 打印结果集
                while (rs.error_code == '0') & rs.next():
                    # 获取一条记录，将记录合并在一起
                    data_list.append(rs.get_row_data())
                # TODO 进度条
                print(f'成功获取 {code} 从 {start_temp} 至 {end_temp} 的数据')
            else:
                print('query_history_k_data_plus respond error_code:' + rs.error_code)
                print('query_history_k_data_plus respond  error_msg:' + rs.error_msg)
        result = pd.DataFrame(data_list, columns=rs.fields)
        return result
    
    # 获取一系列DataFrame的长度
    def count_data(self,data):
        count = data.count().date
        return count
        
    # 获取DataFrame或者某只股票的最近更新
    def get_last_date(self, data_or_code, data_frequency='d'):
        if data_frequency == 'd':
            path = 'day/'
        elif data_frequency == '5':
            path = 'min/'
        else:
            print('Frequency shoulb be d or 5.')
            return
        if type(data_or_code) == pd.core.frame.DataFrame:
            last_date = data.iloc[-1].date
        elif type(data_or_code) == str:
            date_data = pd.read_csv(STOCK_URL + path + data_or_code + '.csv', usecols=['date'])
            if date_data.count().date == 0:
                _output.write(f'\r{code}.csv 无数据')
                return 0
            else:
                last_date = date_data.iloc[-1].date
        return last_date
    
    # 根据数据频率设定文件目录
    def set_path(self, data_frequency):
        if data_frequency == 'd':
            path = STOCK_URL + 'day/'
        elif data_frequency == '5':
            path = STOCK_URL + 'min/'
        else:
            print('Frequency shoulb be d or 5.')
        return path
    
    # 生成CSV文件
    def generate_csv(self,code='sh.600000',data_frequency ='d', *,data_frame):
        path = self.set_path(data_frequency)
        if data_frame.count().date == 0:
            print('Data cant be None.')
            return
        else:
            result = data_frame
            count = result.count().date
            if count < 10000:
                result.to_csv(path + code + '.csv', mode='w', index=False)
                print(f'{code}.csv 已经生成')
            else:
                result[:10000].to_csv(path + code + '.csv', mode='w', index=False)
                self.add_to_csv(code=code,data_frequency=data_frequency,data_frame=result[10000:])
        
    # 向CSV中注入数据
    def add_to_csv(self,code='sh.600000',data_frequency ='d', *,data_frame):
        path = self.set_path(data_frequency)
        count = data_frame.count().date
        print(count)
        if count == 0:
            print('Data cant be Empty.')
            return
        elif count <= self.data_split:
            data_frame.to_csv(path + code + '.csv', mode='a',header=False, index=False)
            last = self.get_last_date(data_or_code=code,data_frequency=data_frequency)
            print(f'{code}已经更新至{last}')
        else:
            pass
            for i in range(int(count/self.data_split)+1):
                t = data_frame[self.data_split*(i):(i+1)*self.data_split]
                t.to_csv(path + code + '.csv', mode='a',header=False, index=False)
                last = self.get_last_date(data_or_code=code,data_frequency=data_frequency)
                print(f'{code}已经更新至{last}')
        print(f'{code}.csv 已经更新至最新')
            
    # 更新一只股票数据
    def up_to_date(self, code='sh.600000', data_frequency='d'):
        self.set_path(data_frequency)
        try:
            last_date = self.get_last_date(data_or_code=code,data_frequency=data_frequency)
            h = datetime.datetime.now().hour
            # 目前设定P.M.06:00后更新当天数据，6点之前更新至昨日即可
            if h <= 18:
                end = (datetime.datetime.now().date() + datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
            else:
                end = datetime.datetime.now().strftime('%Y-%m-%d')
            if last_date == end:
                print(f'{code} 已经更新至最新 {end}')
                return
            start = (datetime.datetime.strptime(last_date, '%Y-%m-%d').date() + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
            add_data = self.get_data(code=code, data_frequency=data_frequencya,start_date=start,end_date=end)
            print(start)
            print(end)
            self.add_to_csv(code=code,data_frequency=data_frequency,data_frame=add_data)
        except Exception as e:
            print(f'没有找到 {code}.csv')
            start='2006-01-01'
            end = datetime.datetime.now().strftime('%Y-%m-%d')
            add_data = self.get_data(code=code, data_frequency=data_frequency,start_date=start,end_date=end)
            count = add_data.count().date
            if count ==0:
                print(f'{code} has no data')
            else:
                print(f'Trying to creat {code}.csv')
                self.generate_csv(code=code,data_frequency=data_frequency,data_frame=add_data)
                
    # 获取所有股票代码    
    def get_all_stock_code(self):
        date = datetime.datetime.now().strftime('%Y-%m-%d')
        # 获取证券信息
        rs = bs.query_all_stock(day='2020-07-08')
        # _output.write('query_all_stock respond error_code:' + rs.error_code)
        # _output.write('query_all_stock respond  error_msg:' + rs.error_msg)

        # 打印结果集
        data_list = []
        while (rs.error_code == '0') & rs.next():
            # 获取一条记录，将记录合并在一起
            data_list.append(rs.get_row_data())
        result = pd.DataFrame(data_list, columns=rs.fields)
        # 结果集输出到csv文件
        print(result)
        result.to_csv(STOCK_URL + 'all_stock.csv', mode='w', index=False, encoding="GBK")
        print(f'所有指数代码已经更新至 {date}')
        
    def update_all_stock(self):
        try:
            code = pd.read_csv(STOCK_URL + 'all_stock.csv', usecols=['code'])
            count = code.count().code
            if count == 0:
                print('指数代码为空，请检查')
                return
            else:
                print('开始更新日交易数据。。。')
                for row in code.iterrows():
                    self.up_to_date(code=row[1].code, data_frequency='d')
                    
                print('开始更新日5分钟交易数据。。。')
                for row in code.iterrows():
                    self.up_to_date(code=row[1].code, data_frequency='5')
        except Exception as e:
            raise e
        # 读取所有股票代码
        # 遍历所有代码，执行更新
        
    def get_baostock_last_date(self):
        daily_query = 'date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST'
        start_date = (datetime.datetime.now().date() + datetime.timedelta(days=-10)).strftime("%Y-%m-%d")
        end_date = datetime.datetime.now().strftime('%Y-%m-%d')
        rs = bs.query_history_k_data_plus('sh.000001',daily_query,start_date=start_date, end_date=end_date,frequency='d', adjustflag='3')
        result = None
        if rs.error_code == '0':
            # 打印结果集
            data_list = []
            while (rs.error_code == '0') & rs.next():
                # 获取一条记录，将记录合并在一起
                data_list.append(rs.get_row_data())
            result = pd.DataFrame(data_list, columns=rs.fields)
        last_date = result.iloc[-1].date
        return last_datea
        
print('Kick my ass')
baostock = BaoStock()
baostock.login()
# baostock.get_baostock_last_date()
# s = baostock.get_data(code='sh.600097',data_frequency='5', start_date='2020-05-26', end_date='2020-06-28')

# print(s)
baostock.get_all_stock_code()
# for i in range(300000):
#     baostock.login()
#     s = baostock.get_data(code='sh.600097',data_frequency='5', start_date='1999-07-26', end_date='2000-06-01')
#     print(i)
    
# baostock.generate_csv(code='sh.600000',data_frame=s,data_frequency='5')
# baostock.up_to_date(code='sh.600000', data_frequency='5')
# baostock.update_all_stock()
baostock.logout()
        

Kick my ass
login success!
           code tradeStatus   code_name
0     sh.000001           1      上证综合指数
1     sh.000002           1      上证A股指数
2     sh.000003           1      上证B股指数
3     sh.000004           1     上证工业类指数
4     sh.000005           1     上证商业类指数
...         ...         ...         ...
4395  sz.399994           1  中证信息安全主题指数
4396  sz.399995           1    中证基建工程指数
4397  sz.399996           1    中证智能家居指数
4398  sz.399997           1      中证白酒指数
4399  sz.399998           1      中证煤炭指数

[4400 rows x 3 columns]
所有指数代码已经更新至 2020-07-09


In [22]:
import sys
import time
_output = sys.stdout
for i in range(100):
    _output.write('\r'+str(i))
    time.sleep(.1)

99

In [33]:
import pandas as pd

STOCK_URL = '../static/data/'
def generate_min_ignore():
    t = {'code': []}
    df = pd.DataFrame(data=t, index=None)
    df.to_csv(STOCK_URL + 'min_ignore.csv', mode='w', index=False, encoding="GBK")
    print(f'成功创建 min_ignore.csv')

# 判断股票代码是否在分钟时间黑名单中
def is_code_in_min_ignore(code, min_ignore):
    ignore_list = min_ignore['code'].values.tolist()
    if code in ignore_list:
        return True
    else:
        return False
# 将股票代码添加至分钟数据黑名单
def add_to_min_ignore(code):
    try:
        t = {'code': [code]}
        df = pd.DataFrame(data=t, index=None)
        df.to_csv(STOCK_URL + 'min_ignore.csv', mode='a', header=False, index=False, encoding="GBK")
        print(f'已经添加 {code} 至 min_ignore.csv.')
    except Exception as e:
        print(e)
        generate_min_ignore()
        add_to_min_ignore(code=code)

    # 读取分钟数据黑名单
def read_min_ignore():
    try:
        ignore_df = pd.read_csv(STOCK_URL + 'min_ignore.csv', usecols=['code'])
        return ignore_df
    except Exception as e:
        print(e)
        generate_min_ignore()
        read_min_ignore()
# generate_min_ignore()
# df = read_min_ignore()
# is_code_in_min_ignore('sh.1000001',df)
# add_to_min_ignore('ssdds')
read_min_ignore()
    


[Errno 2] File ../static/data/min_ignore.csv does not exist: '../static/data/min_ignore.csv'
成功创建 min_ignore.csv


In [11]:
import sys
import time
_output = sys.stdout
for i in range(3*10):
    t = 3*10 - i
    _output.write(f'\r还需等待 {t/10} 秒' + ' ' + '=' * t)
    time.sleep(.1)

