In [None]:
import json
import os
import random
import time

import pymongo
import requests
from tqdm.notebook import tqdm

import a1chemy.data_source as data_source
from a1chemy.common import Tag

# MongoDB connection settings.  Each value can be overridden through an
# environment variable so credentials do not have to live in the notebook; the
# literals are only the legacy fallbacks that keep existing setups working.
# NOTE(review): the fallback password is a secret stored in the notebook --
# rotate it and drop the fallback once A1CHEMY_MONGO_PASSWORD is set everywhere.
mongo_client = pymongo.MongoClient(
    os.environ.get('A1CHEMY_MONGO_URI', "mongodb://localhost:27017/"),
    username=os.environ.get('A1CHEMY_MONGO_USERNAME', 'a1chemy'),
    password=os.environ.get('A1CHEMY_MONGO_PASSWORD', '1B2C9046-E3CC-447F-9961-E125759BA44F'),
)
# Storage helpers from a1chemy.data_source; all of them share the MongoClient above.
mongo_ticks_client = data_source.MongoTicks(mongo_client=mongo_client)
mongo_tag_client = data_source.MongoTags(mongo_client=mongo_client)
mongo_fund = data_source.MongoFund(mongo_client=mongo_client)


# Remote data-source clients read as module-level globals by the grab* functions.
xueqiu_client = data_source.XueQiuDataParser()
jisilu_client = data_source.Jisilu()

def grabNationData(nation_tag_id, params, url, exchange_extractor):
    """Rebuild the tag tree of one listing.

    The listing is fetched with ``xueqiu_client.get_all_stocks`` using the
    given ``params``, ``url`` and ``exchange_extractor``.  Every stored tag
    whose ``parent`` equals ``nation_tag_id`` is deleted, a root tag with that
    id (``parent=None``) is inserted, and then one child tag per listed item is
    inserted, carrying the item's ``symbol`` and ``exchange`` as values.
    """
    listing = xueqiu_client.get_all_stocks(params=params, url=url, exchange_extractor=exchange_extractor)
    listing_size = len(listing)
    print("stocks size=" + str(listing_size))

    # Clear the previous children before writing the fresh set.
    mongo_tag_client.tags_collection.delete_many({'parent': nation_tag_id})
    root_tag = Tag(id=nation_tag_id, parent=None)
    mongo_tag_client.insert(tag=root_tag)

    for item in tqdm(listing):
        payload = dict(symbol=item.symbol, exchange=item.exchange)
        child_tag = Tag(id=item.to_tag_id(), parent=root_tag.id, values=payload)
        mongo_tag_client.insert(tag=child_tag)

def grabListData(pid, tag_id):
    """Rebuild the tag tree of one list returned by ``xueqiu_client.list``.

    The list identified by ``pid`` is fetched, every stored tag whose
    ``parent`` equals ``tag_id`` is deleted, a root tag with id ``tag_id``
    (``parent=None``) is inserted, and one child tag per list member is
    inserted with the member's ``symbol`` and ``exchange`` as values.
    """
    members = xueqiu_client.list(pid=pid)
    member_count = len(members)
    print("stocks size=" + str(member_count))

    # Clear the previous children before writing the fresh set.
    mongo_tag_client.tags_collection.delete_many({'parent': tag_id})
    root_tag = Tag(id=tag_id, parent=None)
    mongo_tag_client.insert(tag=root_tag)

    for member in tqdm(members):
        payload = dict(symbol=member.symbol, exchange=member.exchange)
        child_tag = Tag(id=member.to_tag_id(), parent=root_tag.id, values=payload)
        mongo_tag_client.insert(tag=child_tag)

def grabTicksData(nation_tag_id, params, url, exchange_extractor):
    """Download and store daily ticks for every item of a listing, one by one.

    The listing is fetched with ``xueqiu_client.get_all_stocks``.  For each
    item the ``period='day'`` history is requested, the item's name is copied
    onto the result, the stored ticks for that symbol are deleted and the new
    ticks are upserted.  A failure on one item is reported and the loop moves
    on to the next item.  The total elapsed time is printed at the end.

    ``nation_tag_id`` is accepted but not used by this function.
    """
    all_stocks = xueqiu_client.get_all_stocks(params=params, url=url, exchange_extractor=exchange_extractor)
    start_time = time.time()
    for stock in tqdm(all_stocks):
        try:
            # Throttling between requests is currently disabled:
            # time.sleep(random.uniform(0.1,0.3))
            stock_ticks = xueqiu_client.history(symbol=stock.symbol, exchange=stock.exchange, period='day')
            stock_ticks.name = stock.name
            mongo_ticks_client.delete(exchange=None, symbol=stock.symbol)
            mongo_ticks_client.upsert(exchange=stock.exchange, symbol=stock.symbol, ticks=stock_ticks)
        except Exception as e:
            # Best effort: keep going, but show the cause instead of hiding it
            # (same reporting as grabFundData / grabLofData).
            print("exception when get data, name=" + stock.name)
            print(e)
    end_time = time.time()
    print('total_time={}'.format(end_time - start_time))

def grabTicksDataParallel(nation_tag_id, params, url, exchange_extractor):
    """Download daily ticks for a whole listing in one batch and store them in bulk.

    The listing is fetched with ``xueqiu_client.get_all_stocks``; all
    symbol/exchange pairs are handed to ``xueqiu_client.historys`` with
    ``period='day'``.  Each returned result is matched back to its listing
    item by symbol to copy the name over, then the stored ticks of the matched
    symbols are removed with ``delete_multiple`` and the new ones written with
    ``bulk_upsert``.  Crawl time and Mongo time are printed at the end.

    ``nation_tag_id`` is accepted but not used by this function.
    """
    all_stocks = xueqiu_client.get_all_stocks(params=params, url=url, exchange_extractor=exchange_extractor)
    underlying_list = []
    all_stock_map = {}
    for stock in all_stocks:
        underlying_list.append({'symbol': stock.symbol, 'exchange': stock.exchange})
        all_stock_map[stock.symbol] = stock
    start_time = time.time()
    stock_ticks_list = xueqiu_client.historys(underlying_list=underlying_list, period='day')
    crawl_time = time.time()

    delete_data = []
    upsert_data = []
    for stock_ticks in tqdm(stock_ticks_list):
        try:
            matched_stock = all_stock_map[stock_ticks.symbol]
            stock_ticks.name = matched_stock.name
            delete_data.append(matched_stock.symbol)
            upsert_data.append({'symbol': matched_stock.symbol, 'exchange': matched_stock.exchange, 'ticks': stock_ticks})
        except Exception as e:
            # Report the result that actually failed.  The listing item is not
            # known when the lookup itself fails, so identify it by the symbol
            # on the result (None when the result carries no symbol).
            print("exception when get data, symbol=" + str(getattr(stock_ticks, 'symbol', None)))
            print(e)
    mongo_ticks_client.delete_multiple(symbols=delete_data)
    mongo_ticks_client.bulk_upsert(ticks_list=upsert_data)
    mongo_time = time.time()
    print('crawl:{} mongo:{}'.format(crawl_time - start_time, mongo_time - crawl_time))
def grabFundData(nation_tag_id, params, url, exchange_extractor):
    """Fetch fund details for every item of a listing and store them.

    The listing comes from ``xueqiu_client.get_all_stocks``.  For each item
    ``jisilu_client.get_fund_info`` is called; the stored record for the
    returned symbol is deleted and the new record upserted into ``mongo_fund``.
    A failure on one item is printed (name, then the exception) and the loop
    continues.  ``nation_tag_id`` is accepted but not used.
    """
    listing = xueqiu_client.get_all_stocks(params=params, url=url, exchange_extractor=exchange_extractor)
    for entry in tqdm(listing):
        try:
            # Throttling between requests is currently disabled:
            # time.sleep(random.uniform(0.1,0.3))
            info = jisilu_client.get_fund_info(
                symbol=entry.symbol,
                exchange=entry.exchange,
                name=entry.name,
            )
            mongo_fund.delete(exchange=None, symbol=info.symbol)
            mongo_fund.upsert(exchange=info.exchange, symbol=info.symbol, fund=info)
        except Exception as err:
            failure_line = "exception when get data, name=" + entry.name
            print(failure_line)
            print(err)
            
def grabLofData(nation_tag_id, params, url, exchange_extractor):
    """Fetch LOF details for every item of a listing and store them.

    The listing comes from ``xueqiu_client.get_all_stocks``.  For each item
    ``jisilu_client.get_lof_info`` is called; the stored record for the
    returned symbol is deleted and the new record upserted into ``mongo_fund``.
    A failure on one item is printed (name, then the exception) and the loop
    continues.  ``nation_tag_id`` is accepted but not used.
    """
    listing = xueqiu_client.get_all_stocks(params=params, url=url, exchange_extractor=exchange_extractor)
    for entry in tqdm(listing):
        try:
            # Throttling between requests is currently disabled:
            # time.sleep(random.uniform(0.1,0.3))
            info = jisilu_client.get_lof_info(
                symbol=entry.symbol,
                exchange=entry.exchange,
                name=entry.name,
            )
            mongo_fund.delete(exchange=None, symbol=info.symbol)
            mongo_fund.upsert(exchange=info.exchange, symbol=info.symbol, fund=info)
        except Exception as err:
            failure_line = "exception when get data, name=" + entry.name
            print(failure_line)
            print(err)

In [None]:
def _epoch_millis():
    """Return the current time in whole milliseconds since the epoch, as a string."""
    return str(int(round(time.time() * 1000)))


# Query parameters handed to xueqiu_client.get_all_stocks by the grab* calls.
# Each tuple ends with a '_' entry holding the time at which this cell ran.

# Listing with market 'CN', type 'sh_sz'.
cn_stock_params = (
    ('page', 1),
    ('size', 5000),
    ('order', 'desc'),
    ('orderby', 'percent'),
    ('order_by', 'percent'),
    ('market', 'CN'),
    ('type', 'sh_sz'),
    ('_', _epoch_millis()),
)

# Listing with market 'HK', type 'hk'.
hk_stock_params = (
    ('page', 1),
    ('size', 5000),
    ('order', 'desc'),
    ('orderby', 'percent'),
    ('order_by', 'percent'),
    ('market', 'HK'),
    ('type', 'hk'),
    ('_', _epoch_millis()),
)

# Fund-list query with type '18' (used below under the 'cn_etf' tag).
cn_etf_params = (
    ('type', '18'),
    ('parent_type', '1'),
    ('order', 'desc'),
    ('order_by', 'percent'),
    ('page', '1'),
    ('size', '1000'),
    ('_', _epoch_millis()),
)

# Fund-list query with type '19' (used below under the 'cn_lof' tag).
cn_lof_params = (
    ('type', '19'),
    ('parent_type', '1'),
    ('order', 'desc'),
    ('order_by', 'percent'),
    ('page', '1'),
    ('size', '1000'),
    ('_', _epoch_millis()),
)

# Endpoint passed as `url` together with the two fund-list parameter sets.
etf_url = 'https://xueqiu.com/service/v5/stock/screener/fund/list'

In [None]:
# Refresh the daily ticks of the 'cn_etf' listing with the batched downloader.
# The two disabled calls are the tag rebuild and the one-by-one downloader for
# the same listing, kept for manual runs.
#grabNationData(nation_tag_id='cn_etf', params=cn_etf_params, url=etf_url, exchange_extractor=lambda x:x[0:2])
# grabTicksData(nation_tag_id='cn_etf', params=cn_etf_params, url=etf_url, exchange_extractor=lambda x:x[0:2])
grabTicksDataParallel(
    nation_tag_id='cn_etf',
    params=cn_etf_params,
    url=etf_url,
    exchange_extractor=lambda x: x[:2],
)

In [None]:
# 'cn_stock' listing: rebuild the tags first, then download the ticks one by one.
grabNationData(
    nation_tag_id='cn_stock',
    params=cn_stock_params,
    url=None,
    exchange_extractor=lambda x: x[:2],
)
grabTicksData(
    nation_tag_id='cn_stock',
    params=cn_stock_params,
    url=None,
    exchange_extractor=lambda x: x[:2],
)

In [None]:
# Full refresh, in this order: 'cn_etf' tags + ticks, 'cn_lof' tags + ticks,
# fund details for the 'cn_etf' listing, then 'cn_stock' tags + ticks.
grabNationData(
    nation_tag_id='cn_etf',
    params=cn_etf_params,
    url=etf_url,
    exchange_extractor=lambda x: x[:2],
)
grabTicksData(
    nation_tag_id='cn_etf',
    params=cn_etf_params,
    url=etf_url,
    exchange_extractor=lambda x: x[:2],
)
grabNationData(
    nation_tag_id='cn_lof',
    params=cn_lof_params,
    url=etf_url,
    exchange_extractor=lambda x: x[:2],
)
grabTicksData(
    nation_tag_id='cn_lof',
    params=cn_lof_params,
    url=etf_url,
    exchange_extractor=lambda x: x[:2],
)

grabFundData(
    nation_tag_id='cn_etf',
    params=cn_etf_params,
    url=etf_url,
    exchange_extractor=lambda x: x[:2],
)

grabNationData(
    nation_tag_id='cn_stock',
    params=cn_stock_params,
    url=None,
    exchange_extractor=lambda x: x[:2],
)
grabTicksData(
    nation_tag_id='cn_stock',
    params=cn_stock_params,
    url=None,
    exchange_extractor=lambda x: x[:2],
)



In [None]:
# LOF details for the 'cn_lof' listing.  Uses cn_lof_params (type '19'), the
# same parameter set the other 'cn_lof' calls use; the previous cn_etf_params
# made this cell walk the 'cn_etf' listing instead.
grabLofData(nation_tag_id='cn_lof', params=cn_lof_params, url=etf_url, exchange_extractor=lambda x:x[0:2])


In [None]:
# Fund details for every item of the 'cn_etf' listing.
grabFundData(
    nation_tag_id='cn_etf',
    params=cn_etf_params,
    url=etf_url,
    exchange_extractor=lambda x: x[:2],
)


In [None]:
# 'hk_stock' listing: rebuild the tags, then download the ticks one by one.
# The extractor ignores its argument and always yields 'HKEX'.
grabNationData(
    nation_tag_id='hk_stock',
    params=hk_stock_params,
    url=None,
    exchange_extractor=lambda x: 'HKEX',
)
grabTicksData(
    nation_tag_id='hk_stock',
    params=hk_stock_params,
    url=None,
    exchange_extractor=lambda x: 'HKEX',
)

In [None]:
# Cookie values passed to data_source.XueQiuDataParser(cookies=...) further down
# in this cell.  Presumably captured from a logged-in browser session -- TODO confirm.
# NOTE(review): these include session/auth tokens ('xq_a_token', 'xqat',
# 'xq_id_token', 'xq_r_token') hard-coded in the notebook; they should be
# revoked and loaded from the environment or a secrets store instead.
cookies = {
    '__utmz': '1.1572180062.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)',
    'bid': '783ea5e048552adebcc2fb818cf94d7a_ki4lux2p',
    'Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a': '1615726259',
    'xq_is_login': '1',
    'u': '9309587698',
    'device_id': '61ea52dcd695ee0bad6e5046537b60a2',
    'xq_a_token': '8b7f2861e58cc3c99085831b7a2d2256f748cdf5',
    'xqat': '8b7f2861e58cc3c99085831b7a2d2256f748cdf5',
    'xq_id_token': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJ1aWQiOjkzMDk1ODc2OTgsImlzcyI6InVjIiwiZXhwIjoxNjMxNjMwNjkwLCJjdG0iOjE2MjkwMzg2OTA5MTgsImNpZCI6ImQ5ZDBuNEFadXAifQ.lYKRL3EHynL-buRBdOR3HnZotzmNyQnIEYAJftG2v0QNDnVEQsSz6rNIGgwt26TvRy0bU-cJDpNvooAva89iQZO1izkl7SNUXW9JVXizTGa5ICNK64ds34xA24WG0eL9RHUAGUrMKzGoRcYW3boBMbXYPD0MpDSood4kjWgpHSr9FIVeArVFmfcQe12r4fU6ohxECBVRLWKnRDyw3DbbAbN1oi0GKlYMX5CVsf4ybr8dtYiEc9KCykwnEfC7W3e300trAA_DtVh0PTi2U3bMA2R0WT2uur51FVtJk9KduOERr22bcR-L0kMQ5ZhV9P4Zv1KS0-aRPkk8J8E4awcYtw',
    'xq_r_token': 'a049d10c72e1a536b428c40d35d5d041c5623069',
    's': 'du1218isi8',
    '__utma': '1.904466151.1572180062.1625795897.1629043930.44',
    'Hm_lvt_1db88642e346389874251b5a1eded6e3': '1627741645,1628432878,1628433688,1629384991',
    'acw_tc': '2760828916296256207223479e6887f0d012111ccc263d36848de7232ec594',
    'is_overseas': '0',
    'Hm_lpvt_1db88642e346389874251b5a1eded6e3': '1629625647',
}

# Rebind the module-level xueqiu_client to one constructed with the cookies
# above.  The grab* functions read this global, so every grab* call executed
# after this cell goes through the cookie-carrying client.
xueqiu_client = data_source.XueQiuDataParser(cookies=cookies)

# Rebuild the tag tree of each list, in this order: (pid, tag id).
for _list_pid, _list_tag_id in (
    (5, 'ZH_ETF'),
    (13, 'TOP'),
    (14, 'CASH_COW'),
    (11, 'ZH_STOCK'),
    (16, 'ETF_SELECTED'),
):
    grabListData(pid=_list_pid, tag_id=_list_tag_id)