In [1]:
import os
import re
import time
import math
import json
import random
import asyncio
import aiohttp
import numpy as np
import pandas as pd

In [2]:
from pathlib import Path
# Switch the working directory to the parent of the current one.
# NOTE(review): presumably this makes the `libs` package importable by the
# next cell — confirm against the repository layout.
# Caution: the call is relative to the *current* directory, so re-running this
# cell moves one more level up each time; run it once per kernel session.
os.chdir(Path(os.getcwd()).parent)

In [3]:
from libs.utils import Utils

In [4]:
# Display configuration: NumPy prints floats with two decimals, pandas prints
# floats with two decimals and a thousands separator.
np.set_printoptions(formatter=dict(float="{0:0.2f}".format))
pd.set_option('display.float_format', '{:,.2f}'.format)

In [5]:
# Install the selector-based event loop policy where it exists.
# `asyncio.WindowsSelectorEventLoopPolicy` is only defined on Windows, so the
# lookup is guarded: on other platforms the default policy is kept and
# `policy` is left as None instead of the cell failing with AttributeError.
policy = None
if hasattr(asyncio, 'WindowsSelectorEventLoopPolicy'):
    policy = asyncio.WindowsSelectorEventLoopPolicy()
    asyncio.set_event_loop_policy(policy)

In [6]:
# Default request headers passed to every aiohttp.ClientSession created below.
HEADERS = [
    # The value holds only the user-agent string; the original value repeated
    # the "User-Agent:" field name inside it, producing a malformed header.
    ('User-Agent','Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36'),
    ('Accept','text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
    ('Accept-Encoding','gzip, deflate'),
    # NOTE(review): the last language range carries two q-values ("q=0.5;q=0.4");
    # left unchanged here — confirm which weight was intended.
    ('Accept-Language','zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4')
]

In [7]:
async def get(session, url, ssl=False):
    """Fetch ``url`` through ``session`` and return the response body as text.

    Parameters
    ----------
    session
        Object whose ``get(url, ssl=...)`` returns an async context manager
        yielding a response with ``status`` and an awaitable ``text()``
        (an ``aiohttp.ClientSession`` elsewhere in this notebook).
    url : str
        Address to request.
    ssl
        Forwarded unchanged to ``session.get``; defaults to ``False``.

    Returns
    -------
    str
        The decoded response body.

    Raises
    ------
    AssertionError
        If the response status is not exactly 200. Raised explicitly rather
        than via ``assert`` so the check survives ``python -O`` and the
        message names the failing URL and status.
    """
    async with session.get(url, ssl=ssl) as resp:
        if resp.status != 200:
            raise AssertionError(
                f'GET {url} returned HTTP {resp.status}, expected 200'
            )
        return await resp.text()

In [8]:
class Quotation:
    """Asynchronous quote client that batches stock symbols into grouped requests.

    Each symbol is given an exchange prefix (``sh`` when its six-character code
    starts with ``6``, otherwise ``sz``); prefixed symbols are comma-joined
    into groups and every group is fetched with the module-level ``get``
    coroutine.

    Attributes
    ----------
    api : str
        Base URL of the real-time quote endpoint; a symbol group is appended.
    max_group_size : int
        Largest number of symbols placed in a single request.
    symbols : list
        Symbols tracked by the instance.
    group_count : int
        Requested number of groups, raised when needed so that no group
        exceeds ``max_group_size``.
    group_size : int
        Number of symbols per group (the last group may be shorter).
    symbol_groups : list of str
        Comma-joined, prefixed symbol groups; empty groups are not kept.
    sessions : list
        ``aiohttp.ClientSession`` objects, one per ``group_count``. They stay
        open until ``exit()`` is awaited.
    """

    api = 'http://hq.sinajs.cn/list='
    max_group_size = 800
    # Class-level fallback only: __init__ always binds a per-instance list.
    symbols = []

    # One quote record: code, name, 29 numeric fields, then two trailing fields
    # (stored as `date` and `time`). Compiled once instead of on every parse.
    _real_pattern = re.compile(
        r'(\d+)="([^,=]+)%s%s'
        % (r",([\.\d]+)" * 29, r",([-\.\d:]+)" * 2)
    )

    def __init__(self, symbols=None, group_count=1):
        """Build the symbol groups and open one HTTP session per group.

        Parameters
        ----------
        symbols : list of str, optional
            Symbols to track. When omitted or empty, the list comes from
            ``Utils.get_running_symbols()``.
        group_count : int, default 1
            Desired number of request groups; increased automatically so that
            no group holds more than ``max_group_size`` symbols.
        """
        self.symbols = symbols if symbols else Utils.get_running_symbols()

        min_group_count = math.ceil(len(self.symbols) / self.max_group_size)
        # Floor of 1 avoids a ZeroDivisionError when there are no symbols and
        # a non-positive group_count was requested.
        self.group_count = max(group_count, min_group_count, 1)
        self.group_size = math.ceil(len(self.symbols) / self.group_count)

        prefixed = [self._with_prefix(symbol) for symbol in self.symbols]
        self.symbol_groups = self._join_groups(prefixed, self.group_count)

        self.sessions = [
            aiohttp.ClientSession(headers=HEADERS) for _ in range(self.group_count)
        ]

    @staticmethod
    def _with_prefix(symbol):
        """Return ``symbol``'s last six characters with an ``sh``/``sz`` prefix.

        The exchange is decided from the six-character code itself, so an
        already-prefixed input such as ``'sh600000'`` keeps its ``sh`` prefix
        (testing the first character of the raw input turned it into ``sz``).
        """
        code = symbol[-6:]
        return ('sh' if code.startswith('6') else 'sz') + code

    @staticmethod
    def _join_groups(prefixed, group_count):
        """Split ``prefixed`` into at most ``group_count`` comma-joined groups.

        Groups have ``ceil(len(prefixed) / group_count)`` members; groups that
        would be empty are omitted so no request is issued without symbols.
        """
        if not prefixed or group_count < 1:
            return []
        size = math.ceil(len(prefixed) / group_count)
        return [
            ','.join(prefixed[start:start + size])
            for start in range(0, len(prefixed), size)
        ]

    async def _fetch_all(self, urls):
        """Fetch every URL concurrently and return the bodies in URL order.

        Sessions from ``self.sessions`` are assigned round-robin, so callers
        may request more URLs than there are sessions.
        """
        if not urls:
            return []
        pool = self.sessions
        return await asyncio.gather(
            *(get(pool[idx % len(pool)], url) for idx, url in enumerate(urls))
        )

    @staticmethod
    def _quote_dict(fields):
        """Convert the regex groups of one quote record into a dictionary."""
        quote = dict(
            name=fields[1],
            open=float(fields[2]),
            close=float(fields[3]),
            now=float(fields[4]),
            high=float(fields[5]),
            low=float(fields[6]),
            buy=float(fields[7]),
            sell=float(fields[8]),
            turnover=int(fields[9]),
            volume=float(fields[10]),
        )
        # Five bid levels start at field 11 and five ask levels at field 21;
        # every level is a (volume, price) pair.
        for side, first in (('bid', 11), ('ask', 21)):
            for level in range(5):
                quote[f'{side}{level + 1}_volume'] = int(fields[first + 2 * level])
                quote[f'{side}{level + 1}'] = float(fields[first + 2 * level + 1])
        quote['date'] = fields[31]
        quote['time'] = fields[32]
        return quote

    def parse_real_data(self, data_str, array=None):
        """Parse raw real-time quote text.

        Parameters
        ----------
        data_str : str
            Concatenated response bodies from the quote endpoint.
        array : indexable with ``array[row, col]``, optional
            When given, it is filled in place, one row per matched record in
            match order, with columns ``open, close, now, high, low,
            turnover, volume``; it must have at least as many rows as there
            are records.

        Returns
        -------
        dict or None
            Without ``array``: a dict keyed by the numeric symbol code, each
            value being the dict built by ``_quote_dict``. With ``array``:
            ``None`` (results are written into ``array``).
        """
        matches = self._real_pattern.finditer(data_str)

        if array is None:
            return {
                fields[0]: self._quote_dict(fields)
                for fields in (match.groups() for match in matches)
            }

        for row, match in enumerate(matches):
            stock = match.groups()
            array[row, 0] = float(stock[2])   # open
            array[row, 1] = float(stock[3])   # close
            array[row, 2] = float(stock[4])   # now
            array[row, 3] = float(stock[5])   # high
            array[row, 4] = float(stock[6])   # low
            array[row, 5] = float(stock[9])   # turnover
            # No turnover yet while buy == sell and both differ from `now`:
            # use the buy price as `now` and field 11 (bid1 volume) as turnover.
            # NOTE(review): presumably the pre-open call-auction phase — confirm.
            if array[row, 5] == 0 and stock[4] != stock[7] and stock[7] == stock[8]:
                array[row, 2] = float(stock[7])
                array[row, 5] = float(stock[11])
            array[row, 6] = float(stock[10])  # volume
        return None

    async def exit(self):
        """Close every session opened by ``__init__``."""
        for session in self.sessions:
            await session.close()

    async def snapshot(self, array=None):
        """Fetch real-time quotes for all tracked symbols.

        Returns whatever ``parse_real_data`` returns for the given ``array``.
        """
        urls = [f'{self.api}{group}' for group in self.symbol_groups]
        pages = await self._fetch_all(urls)
        return self.parse_real_data(''.join(pages), array=array)

    async def get_market_values(self):
        """Fetch per-symbol market data from ``sqt.gtimg.cn``.

        Returns
        -------
        dict
            Keyed by symbol code; each value has ``name``, ``symbol``,
            ``mcap``, ``tcap``, ``zt_price``, ``dt_price`` and ``dt``.
            ``mcap``/``tcap`` are NaN when the field is empty; ``zt_price`` and
            ``dt_price`` are NaN when their own field starts with ``-``.
            NOTE(review): presumably circulating/total market cap and
            limit-up/limit-down prices — confirm against the feed's docs.
        """
        urls = [
            f'http://sqt.gtimg.cn/utf8/offset=1,2,3,45,46,31,48,49&q={group}'
            for group in self.symbol_groups
        ]
        pages = await self._fetch_all(urls)
        record = re.compile(r'\d+="(\d+)~([^~]+)~(\d*)~([.\d]*)~([.\d]*)~(\d+)~([-.\d]+)~([-.\d]+)";\n')

        stock_dict = {}
        for match in record.finditer(''.join(pages)):
            _, name, symbol, mcap, tcap, dt, zt_price, dt_price = match.groups()
            stock_dict[symbol] = dict(
                name=name,
                symbol=symbol,
                mcap=float(mcap) if mcap else np.nan,
                tcap=float(tcap) if tcap else np.nan,
                zt_price=np.nan if zt_price.startswith('-') else float(zt_price),
                # Validate dt_price against its own field; the earlier code
                # tested the zt_price field here.
                dt_price=np.nan if dt_price.startswith('-') else float(dt_price),
                dt=dt,
            )
        return stock_dict

    async def real(self, symbols):
        """Fetch real-time quotes for an ad-hoc list of symbols.

        Parameters
        ----------
        symbols : list of str
            Symbols to query; may be empty and may need more request groups
            than the instance has sessions.

        Returns
        -------
        dict
            Same structure as ``parse_real_data`` without ``array``.
        """
        prefixed = [self._with_prefix(symbol) for symbol in symbols]
        group_count = math.ceil(len(prefixed) / self.max_group_size)
        urls = [
            f'{self.api}{group}'
            for group in self._join_groups(prefixed, group_count)
        ]
        pages = await self._fetch_all(urls)
        return self.parse_real_data(''.join(pages))

    async def kline(self, symbols, scale=240, ma=5, length=1023):
        """Fetch K-line data for each symbol.

        Parameters
        ----------
        symbols : list of str
        scale, ma, length
            Substituted into the ``scale``, ``ma`` and ``datalen`` query
            parameters of the request URL.

        Returns
        -------
        dict
            Maps each input symbol to the decoded JSON payload of its request.
        """
        url = 'http://money.finance.sina.com.cn/quotes_service/api/json_v2.php/CN_MarketData.getKLineData?symbol={}&scale={}&ma={}&datalen={}'
        urls = [
            url.format(self._with_prefix(symbol), scale, ma, length)
            for symbol in symbols
        ]

        # A single session inside `async with` is closed even when a request
        # fails; the per-URL sessions used before were leaked in that case.
        async with aiohttp.ClientSession(headers=HEADERS) as session:
            pages = await asyncio.gather(*(get(session, item) for item in urls))

        # The payload uses unquoted object keys; quote them so json.loads accepts it.
        bare_key = re.compile(r'(\w+)\s?:\s?("?[^",]+"?,?)')
        return {
            symbol: json.loads(bare_key.sub(r'"\g<1>":\g<2>', page))
            for symbol, page in zip(symbols, pages)
        }

    async def min_data(self, symbols):
        """Fetch minute-level data for each symbol from ``data.gtimg.cn``.

        Returns
        -------
        dict
            Maps each input symbol to ``{'date': str, 'data': list}`` where
            every data item is a ``(time_str, float, int)`` tuple parsed from
            one ``"<digits> <number> <digits>\\n"`` record of the response.

        Raises
        ------
        AttributeError
            If a response contains no ``date:<digits>`` marker.
        """
        url = 'https://data.gtimg.cn/flashdata/hushen/minute/{}.js'
        urls = [url.format(self._with_prefix(symbol)) for symbol in symbols]

        # Shared session closed by the context manager even if a request fails.
        async with aiohttp.ClientSession(headers=HEADERS) as session:
            pages = await asyncio.gather(*(get(session, item) for item in urls))

        date_marker = re.compile(r'date:(\d+)')
        # Each record ends with a literal backslash followed by "n" in the payload.
        minute_row = re.compile(r'(\d+) ([\d.]+) (\d+)\\n')

        mdata = {}
        for symbol, page in zip(symbols, pages):
            date = date_marker.search(page).group(1)
            data = [
                (stamp, float(price), int(amount))
                for stamp, price, amount in minute_row.findall(page)
            ]
            mdata[symbol] = {
                'date': date,
                'data': data
            }
        return mdata

In [9]:
# Call the project helper before constructing the client.
# NOTE(review): presumably this refreshes the symbol list later returned by
# Utils.get_running_symbols() — confirm in libs.utils.
Utils.update_symbols()

In [None]:
await q.exit()

In [11]:
# No symbols are passed, so the constructor loads them through
# Utils.get_running_symbols() and opens its HTTP sessions.
q = Quotation()

In [None]:
# Number of symbols the client tracks.
len(q.symbols)

In [None]:
# Without an `array` argument, snapshot() returns a dict keyed by symbol code.
ss = await q.snapshot()

In [None]:
# Ad-hoc real-time query for two symbols.
await q.real(['002717', '002116'])

In [None]:
# K-line query with the default scale=240, ma=5, length=1023.
results = await q.kline(['002717', '002116'])

In [None]:
# Inspect single entries of the snapshot taken above.
ss['002717']

In [None]:
ss['688819']

In [None]:
# Take a second snapshot under a separate name.
snapshot= await q.snapshot()

In [None]:
snapshot['002717']

In [None]:
# Dict keyed by symbol code, built from the sqt.gtimg.cn endpoint.
market_values = await q.get_market_values()

In [None]:
market_values['601777']

In [None]:
# How many symbols came back with market values.
len(market_values.keys())

In [None]:
# Each entry of q.symbol_groups is one comma-joined string, so `symbol in symbols`
# is a substring test showing which request group contains each code.
# Note: the loop variable `symbols` is rebound to a list a few cells below.
for symbols in q.symbol_groups:
    print([ symbol in symbols for symbol in ['300933', '688317', '688656', '688819']])

In [None]:
market_values['688656']

In [12]:
# Symbols used for the minute-data query below.
symbols = ['002717', '688007']

In [13]:
# Returns {symbol: {'date': ..., 'data': [(time, price-like float, int), ...]}}.
mdata = await q.min_data(symbols)

In [14]:
# Displays the full result; the output below is long.
mdata

{'002717': {'date': '210305',
  'data': [('0930', 3.13, 2003),
   ('0931', 3.14, 4413),
   ('0932', 3.14, 5351),
   ('0933', 3.14, 6795),
   ('0934', 3.14, 8768),
   ('0935', 3.14, 10107),
   ('0936', 3.15, 10285),
   ('0937', 3.14, 12928),
   ('0938', 3.15, 15770),
   ('0939', 3.13, 17270),
   ('0940', 3.13, 17894),
   ('0941', 3.14, 17972),
   ('0942', 3.14, 18184),
   ('0943', 3.13, 18331),
   ('0944', 3.14, 18473),
   ('0945', 3.13, 18589),
   ('0946', 3.12, 23551),
   ('0947', 3.14, 24874),
   ('0948', 3.14, 25576),
   ('0949', 3.15, 26220),
   ('0950', 3.14, 26266),
   ('0951', 3.15, 29223),
   ('0952', 3.15, 29447),
   ('0953', 3.14, 29472),
   ('0954', 3.15, 29834),
   ('0955', 3.15, 29851),
   ('0956', 3.14, 30309),
   ('0957', 3.15, 30690),
   ('0958', 3.13, 30986),
   ('0959', 3.13, 31098),
   ('1000', 3.13, 31211),
   ('1001', 3.14, 31272),
   ('1002', 3.14, 31433),
   ('1003', 3.14, 31486),
   ('1004', 3.14, 31846),
   ('1005', 3.13, 31880),
   ('1006', 3.13, 32294),
   ('