In [1]:
import requests
import xmltodict
import time
import os
from rebuild import get_edge_point, rebuild_line

class LineTracer(object):
    """Collects the distinct points recorded for one line and persists them to a CSV file.

    Every known point is a key of ``_diver`` (the values are always ``True``).
    ``_file`` is an append-mode handle on ``data/<line_id>.csv``; callers write
    new points through it and are responsible for closing it.
    """

    def __init__(self, line_id, init_fail=0, mx_size=15):
        """Open (creating if necessary) the line's CSV and load the points already in it.

        Args:
            line_id: Identifier of the line; also the stem of its CSV under ``data/``.
            init_fail: Initial value of the failure counter.
            mx_size: CSV size, in KiB, below which ``check_ready`` reports "not ready".
        """
        self._line_id = line_id
        # "a+" creates the file when it is missing, so the read below cannot fail
        # on a line that has never been traced before.
        self._file = open(self._csv_path(), "a+")
        self._diver = dict()
        self._fail_cnt = init_fail
        self._mx_size = mx_size
        self._mx_line_num = 3

        # Use a context manager so the read handle is closed deterministically.
        with open(self._csv_path()) as stored:
            for line in stored:
                key = line.strip()
                # Skip blank lines; an empty string is not a point.
                if key:
                    self._diver[key] = True

    def _csv_path(self):
        """Return the path of this line's CSV file."""
        return "data/{line_id}.csv".format(line_id=self._line_id)

    def check_ready(self, max_fail):
        """Decide whether tracing of this line can stop.

        Returns:
            True when the failure counter exceeds ``max_fail``, or when the CSV
            has reached the size threshold and the rebuilt result has at most
            ``_mx_line_num`` entries. False otherwise; when the rebuilt result
            is still too fragmented the size threshold is raised by 5 KiB so the
            rebuild is not retried until more data has been collected.
        """
        if self._fail_cnt > max_fail:
            return True
        if os.stat(self._csv_path()).st_size < 1024 * self._mx_size:
            return False
        sx, sy = get_edge_point(self._diver)
        # NOTE(review): the meaning of the 200 argument is defined by rebuild_line,
        # which is not visible in this file.
        line_x, line_y = rebuild_line(sx, sy, self._diver, 200)
        if len(line_x) > self._mx_line_num:
            self._mx_size += 5
            return False
        return True

    def desc(self):
        """Print the line id, the number of known points and the failure counter."""
        print("[{line_id}] updated, total point num: {num}, fail cnt: {fail_cnt}".format(
            line_id=self._line_id, num=len(self._diver), fail_cnt=self._fail_cnt))

In [2]:
import base64
import hashlib
from Crypto.Cipher import ARC4 as rc4

def decrypt(key, data):
    """Base64-decode ``data`` and decrypt the result with RC4.

    The cipher key is the hexadecimal MD5 digest of ``'aibang' + key``,
    encoded as UTF-8 bytes. Returns the decrypted bytes.
    """
    ciphertext = base64.b64decode(data)
    digest_hex = hashlib.md5(('aibang' + key).encode('utf-8')).hexdigest()
    cipher = rc4.new(digest_hex.encode('utf-8'))
    return cipher.decrypt(ciphertext)


In [3]:
# Request headers sent with every call made by get_line_info().
# NOTE(review): CID, ABTOKEN and TIME are hard-coded literals. TIME looks like a
# fixed Unix timestamp and ABTOKEN may be derived from it — confirm, and consider
# loading these values from the environment instead of committing them.
headers = {
    'Host': 'transapp.btic.org.cn',
    'PID': '5',
    'PLATFORM': 'ios',
    'CID': '18d31a75a568b1e9fab8e410d398f981',
    'TIME': '1539706356',
    'ABTOKEN': '31d7dae1d869a172f3b66fa14fe274d1',
    'VID': '6',
    'IMEI': '3256',
    'CTYPE': 'json'
}

def get_line_info(tracer):
    """Fetch the current bus positions of a line and record the ones not seen before.

    Each bus entry carries encrypted ``x``/``y`` values that are decrypted with
    the entry's ``gt`` field as key. Every new ``"x,y"`` pair (six decimals) is
    added to ``tracer._diver`` and appended to ``tracer._file``; finding at
    least one new pair resets ``tracer._fail_cnt`` to 0.

    Args:
        tracer: The LineTracer whose line is queried and updated.

    Returns:
        False when the server answers with a non-200 status, True otherwise.

    Raises:
        Whatever ``requests``, ``xmltodict``, ``decrypt`` or the dictionary
        lookups raise (network errors, unexpected response layout). Nothing is
        caught here; the caller decides how to treat a failed poll.
    """
    url = 'http://transapp.btic.org.cn:8512/ssgj/bus.php?id={id}&no=1&encrypt=1'.format(id=tracer._line_id)
    resp = requests.get(url, headers=headers, timeout=(3.05, 9.05))
    if resp.status_code != 200:
        print("request error: {code} {data}".format(code=resp.status_code, data=resp.text))
        return False
    data = xmltodict.parse(resp.text)
    bus_list = data['root']['data']['bus']
    # A lone <bus> element is parsed as a single mapping rather than a list,
    # so normalise to a list before iterating.
    if not isinstance(bus_list, list):
        bus_list = [bus_list]

    for bus in bus_list:
        gt = bus['gt']
        x = float(decrypt(gt, bus['x']))
        y = float(decrypt(gt, bus['y']))

        key = "%.6f,%.6f" % (x, y)
        if key not in tracer._diver:
            tracer._diver[key] = True
            tracer._file.write("{data}\n".format(data=key))
            tracer._fail_cnt = 0
    tracer._file.flush()
    tracer.desc()
    return True

In [4]:
import os
import json
from IPython.display import clear_output


class LineDownloader(object):
    """Polls a bounded set of lines through LineTracer objects until each is judged complete.

    ``_working`` maps line id -> LineTracer for lines still being traced and
    ``_finished`` holds the ids of completed lines (values are always ``None``).
    This class only updates those dictionaries; it does not move any files.
    """

    def __init__(self, max_work=10, max_sz=15, fail_retry=10):
        """Load the line list and the state left on disk by earlier runs.

        Args:
            max_work: Target size of the working set and cap on requests per round.
            max_sz: Initial CSV size threshold (KiB) handed to every tracer.
            fail_retry: Failure count above which a line is considered finished;
                also the initial failure counter given to each tracer.
        """
        self._max_work = max_work
        self._max_sz = max_sz
        self._fail_retry = fail_retry
        # Context manager so the handle on bus.json is closed after parsing.
        with open('bus.json') as bus_file:
            self._bus_info = json.load(bus_file)['lines']['line']
        self._bus_index = 0
        self._finished = dict()
        self._working = dict()

        print("Init LineDownloader...")
        for file in os.listdir('data'):
            if file.split(".")[-1] != 'csv':
                continue
            file = file.split(".")[0]
            self._working[file] = self._new_tracer(file)
        print("Load {num} unfinished lines".format(num=len(self._working)))
        for file in os.listdir('data/finished'):
            if file.split(".")[-1] != 'csv':
                continue
            file = file.split(".")[0]
            # NOTE(review): the first character of the stem is dropped, presumably
            # because finished files carry a one-character prefix — confirm.
            self._finished[file[1:]] = None
        print("Load {num} finished lines".format(num=len(self._finished)))
        print("Check finished line")
        self.check_finish()

    def _new_tracer(self, line_id):
        """Create a tracer configured with this downloader's thresholds."""
        return LineTracer(line_id, init_fail=self._fail_retry, mx_size=self._max_sz)

    def check_finish(self):
        """Move every working line whose tracer reports ready into ``_finished``."""
        # Collect first: the dictionary must not change size while it is iterated.
        wait_for_remove = list()
        for file in self._working:
            if self._working[file].check_ready(self._fail_retry):
                print("{line_id} has finished, move to waiting list".format(line_id=file))
                wait_for_remove.append(file)
        for file in wait_for_remove:
            self._working.pop(file)._file.close()
            self._finished[file] = None

    def _work(self):
        """Run one polling round: retire finished lines, refill the working set, poll."""
        self.check_finish()

        while len(self._working) < self._max_work and self._bus_index < len(self._bus_info):
            bus_info = self._bus_info[self._bus_index]
            self._bus_index += 1
            if bus_info['id'] in self._working or bus_info['id'] in self._finished:
                continue
            self._working[bus_info['id']] = self._new_tracer(bus_info['id'])

        req_cnt = 0
        clear_output(wait=True)
        for line_id, tracer in self._working.items():
            # Count the attempt as a failure up front; get_line_info resets the
            # counter to 0 when it finds a new point.
            tracer._fail_cnt += 1
            try:
                get_line_info(tracer)
            except Exception as e:
                # Best effort: one failing line must not stop the round.
                print('[{line_id}] error, fail cnt {fail_cnt}'.format(
                    line_id=line_id, fail_cnt=tracer._fail_cnt), e)
            time.sleep(0.5)
            req_cnt += 1
            # At most max_work requests per round.
            if req_cnt >= self._max_work:
                break
        time.sleep(5)

    def work(self):
        """Poll until every line has been scheduled and the working set is empty."""
        # Also keep going while lines are still being traced; otherwise the last
        # batch would be abandoned the moment the line list runs out.
        while self._bus_index < len(self._bus_info) or self._working:
            self._work()

In [5]:
# Building the downloader reads bus.json, scans data/ and data/finished/ for CSV
# files, and runs an initial check_finish() pass.
downloader = LineDownloader()

Init LineDownloader...
Load 3948 unfinished lines
Load 26 finished lines
Check finished line
784 has finished, move to waiting list
3413 has finished, move to waiting list
974 has finished, move to waiting list
3349 has finished, move to waiting list
3980 has finished, move to waiting list
381 has finished, move to waiting list
197 has finished, move to waiting list
1065 has finished, move to waiting list
1064 has finished, move to waiting list
2029 has finished, move to waiting list
3451 has finished, move to waiting list
936 has finished, move to waiting list
659 has finished, move to waiting list
1468 has finished, move to waiting list
3928 has finished, move to waiting list
3900 has finished, move to waiting list


In [None]:
# Start polling. This call blocks: every round sleeps 0.5 s after each request
# and another 5 s at the end of the round.
downloader.work()

[3573] updated, total point num: 1401, fail cnt: 0
[3572] updated, total point num: 557, fail cnt: 0
