In [1]:
from collections import defaultdict, Counter
import concurrent.futures
from functools import reduce
import glob

import pandas as pd
import simplejson as json
import jieba

In [2]:
# Location of the vocabulary CSV that is read with pandas in a later cell.
words_filename = '../words/words.csv'
# Dictionary file handed to jieba below.
# NOTE(review): 'dict.txt.big' is presumably jieba's large dictionary with
# better Traditional Chinese coverage - confirm the file's origin.
jieba_dict_filename = '../words/dict.txt.big'
# Make jieba use the file above as its main dictionary.
jieba.set_dictionary(jieba_dict_filename)

In [3]:
def wordlist_to_wordset(word_list):
    """Flatten '/'-separated word groups into a set of individual words.

    Args:
        word_list: iterable of word groups. Only entries whose type is
            exactly ``str`` are used; anything else is skipped.

    Returns:
        set: every '/'-separated piece of every string entry, with
        surrounding whitespace stripped. Empty pieces (e.g. from 'a//b')
        are kept as ''.
    """
    return {
        piece.strip()
        for group in word_list
        if type(group) is str
        for piece in group.split('/')
    }

def build_tw_cn_dict(tw_word_list, cn_word_list):
    """Build two-way lookup tables between the ``tw`` and ``cn`` word columns.

    The two lists are walked in parallel (``zip``), so extra entries in the
    longer list are ignored. Each entry is expected to be a string of
    '/'-separated variants; an entry that is not a ``str`` is treated as a
    single missing word and represented by ``None``.

    Args:
        tw_word_list: iterable of ``tw`` word groups.
        cn_word_list: iterable of ``cn`` word groups, aligned row by row
            with ``tw_word_list``.

    Returns:
        tuple: ``(tw2cn, cn2tw)``, two ``defaultdict(list)`` objects.
        ``tw2cn[tw_word]`` lists every cn variant paired with ``tw_word``
        and ``cn2tw[cn_word]`` lists every tw variant paired with
        ``cn_word``. Duplicates are kept, and ``None`` can appear both as a
        key and as a list element when one side of a row is missing.
    """
    def split_group(word_group):
        # Same parsing rule as the other helpers in this notebook: only real
        # strings are split; anything else becomes one "missing" word.
        if type(word_group) is str:
            return [word.strip() for word in word_group.split('/')]
        return [None]

    tw2cn = defaultdict(list)
    cn2tw = defaultdict(list)

    for tw_word_group, cn_word_group in zip(tw_word_list, cn_word_list):
        tw_words = split_group(tw_word_group)
        # Bug fix: the original assigned ``tw_words = [None]`` when the cn
        # group was missing. That discarded the tw words just parsed and
        # left ``cn_words`` holding the previous row's value (or unbound on
        # the first row, raising UnboundLocalError).
        cn_words = split_group(cn_word_group)

        for tw_word in tw_words:
            for cn_word in cn_words:
                tw2cn[tw_word].append(cn_word)
                cn2tw[cn_word].append(tw_word)

    return tw2cn, cn2tw

def new_defined_wordset(tw_word_list, cn_word_list):
    """Collect cn words that do not occur inside any tw variant of their row.

    Rows are paired with ``zip``. A row is considered only when both its tw
    and cn entries are exactly of type ``str``. Within such a row, a cn
    variant is kept when it is not a substring of any tw variant of that
    row. Finally, every word that appears anywhere in ``tw_word_list`` is
    removed from the result.

    Args:
        tw_word_list: iterable of '/'-separated tw word groups.
        cn_word_list: iterable of '/'-separated cn word groups, aligned
            with ``tw_word_list``.

    Returns:
        set: the remaining cn words.
    """
    candidates = set()
    for tw_group, cn_group in zip(tw_word_list, cn_word_list):
        # Skip rows where either side is missing / not a plain string.
        if type(tw_group) is not str or type(cn_group) is not str:
            continue
        tw_terms = [part.strip() for part in tw_group.split('/')]
        for cn_term in {part.strip() for part in cn_group.split('/')}:
            # Substring test, not equality: a cn term contained in a tw
            # term of the same row is dropped.
            if not any(cn_term in tw_term for tw_term in tw_terms):
                candidates.add(cn_term)

    return candidates - wordlist_to_wordset(tw_word_list)

In [4]:
# Load the vocabulary table. The 'cn_word' and 'tw_word' columns hold
# '/'-separated word groups (that is how the helpers above parse them).
# NOTE(review): non-string cells are presumably NaN for empty CSV fields - confirm.
df = pd.read_csv(words_filename)
cn_word_list = df['cn_word'].tolist()
tw_word_list = df['tw_word'].tolist()
# Flatten each column into a set of individual words.
cn_word_set = wordlist_to_wordset(cn_word_list)
tw_word_set = wordlist_to_wordset(tw_word_list)
# Words to search for: cn words that are not also listed as a tw word.
# This global is read by find_cn_words().
word_set = cn_word_set - tw_word_set
# Alternative, stricter word set (currently disabled):
# filtered_word_set = new_defined_wordset(tw_word_list, cn_word_list)
# Two-way tw <-> cn lookup tables.
tw2cn, cn2tw = build_tw_cn_dict(tw_word_list, cn_word_list)

In [5]:
def find_cn_words(article):
    """Return the words from the global ``word_set`` that occur in ``article``.

    The text is segmented with ``jieba.cut(..., cut_all=False)`` and the
    distinct segments are intersected with ``word_set``, so each matching
    word is reported once regardless of how often it appears.

    Args:
        article: the text to segment.

    Returns:
        set: the segments of ``article`` that are members of ``word_set``.
    """
    # The original also created an unused ``Counter`` here; it had no effect
    # on the result and has been removed.
    segments = set(jieba.cut(article, cut_all=False))
    return segments & word_set

In [6]:
def find_cn_words_from_filename(filename):
    """Scan one JSON article file and count the matched words.

    The file is read as UTF-8 JSON and must provide a 'content' field; the
    'url' field is read only when at least one word matched, in which case
    the URL and the matched set are printed.

    Args:
        filename: path of the JSON article file.

    Returns:
        Counter: built from the set returned by ``find_cn_words``, so each
        matched word has a count of 1.
    """
    with open(filename, 'r', encoding='utf-8') as article_file:
        record = json.load(article_file)

    hits = find_cn_words(record['content'])
    if hits:
        print(record['url'], hits)
    return Counter(hits)

In [12]:
import time

# Accumulates, per word, the number of article files in which it was found
# (each file contributes at most 1 per word, see find_cn_words_from_filename).
freq_counter = Counter()
start_time = time.time()
# NOTE(review): `filenames` is not defined in any cell visible here -
# presumably a list built with glob in an earlier cell; confirm before
# relying on Restart & Run All.
for idx, filename in enumerate(filenames, 1):
    freq_counter += find_cn_words_from_filename(filename)
    # Progress report every 100 files, with wall-clock time since the loop started.
    if idx % 100 == 0:
        print('Has completed %d jobs' % idx)
        print('Time elapsed: %f sec' % (time.time() - start_time))

Has completed 100 jobs
Time elapsed: 1.684376 sec
Has completed 200 jobs
Time elapsed: 3.623185 sec
Has completed 300 jobs
Time elapsed: 5.197073 sec
Has completed 400 jobs
Time elapsed: 6.804704 sec
Has completed 500 jobs
Time elapsed: 9.058501 sec
Has completed 600 jobs
Time elapsed: 11.029980 sec
Has completed 700 jobs
Time elapsed: 13.095075 sec
Has completed 800 jobs
Time elapsed: 15.066096 sec
Has completed 900 jobs
Time elapsed: 16.966455 sec
Has completed 1000 jobs
Time elapsed: 18.621367 sec
Has completed 1100 jobs
Time elapsed: 20.741680 sec
Has completed 1200 jobs
Time elapsed: 22.569165 sec
Has completed 1300 jobs
Time elapsed: 24.505848 sec
Has completed 1400 jobs
Time elapsed: 26.566295 sec
Has completed 1500 jobs
Time elapsed: 28.433637 sec
Has completed 1600 jobs
Time elapsed: 30.520096 sec
Has completed 1700 jobs
Time elapsed: 32.467821 sec
Has completed 1800 jobs
Time elapsed: 34.235077 sec
Has completed 1900 jobs
Time elapsed: 35.777641 sec
Has completed 2000 jobs
Ti

Has completed 15600 jobs
Time elapsed: 289.948839 sec
Has completed 15700 jobs
Time elapsed: 291.833802 sec
Has completed 15800 jobs
Time elapsed: 293.934811 sec
Has completed 15900 jobs
Time elapsed: 295.603940 sec
Has completed 16000 jobs
Time elapsed: 297.726820 sec
Has completed 16100 jobs
Time elapsed: 299.325246 sec
Has completed 16200 jobs
Time elapsed: 301.258444 sec
Has completed 16300 jobs
Time elapsed: 303.011214 sec
Has completed 16400 jobs
Time elapsed: 304.890756 sec
Has completed 16500 jobs
Time elapsed: 306.724864 sec
Has completed 16600 jobs
Time elapsed: 308.628384 sec
Has completed 16700 jobs
Time elapsed: 310.471155 sec
Has completed 16800 jobs
Time elapsed: 312.358535 sec
Has completed 16900 jobs
Time elapsed: 314.367806 sec
Has completed 17000 jobs
Time elapsed: 316.003733 sec
Has completed 17100 jobs
Time elapsed: 318.004829 sec
Has completed 17200 jobs
Time elapsed: 319.737660 sec
Has completed 17300 jobs
Time elapsed: 321.643143 sec
Has completed 17400 jobs
Tim

Has completed 27100 jobs
Time elapsed: 484.750608 sec
Has completed 27200 jobs
Time elapsed: 486.728299 sec
Has completed 27300 jobs
Time elapsed: 488.395028 sec
Has completed 27400 jobs
Time elapsed: 489.716495 sec
Has completed 27500 jobs
Time elapsed: 491.253846 sec
Has completed 27600 jobs
Time elapsed: 492.951571 sec
Has completed 27700 jobs
Time elapsed: 495.587421 sec
Has completed 27800 jobs
Time elapsed: 497.716278 sec
Has completed 27900 jobs
Time elapsed: 499.323168 sec
Has completed 28000 jobs
Time elapsed: 501.271103 sec
Has completed 28100 jobs
Time elapsed: 503.173278 sec
Has completed 28200 jobs
Time elapsed: 504.893394 sec
Has completed 28300 jobs
Time elapsed: 506.077449 sec
Has completed 28400 jobs
Time elapsed: 507.529485 sec
Has completed 28500 jobs
Time elapsed: 509.202990 sec
Has completed 28600 jobs
Time elapsed: 510.715796 sec
Has completed 28700 jobs
Time elapsed: 512.790744 sec
Has completed 28800 jobs
Time elapsed: 514.511575 sec
Has completed 28900 jobs
Tim

Has completed 42300 jobs
Time elapsed: 743.908774 sec
Has completed 42400 jobs
Time elapsed: 745.759018 sec
Has completed 42500 jobs
Time elapsed: 747.364099 sec
Has completed 42600 jobs
Time elapsed: 749.314874 sec
Has completed 42700 jobs
Time elapsed: 750.865845 sec
Has completed 42800 jobs
Time elapsed: 752.300901 sec
Has completed 42900 jobs
Time elapsed: 753.898443 sec
Has completed 43000 jobs
Time elapsed: 755.182628 sec
Has completed 43100 jobs
Time elapsed: 756.504640 sec
Has completed 43200 jobs
Time elapsed: 757.482067 sec
Has completed 43300 jobs
Time elapsed: 759.382347 sec
Has completed 43400 jobs
Time elapsed: 761.016864 sec
Has completed 43500 jobs
Time elapsed: 762.562978 sec
Has completed 43600 jobs
Time elapsed: 764.225237 sec
Has completed 43700 jobs
Time elapsed: 765.771676 sec
Has completed 43800 jobs
Time elapsed: 767.346103 sec
Has completed 43900 jobs
Time elapsed: 769.078698 sec
Has completed 44000 jobs
Time elapsed: 770.532812 sec
Has completed 44100 jobs
Tim

Has completed 57500 jobs
Time elapsed: 999.937946 sec
Has completed 57600 jobs
Time elapsed: 1001.533110 sec
Has completed 57700 jobs
Time elapsed: 1003.507923 sec
Has completed 57800 jobs
Time elapsed: 1005.318292 sec
Has completed 57900 jobs
Time elapsed: 1007.063025 sec
Has completed 58000 jobs
Time elapsed: 1008.531389 sec
Has completed 58100 jobs
Time elapsed: 1010.083422 sec
Has completed 58200 jobs
Time elapsed: 1011.828606 sec
Has completed 58300 jobs
Time elapsed: 1013.299890 sec
Has completed 58400 jobs
Time elapsed: 1014.753269 sec
Has completed 58500 jobs
Time elapsed: 1015.905819 sec
Has completed 58600 jobs
Time elapsed: 1017.596661 sec
Has completed 58700 jobs
Time elapsed: 1019.397997 sec
Has completed 58800 jobs
Time elapsed: 1021.247080 sec
Has completed 58900 jobs
Time elapsed: 1022.688424 sec
Has completed 59000 jobs
Time elapsed: 1024.960100 sec
Has completed 59100 jobs
Time elapsed: 1026.310238 sec
Has completed 59200 jobs
Time elapsed: 1027.797209 sec
Has complet

Has completed 68800 jobs
Time elapsed: 1186.953464 sec
Has completed 68900 jobs
Time elapsed: 1189.009724 sec
Has completed 69000 jobs
Time elapsed: 1190.573038 sec
Has completed 69100 jobs
Time elapsed: 1192.559548 sec
Has completed 69200 jobs
Time elapsed: 1194.026089 sec
Has completed 69300 jobs
Time elapsed: 1195.806233 sec
Has completed 69400 jobs
Time elapsed: 1197.569178 sec
Has completed 69500 jobs
Time elapsed: 1199.476566 sec
Has completed 69600 jobs
Time elapsed: 1200.602284 sec
Has completed 69700 jobs
Time elapsed: 1202.330004 sec
Has completed 69800 jobs
Time elapsed: 1203.367553 sec
Has completed 69900 jobs
Time elapsed: 1204.948066 sec
Has completed 70000 jobs
Time elapsed: 1206.167418 sec
Has completed 70100 jobs
Time elapsed: 1207.860579 sec
Has completed 70200 jobs
Time elapsed: 1209.698414 sec
Has completed 70300 jobs
Time elapsed: 1211.203559 sec
Has completed 70400 jobs
Time elapsed: 1213.193952 sec
Has completed 70500 jobs
Time elapsed: 1214.986794 sec
Has comple

Has completed 83700 jobs
Time elapsed: 1436.378646 sec
Has completed 83800 jobs
Time elapsed: 1437.938238 sec
Has completed 83900 jobs
Time elapsed: 1439.402003 sec
Has completed 84000 jobs
Time elapsed: 1441.516928 sec
Has completed 84100 jobs
Time elapsed: 1443.432632 sec
Has completed 84200 jobs
Time elapsed: 1445.087226 sec
Has completed 84300 jobs
Time elapsed: 1447.090965 sec
Has completed 84400 jobs
Time elapsed: 1448.738329 sec
Has completed 84500 jobs
Time elapsed: 1450.376439 sec
Has completed 84600 jobs
Time elapsed: 1452.105724 sec
Has completed 84700 jobs
Time elapsed: 1453.370524 sec
Has completed 84800 jobs
Time elapsed: 1455.210162 sec
Has completed 84900 jobs
Time elapsed: 1456.797891 sec
Has completed 85000 jobs
Time elapsed: 1458.377413 sec
Has completed 85100 jobs
Time elapsed: 1459.716597 sec
Has completed 85200 jobs
Time elapsed: 1461.226948 sec
Has completed 85300 jobs
Time elapsed: 1462.482357 sec
Has completed 85400 jobs
Time elapsed: 1464.173170 sec
Has comple

Has completed 98600 jobs
Time elapsed: 1685.392705 sec
Has completed 98700 jobs
Time elapsed: 1687.377069 sec
Has completed 98800 jobs
Time elapsed: 1688.895742 sec
Has completed 98900 jobs
Time elapsed: 1690.355818 sec
Has completed 99000 jobs
Time elapsed: 1692.223167 sec
Has completed 99100 jobs
Time elapsed: 1693.960523 sec
Has completed 99200 jobs
Time elapsed: 1695.686891 sec
Has completed 99300 jobs
Time elapsed: 1697.445876 sec
Has completed 99400 jobs
Time elapsed: 1699.218197 sec
Has completed 99500 jobs
Time elapsed: 1700.818639 sec
Has completed 99600 jobs
Time elapsed: 1702.131784 sec
Has completed 99700 jobs
Time elapsed: 1703.579240 sec
Has completed 99800 jobs
Time elapsed: 1705.023412 sec
Has completed 99900 jobs
Time elapsed: 1706.649106 sec
Has completed 100000 jobs
Time elapsed: 1708.153459 sec
Has completed 100100 jobs
Time elapsed: 1709.485122 sec
Has completed 100200 jobs
Time elapsed: 1711.234757 sec
Has completed 100300 jobs
Time elapsed: 1713.297041 sec
Has co

Has completed 109700 jobs
Time elapsed: 1867.707642 sec
Has completed 109800 jobs
Time elapsed: 1868.945130 sec
Has completed 109900 jobs
Time elapsed: 1870.575918 sec
Has completed 110000 jobs
Time elapsed: 1871.998467 sec
Has completed 110100 jobs
Time elapsed: 1873.898212 sec
Has completed 110200 jobs
Time elapsed: 1875.827978 sec
Has completed 110300 jobs
Time elapsed: 1877.220636 sec
Has completed 110400 jobs
Time elapsed: 1879.042401 sec
Has completed 110500 jobs
Time elapsed: 1880.401226 sec
Has completed 110600 jobs
Time elapsed: 1881.855767 sec
Has completed 110700 jobs
Time elapsed: 1883.042176 sec
Has completed 110800 jobs
Time elapsed: 1884.634904 sec
Has completed 110900 jobs
Time elapsed: 1886.054907 sec
Has completed 111000 jobs
Time elapsed: 1887.827454 sec
Has completed 111100 jobs
Time elapsed: 1889.412601 sec
Has completed 111200 jobs
Time elapsed: 1890.017483 sec
Has completed 111300 jobs
Time elapsed: 1892.198035 sec
Has completed 111400 jobs
Time elapsed: 1893.871

Has completed 124400 jobs
Time elapsed: 2132.021481 sec
Has completed 124500 jobs
Time elapsed: 2134.466434 sec
Has completed 124600 jobs
Time elapsed: 2136.502682 sec
Has completed 124700 jobs
Time elapsed: 2138.728998 sec
Has completed 124800 jobs
Time elapsed: 2140.426836 sec
Has completed 124900 jobs
Time elapsed: 2142.220646 sec
Has completed 125000 jobs
Time elapsed: 2144.726244 sec
Has completed 125100 jobs
Time elapsed: 2147.021452 sec
Has completed 125200 jobs
Time elapsed: 2149.241087 sec
Has completed 125300 jobs
Time elapsed: 2151.281644 sec
Has completed 125400 jobs
Time elapsed: 2153.315770 sec
Has completed 125500 jobs
Time elapsed: 2155.840739 sec
Has completed 125600 jobs
Time elapsed: 2158.240846 sec
Has completed 125700 jobs
Time elapsed: 2160.745698 sec
Has completed 125800 jobs
Time elapsed: 2163.890488 sec
Has completed 125900 jobs
Time elapsed: 2166.265946 sec
Has completed 126000 jobs
Time elapsed: 2168.718949 sec
Has completed 126100 jobs
Time elapsed: 2170.908

Has completed 139100 jobs
Time elapsed: 2468.564443 sec
Has completed 5000 tasks
Has completed 139200 jobs
Time elapsed: 2470.135798 sec
Has completed 139300 jobs
Time elapsed: 2471.785683 sec
Has completed 5000 tasks
Has completed 139400 jobs
Time elapsed: 2474.084996 sec
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 139500 jobs
Time elapsed: 2475.936935 sec
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 139600 jobs
Time elapsed: 2477.713705 sec
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 5000 tasks
Has completed 139700 jobs
Time elapsed: 2479.717655 sec
Has completed 5000 tasks
Has comp

Exception in thread Thread-155:
Traceback (most recent call last):
  File "<ipython-input-8-52d523c4c767>", line 23, in run
    func, args = self.input_queue.get(block=False)
  File "/usr/lib/python3.6/queue.py", line 161, in get
    raise Empty
queue.Empty

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "<ipython-input-8-52d523c4c767>", line 24, in run
    except Queue.Empty:
NameError: name 'Queue' is not defined
Exception in thread Thread-90:
Traceback (most recent call last):
  File "<ipython-input-8-52d523c4c767>", line 23, in run
    func, args = self.input_queue.get(block=False)
  File "/usr/lib/python3.6/queue.py", line 161, in get
    raise Empty
queue.Empty

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_in

Exception in thread Thread-106:
Traceback (most recent call last):
  File "<ipython-input-8-52d523c4c767>", line 23, in run
    func, args = self.input_queue.get(block=False)
  File "/usr/lib/python3.6/queue.py", line 161, in get
    raise Empty
queue.Empty

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "<ipython-input-8-52d523c4c767>", line 24, in run
    except Queue.Empty:
NameError: name 'Queue' is not defined



Has completed 148200 jobs
Time elapsed: 2691.080638 sec
Has completed 148300 jobs
Time elapsed: 2691.402152 sec
Has completed 148400 jobs
Time elapsed: 2691.716182 sec
Has completed 148500 jobs
Time elapsed: 2692.088197 sec
Has completed 148600 jobs
Time elapsed: 2692.414788 sec
Has completed 148700 jobs
Time elapsed: 2692.748157 sec
Has completed 148800 jobs
Time elapsed: 2693.068356 sec
Has completed 148900 jobs
Time elapsed: 2693.370850 sec
Has completed 149000 jobs
Time elapsed: 2693.675525 sec
Has completed 149100 jobs
Time elapsed: 2693.984053 sec
Has completed 149200 jobs
Time elapsed: 2694.307536 sec
Has completed 149300 jobs
Time elapsed: 2694.595422 sec
Has completed 149400 jobs
Time elapsed: 2694.872980 sec
Has completed 149500 jobs
Time elapsed: 2695.138221 sec
Has completed 149600 jobs
Time elapsed: 2695.444845 sec
Has completed 149700 jobs
Time elapsed: 2695.816579 sec
Has completed 149800 jobs
Time elapsed: 2696.103344 sec
Has completed 149900 jobs
Time elapsed: 2696.418

Has completed 162900 jobs
Time elapsed: 2738.468834 sec
Has completed 163000 jobs
Time elapsed: 2738.825570 sec
Has completed 163100 jobs
Time elapsed: 2739.256235 sec
Has completed 163200 jobs
Time elapsed: 2739.651255 sec
Has completed 163300 jobs
Time elapsed: 2740.067684 sec
Has completed 163400 jobs
Time elapsed: 2740.384275 sec
Has completed 163500 jobs
Time elapsed: 2740.737512 sec
Has completed 163600 jobs
Time elapsed: 2741.137499 sec
Has completed 163700 jobs
Time elapsed: 2741.515763 sec
Has completed 163800 jobs
Time elapsed: 2741.895637 sec
Has completed 163900 jobs
Time elapsed: 2742.249177 sec
Has completed 164000 jobs
Time elapsed: 2742.571600 sec
Has completed 164100 jobs
Time elapsed: 2742.892433 sec
Has completed 164200 jobs
Time elapsed: 2743.219908 sec
Has completed 164300 jobs
Time elapsed: 2743.556084 sec
Has completed 164400 jobs
Time elapsed: 2743.859761 sec
Has completed 164500 jobs
Time elapsed: 2744.200654 sec
Has completed 164600 jobs
Time elapsed: 2744.566

Has completed 177600 jobs
Time elapsed: 2787.918172 sec
Has completed 177700 jobs
Time elapsed: 2788.266738 sec
Has completed 177800 jobs
Time elapsed: 2788.614668 sec
Has completed 177900 jobs
Time elapsed: 2788.983003 sec
Has completed 178000 jobs
Time elapsed: 2789.361355 sec
Has completed 178100 jobs
Time elapsed: 2789.717731 sec
Has completed 178200 jobs
Time elapsed: 2790.062253 sec
Has completed 178300 jobs
Time elapsed: 2790.330981 sec
Has completed 178400 jobs
Time elapsed: 2790.614359 sec
Has completed 178500 jobs
Time elapsed: 2790.956458 sec
Has completed 178600 jobs
Time elapsed: 2791.286901 sec
Has completed 178700 jobs
Time elapsed: 2791.639936 sec
Has completed 178800 jobs
Time elapsed: 2792.002825 sec
Has completed 178900 jobs
Time elapsed: 2792.352731 sec
Has completed 179000 jobs
Time elapsed: 2792.742296 sec
Has completed 179100 jobs
Time elapsed: 2793.076678 sec
Has completed 179200 jobs
Time elapsed: 2793.395402 sec
Has completed 179300 jobs
Time elapsed: 2793.715

Has completed 192300 jobs
Time elapsed: 2836.136147 sec
Has completed 192400 jobs
Time elapsed: 2836.435493 sec
Has completed 192500 jobs
Time elapsed: 2836.750319 sec
Has completed 192600 jobs
Time elapsed: 2837.089763 sec
Has completed 192700 jobs
Time elapsed: 2837.454108 sec
Has completed 192800 jobs
Time elapsed: 2837.768045 sec
Has completed 192900 jobs
Time elapsed: 2838.102321 sec
Has completed 193000 jobs
Time elapsed: 2838.429829 sec
Has completed 193100 jobs
Time elapsed: 2838.703234 sec
Has completed 193200 jobs
Time elapsed: 2838.986597 sec
Has completed 193300 jobs
Time elapsed: 2839.306802 sec
Has completed 193400 jobs
Time elapsed: 2839.638486 sec
Has completed 193500 jobs
Time elapsed: 2839.946952 sec
Has completed 193600 jobs
Time elapsed: 2840.234472 sec
Has completed 193700 jobs
Time elapsed: 2840.508655 sec
Has completed 193800 jobs
Time elapsed: 2840.834516 sec
Has completed 193900 jobs
Time elapsed: 2841.144133 sec
Has completed 194000 jobs
Time elapsed: 2841.434

Has completed 207000 jobs
Time elapsed: 2882.332405 sec
Has completed 207100 jobs
Time elapsed: 2882.665522 sec
Has completed 207200 jobs
Time elapsed: 2882.975096 sec
Has completed 207300 jobs
Time elapsed: 2883.321296 sec
Has completed 207400 jobs
Time elapsed: 2883.660973 sec
Has completed 207500 jobs
Time elapsed: 2883.991496 sec
Has completed 207600 jobs
Time elapsed: 2884.322217 sec
Has completed 207700 jobs
Time elapsed: 2884.645120 sec
Has completed 207800 jobs
Time elapsed: 2884.946847 sec
Has completed 207900 jobs
Time elapsed: 2885.255049 sec
Has completed 208000 jobs
Time elapsed: 2885.572238 sec
Has completed 208100 jobs
Time elapsed: 2885.836551 sec
Has completed 208200 jobs
Time elapsed: 2886.153063 sec
Has completed 208300 jobs
Time elapsed: 2886.441364 sec
Has completed 208400 jobs
Time elapsed: 2886.767946 sec
Has completed 208500 jobs
Time elapsed: 2887.089254 sec
Has completed 208600 jobs
Time elapsed: 2887.426253 sec
Has completed 208700 jobs
Time elapsed: 2887.732

Has completed 221700 jobs
Time elapsed: 2928.337149 sec
Has completed 221800 jobs
Time elapsed: 2928.648165 sec
Has completed 221900 jobs
Time elapsed: 2928.987849 sec
Has completed 222000 jobs
Time elapsed: 2929.283577 sec
Has completed 222100 jobs
Time elapsed: 2929.618013 sec
Has completed 222200 jobs
Time elapsed: 2929.928784 sec
Has completed 222300 jobs
Time elapsed: 2930.258991 sec
Has completed 222400 jobs
Time elapsed: 2930.597892 sec
Has completed 222500 jobs
Time elapsed: 2930.960744 sec
Has completed 222600 jobs
Time elapsed: 2931.316193 sec
Has completed 222700 jobs
Time elapsed: 2931.675544 sec
Has completed 222800 jobs
Time elapsed: 2931.996646 sec
Has completed 222900 jobs
Time elapsed: 2932.306128 sec
Has completed 223000 jobs
Time elapsed: 2932.621628 sec
Has completed 223100 jobs
Time elapsed: 2932.918466 sec
Has completed 223200 jobs
Time elapsed: 2933.255154 sec
Has completed 223300 jobs
Time elapsed: 2933.541191 sec
Has completed 223400 jobs
Time elapsed: 2933.833

Has completed 236400 jobs
Time elapsed: 2974.671155 sec
Has completed 236500 jobs
Time elapsed: 2974.982749 sec
Has completed 236600 jobs
Time elapsed: 2975.261422 sec
Has completed 236700 jobs
Time elapsed: 2975.596635 sec
Has completed 236800 jobs
Time elapsed: 2975.888288 sec
Has completed 236900 jobs
Time elapsed: 2976.191416 sec
Has completed 237000 jobs
Time elapsed: 2976.480470 sec
Has completed 237100 jobs
Time elapsed: 2976.783621 sec
Has completed 237200 jobs
Time elapsed: 2977.117931 sec
Has completed 237300 jobs
Time elapsed: 2977.498100 sec
Has completed 237400 jobs
Time elapsed: 2977.831441 sec
Has completed 237500 jobs
Time elapsed: 2978.124818 sec
Has completed 237600 jobs
Time elapsed: 2978.442449 sec
Has completed 237700 jobs
Time elapsed: 2978.770688 sec
Has completed 237800 jobs
Time elapsed: 2979.071072 sec
Has completed 237900 jobs
Time elapsed: 2979.381248 sec
Has completed 238000 jobs
Time elapsed: 2979.708028 sec
Has completed 238100 jobs
Time elapsed: 2980.017

Has completed 251100 jobs
Time elapsed: 3022.497788 sec
Has completed 251200 jobs
Time elapsed: 3022.813039 sec
Has completed 251300 jobs
Time elapsed: 3023.106838 sec
Has completed 251400 jobs
Time elapsed: 3023.385773 sec
Has completed 251500 jobs
Time elapsed: 3023.664908 sec
Has completed 251600 jobs
Time elapsed: 3023.948070 sec
Has completed 251700 jobs
Time elapsed: 3024.290404 sec
Has completed 251800 jobs
Time elapsed: 3024.614563 sec
Has completed 251900 jobs
Time elapsed: 3024.906915 sec
Has completed 252000 jobs
Time elapsed: 3025.231889 sec
Has completed 252100 jobs
Time elapsed: 3025.574270 sec
Has completed 252200 jobs
Time elapsed: 3025.920274 sec
Has completed 252300 jobs
Time elapsed: 3026.259634 sec
Has completed 252400 jobs
Time elapsed: 3026.608845 sec
Has completed 252500 jobs
Time elapsed: 3026.956924 sec
Has completed 252600 jobs
Time elapsed: 3027.324811 sec
Has completed 252700 jobs
Time elapsed: 3027.658469 sec
Has completed 252800 jobs
Time elapsed: 3027.934

Has completed 265800 jobs
Time elapsed: 3068.932038 sec
Has completed 265900 jobs
Time elapsed: 3069.242872 sec
Has completed 266000 jobs
Time elapsed: 3069.570891 sec
Has completed 266100 jobs
Time elapsed: 3069.882045 sec
Has completed 266200 jobs
Time elapsed: 3070.166876 sec
Has completed 266300 jobs
Time elapsed: 3070.469240 sec
Has completed 266400 jobs
Time elapsed: 3070.765683 sec
Has completed 266500 jobs
Time elapsed: 3071.066799 sec
Has completed 266600 jobs
Time elapsed: 3071.376052 sec
Has completed 266700 jobs
Time elapsed: 3071.715927 sec
Has completed 266800 jobs
Time elapsed: 3072.048200 sec
Has completed 266900 jobs
Time elapsed: 3072.353713 sec
Has completed 267000 jobs
Time elapsed: 3072.652646 sec
Has completed 267100 jobs
Time elapsed: 3072.974600 sec
Has completed 267200 jobs
Time elapsed: 3073.280129 sec
Has completed 267300 jobs
Time elapsed: 3073.559391 sec
Has completed 267400 jobs
Time elapsed: 3073.899201 sec
Has completed 267500 jobs
Time elapsed: 3074.207

Has completed 280500 jobs
Time elapsed: 3115.383337 sec
Has completed 280600 jobs
Time elapsed: 3115.703010 sec
Has completed 280700 jobs
Time elapsed: 3115.991433 sec
Has completed 280800 jobs
Time elapsed: 3116.293600 sec
Has completed 280900 jobs
Time elapsed: 3116.608274 sec
Has completed 281000 jobs
Time elapsed: 3116.919175 sec
Has completed 281100 jobs
Time elapsed: 3117.233831 sec
Has completed 281200 jobs
Time elapsed: 3117.519758 sec
Has completed 281300 jobs
Time elapsed: 3117.788242 sec
Has completed 281400 jobs
Time elapsed: 3118.050116 sec
Has completed 281500 jobs
Time elapsed: 3118.300661 sec
Has completed 281600 jobs
Time elapsed: 3118.567310 sec
Has completed 281700 jobs
Time elapsed: 3118.839084 sec
Has completed 281800 jobs
Time elapsed: 3119.150555 sec
Has completed 281900 jobs
Time elapsed: 3119.450223 sec
Has completed 282000 jobs
Time elapsed: 3119.741596 sec
Has completed 282100 jobs
Time elapsed: 3120.014117 sec
Has completed 282200 jobs
Time elapsed: 3120.347

Has completed 295200 jobs
Time elapsed: 3161.981232 sec
Has completed 295300 jobs
Time elapsed: 3162.289582 sec
Has completed 295400 jobs
Time elapsed: 3162.607131 sec
Has completed 295500 jobs
Time elapsed: 3162.890584 sec
Has completed 295600 jobs
Time elapsed: 3163.155313 sec
Has completed 295700 jobs
Time elapsed: 3163.448624 sec
Has completed 295800 jobs
Time elapsed: 3163.773636 sec
Has completed 295900 jobs
Time elapsed: 3164.096439 sec
Has completed 296000 jobs
Time elapsed: 3164.417969 sec
Has completed 296100 jobs
Time elapsed: 3164.755079 sec
Has completed 296200 jobs
Time elapsed: 3165.059256 sec
Has completed 296300 jobs
Time elapsed: 3165.373630 sec
Has completed 296400 jobs
Time elapsed: 3165.660644 sec
Has completed 296500 jobs
Time elapsed: 3165.953773 sec
Has completed 296600 jobs
Time elapsed: 3166.301855 sec
Has completed 296700 jobs
Time elapsed: 3166.608116 sec
Has completed 296800 jobs
Time elapsed: 3166.919651 sec
Has completed 296900 jobs
Time elapsed: 3167.246

Has completed 309900 jobs
Time elapsed: 3208.643831 sec
Has completed 310000 jobs
Time elapsed: 3208.928238 sec
Has completed 310100 jobs
Time elapsed: 3209.239229 sec
Has completed 310200 jobs
Time elapsed: 3209.528198 sec
Has completed 310300 jobs
Time elapsed: 3209.820135 sec
Has completed 310400 jobs
Time elapsed: 3210.103550 sec
Has completed 310500 jobs
Time elapsed: 3210.393167 sec
Has completed 310600 jobs
Time elapsed: 3210.655092 sec
Has completed 310700 jobs
Time elapsed: 3210.933947 sec
Has completed 310800 jobs
Time elapsed: 3211.252003 sec
Has completed 310900 jobs
Time elapsed: 3211.556752 sec
Has completed 311000 jobs
Time elapsed: 3211.881529 sec
Has completed 311100 jobs
Time elapsed: 3212.196489 sec
Has completed 311200 jobs
Time elapsed: 3212.497944 sec
Has completed 311300 jobs
Time elapsed: 3212.773433 sec
Has completed 311400 jobs
Time elapsed: 3213.068812 sec
Has completed 311500 jobs
Time elapsed: 3213.369485 sec
Has completed 311600 jobs
Time elapsed: 3213.644

Has completed 324600 jobs
Time elapsed: 3255.149271 sec
Has completed 324700 jobs
Time elapsed: 3255.496982 sec
Has completed 324800 jobs
Time elapsed: 3255.892451 sec
Has completed 324900 jobs
Time elapsed: 3256.255801 sec
Has completed 325000 jobs
Time elapsed: 3256.593647 sec
Has completed 325100 jobs
Time elapsed: 3256.897201 sec
Has completed 325200 jobs
Time elapsed: 3257.309494 sec
Has completed 325300 jobs
Time elapsed: 3257.699830 sec
Has completed 325400 jobs
Time elapsed: 3258.090580 sec
Has completed 325500 jobs
Time elapsed: 3258.476876 sec
Has completed 325600 jobs
Time elapsed: 3258.842861 sec
Has completed 325700 jobs
Time elapsed: 3259.161053 sec
Has completed 325800 jobs
Time elapsed: 3259.466888 sec
Has completed 325900 jobs
Time elapsed: 3259.827663 sec
Has completed 326000 jobs
Time elapsed: 3260.187267 sec
Has completed 326100 jobs
Time elapsed: 3260.542366 sec
Has completed 326200 jobs
Time elapsed: 3260.897294 sec
Has completed 326300 jobs
Time elapsed: 3261.207

Has completed 339300 jobs
Time elapsed: 3304.422426 sec
Has completed 339400 jobs
Time elapsed: 3304.747354 sec
Has completed 339500 jobs
Time elapsed: 3305.054808 sec
Has completed 339600 jobs
Time elapsed: 3305.383987 sec
Has completed 339700 jobs
Time elapsed: 3305.714938 sec
Has completed 339800 jobs
Time elapsed: 3306.045058 sec
Has completed 339900 jobs
Time elapsed: 3306.355333 sec
Has completed 340000 jobs
Time elapsed: 3306.677728 sec
Has completed 340100 jobs
Time elapsed: 3307.024513 sec
Has completed 340200 jobs
Time elapsed: 3307.335099 sec
Has completed 340300 jobs
Time elapsed: 3307.705540 sec
Has completed 340400 jobs
Time elapsed: 3308.000720 sec
Has completed 340500 jobs
Time elapsed: 3308.326871 sec
Has completed 340600 jobs
Time elapsed: 3308.671557 sec
Has completed 340700 jobs
Time elapsed: 3309.010983 sec
Has completed 340800 jobs
Time elapsed: 3309.337305 sec
Has completed 340900 jobs
Time elapsed: 3309.687102 sec
Has completed 341000 jobs
Time elapsed: 3310.007

Has completed 354000 jobs
Time elapsed: 3350.869797 sec
Has completed 354100 jobs
Time elapsed: 3351.164148 sec
Has completed 354200 jobs
Time elapsed: 3351.473239 sec
Has completed 354300 jobs
Time elapsed: 3351.774334 sec
Has completed 354400 jobs
Time elapsed: 3352.070877 sec
Has completed 354500 jobs
Time elapsed: 3352.371608 sec
Has completed 354600 jobs
Time elapsed: 3352.656851 sec
Has completed 354700 jobs
Time elapsed: 3352.935796 sec
Has completed 354800 jobs
Time elapsed: 3353.250641 sec
Has completed 354900 jobs
Time elapsed: 3353.529873 sec
Has completed 355000 jobs
Time elapsed: 3353.799743 sec
Has completed 355100 jobs
Time elapsed: 3354.071249 sec
Has completed 355200 jobs
Time elapsed: 3354.339792 sec
Has completed 355300 jobs
Time elapsed: 3354.628224 sec
Has completed 355400 jobs
Time elapsed: 3354.959578 sec
Has completed 355500 jobs
Time elapsed: 3355.245309 sec
Has completed 355600 jobs
Time elapsed: 3355.605675 sec
Has completed 355700 jobs
Time elapsed: 3355.926

Has completed 368700 jobs
Time elapsed: 3396.721452 sec
Has completed 368800 jobs
Time elapsed: 3396.975191 sec
Has completed 368900 jobs
Time elapsed: 3397.344520 sec
Has completed 369000 jobs
Time elapsed: 3397.736910 sec
Has completed 369100 jobs
Time elapsed: 3398.039331 sec
Has completed 369200 jobs
Time elapsed: 3398.345569 sec
Has completed 369300 jobs
Time elapsed: 3398.617903 sec
Has completed 369400 jobs
Time elapsed: 3398.886814 sec
Has completed 369500 jobs
Time elapsed: 3399.150032 sec
Has completed 369600 jobs
Time elapsed: 3399.443807 sec
Has completed 369700 jobs
Time elapsed: 3399.746034 sec
Has completed 369800 jobs
Time elapsed: 3400.054325 sec
Has completed 369900 jobs
Time elapsed: 3400.347867 sec
Has completed 370000 jobs
Time elapsed: 3400.641322 sec
Has completed 370100 jobs
Time elapsed: 3400.936399 sec
Has completed 370200 jobs
Time elapsed: 3401.253655 sec
Has completed 370300 jobs
Time elapsed: 3401.554117 sec
Has completed 370400 jobs
Time elapsed: 3401.864

Has completed 383400 jobs
Time elapsed: 3441.316554 sec
Has completed 383500 jobs
Time elapsed: 3441.599335 sec
Has completed 383600 jobs
Time elapsed: 3441.877683 sec
Has completed 383700 jobs
Time elapsed: 3442.176433 sec
Has completed 383800 jobs
Time elapsed: 3442.467299 sec
Has completed 383900 jobs
Time elapsed: 3442.817104 sec
Has completed 384000 jobs
Time elapsed: 3443.134899 sec
Has completed 384100 jobs
Time elapsed: 3443.426686 sec
Has completed 384200 jobs
Time elapsed: 3443.766456 sec
Has completed 384300 jobs
Time elapsed: 3444.096754 sec
Has completed 384400 jobs
Time elapsed: 3444.419003 sec
Has completed 384500 jobs
Time elapsed: 3444.741440 sec
Has completed 384600 jobs
Time elapsed: 3445.063358 sec
Has completed 384700 jobs
Time elapsed: 3445.365245 sec
Has completed 384800 jobs
Time elapsed: 3445.726295 sec
Has completed 384900 jobs
Time elapsed: 3446.087605 sec
Has completed 385000 jobs
Time elapsed: 3446.435328 sec
Has completed 385100 jobs
Time elapsed: 3446.787

Has completed 398100 jobs
Time elapsed: 3488.705243 sec
Has completed 398200 jobs
Time elapsed: 3489.024670 sec
Has completed 398300 jobs
Time elapsed: 3489.356108 sec
Has completed 398400 jobs
Time elapsed: 3489.744740 sec
Has completed 398500 jobs
Time elapsed: 3490.075223 sec
Has completed 398600 jobs
Time elapsed: 3490.480947 sec
Has completed 398700 jobs
Time elapsed: 3490.955305 sec
Has completed 398800 jobs
Time elapsed: 3491.382544 sec
Has completed 398900 jobs
Time elapsed: 3491.754202 sec
Has completed 399000 jobs
Time elapsed: 3492.137276 sec
Has completed 399100 jobs
Time elapsed: 3492.502150 sec
Has completed 399200 jobs
Time elapsed: 3492.843389 sec
Has completed 399300 jobs
Time elapsed: 3493.166915 sec
Has completed 399400 jobs
Time elapsed: 3493.547874 sec
Has completed 399500 jobs
Time elapsed: 3493.894004 sec
Has completed 399600 jobs
Time elapsed: 3494.262483 sec
Has completed 399700 jobs
Time elapsed: 3494.589519 sec
Has completed 399800 jobs
Time elapsed: 3494.938

Has completed 412800 jobs
Time elapsed: 3536.378848 sec
Has completed 412900 jobs
Time elapsed: 3536.692495 sec
Has completed 413000 jobs
Time elapsed: 3537.013305 sec
Has completed 413100 jobs
Time elapsed: 3537.319215 sec
Has completed 413200 jobs
Time elapsed: 3537.608677 sec
Has completed 413300 jobs
Time elapsed: 3537.920854 sec
Has completed 413400 jobs
Time elapsed: 3538.233538 sec
Has completed 413500 jobs
Time elapsed: 3538.514927 sec
Has completed 413600 jobs
Time elapsed: 3538.802545 sec
Has completed 413700 jobs
Time elapsed: 3539.084566 sec
Has completed 413800 jobs
Time elapsed: 3539.355992 sec
Has completed 413900 jobs
Time elapsed: 3539.680799 sec
Has completed 414000 jobs
Time elapsed: 3540.026294 sec
Has completed 414100 jobs
Time elapsed: 3540.383819 sec
Has completed 414200 jobs
Time elapsed: 3540.709853 sec
Has completed 414300 jobs
Time elapsed: 3541.092113 sec
Has completed 414400 jobs
Time elapsed: 3541.434291 sec
Has completed 414500 jobs
Time elapsed: 3541.750

In [14]:
# Show the 50 words found in the largest number of article files.
freq_counter.most_common(50)

[('行車', 4578),
 ('技巧', 2607),
 ('上線', 2290),
 ('接手', 1985),
 ('罰款', 1852),
 ('分成', 1757),
 ('電動車', 1537),
 ('電商', 1534),
 ('頂尖', 1356),
 ('專區', 1320),
 ('耳機', 1116),
 ('外帶', 1101),
 ('破局', 1097),
 ('視頻', 1080),
 ('經警', 1045),
 ('務實', 1024),
 ('加快', 1010),
 ('場均', 969),
 ('解放軍', 946),
 ('特首', 852),
 ('央視', 846),
 ('公安', 822),
 ('下海', 782),
 ('網絡', 779),
 ('聯網', 671),
 ('保安', 665),
 ('一次性', 652),
 ('下課', 624),
 ('網民', 586),
 ('匝道', 550),
 ('奔馳', 531),
 ('比特', 513),
 ('質量', 504),
 ('高校', 461),
 ('信息', 454),
 ('大專', 448),
 ('定點', 429),
 ('總書記', 399),
 ('旅遊局', 394),
 ('本體', 379),
 ('轉會', 367),
 ('看點', 363),
 ('麻豆', 359),
 ('互聯網', 341),
 ('筆記本', 341),
 ('被叫', 340),
 ('北漂', 334),
 ('說唱', 330),
 ('摩', 330),
 ('全家福', 320)]