In [1]:
import requests
import json
import time
from bs4 import BeautifulSoup, Comment
import IPython.display as display
import pickle
import pandas as pd
pd.set_option('max_colwidth',1000)
import re
from tqdm import tqdm

## Start with original url

In [3]:
def url_to_root(url):
    headers = {'Accept-Encoding': 'identity',
               'User-Agent': 
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'

                }
    response = requests.get(url, headers=headers, timeout=5)
#     print(response.status_code)
    
    root = BeautifulSoup(response.text, 'html5lib')
    return root

original_url = 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp'
root = url_to_root(original_url)

## Hieachy 1: parent court categories

In [3]:
def parse_courts(root):
    """Parse the info of parent courts.
    Args:
        root : BeautifulSoup Object
    Return:
        courts_dict (dict): court -> url
    """
    # get all raw infomation about courts (looks urgly)
    info_list = root.find('script', string=re.compile("var myMenu")).text.split("'")
    # find the parent courts' name inside 'span' tag
    courts_list = [BeautifulSoup(i).find('span').text.strip(" ") for i in info_list if i[1:5]=='span']
    # find corresponding urls of parents courts(order matters here)
    courts_urls = [j for j in info_list if j[:5]=='https']
    # store to dict
    courts_dict = {i:j for i, j in zip(courts_list, courts_urls)}
    return courts_dict

courts = parse_courts(root)
courts

{'Court of Final Appeal': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=FA#H1',
 'Court of Appeal of the High Court': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=CA#H2',
 'High Court': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=HC#H3',
 'Competition Tribunal': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=CT#H4',
 'District Court': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=DC#H5',
 'Family Court': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=FC#H6',
 'Lands Tribunal': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=LD#H7',
 'Miscellaneous': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=OT#H8'}

In [4]:
df_courts = pd.DataFrame([(k,v) for k,v in courts.items()], columns=['parent_court', 'parent_court_url'])
df_courts

Unnamed: 0,parent_court,parent_court_url
0,Court of Final Appeal,https://legalref.judiciary.hk/lrs/common/ju/ju...
1,Court of Appeal of the High Court,https://legalref.judiciary.hk/lrs/common/ju/ju...
2,High Court,https://legalref.judiciary.hk/lrs/common/ju/ju...
3,Competition Tribunal,https://legalref.judiciary.hk/lrs/common/ju/ju...
4,District Court,https://legalref.judiciary.hk/lrs/common/ju/ju...
5,Family Court,https://legalref.judiciary.hk/lrs/common/ju/ju...
6,Lands Tribunal,https://legalref.judiciary.hk/lrs/common/ju/ju...
7,Miscellaneous,https://legalref.judiciary.hk/lrs/common/ju/ju...


## Hieachy 2: sub courts

In [5]:
# test case
url = 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=DC#H5'
root = url_to_root(url)
        
# get raw infomation
info_list = root.find('script', string=re.compile("var myMenu")).text.split(",")

200


In [6]:
parent_courts_set = set(df_courts['parent_court'])
parent_courts_set

{'Competition Tribunal',
 'Court of Appeal of the High Court',
 'Court of Final Appeal',
 'District Court',
 'Family Court',
 'High Court',
 'Lands Tribunal',
 'Miscellaneous'}

In [7]:
def clear_name(a):
    ans = ' '.join([i.strip("\\s'")for i in a.split(" ")])
    return ans

parent_courts_set = set(df_courts['parent_court'])
now = set([BeautifulSoup(i.strip("' ")).text.strip(" ") for i in info_list if len(re.findall('<a name=', i))!=0])
sub_courts = sorted(list(now - parent_courts_set))
sub_courts = [clear_name(s) for s in sub_courts]
sub_courts

['Civil Action',
 'Criminal Case',
 'Distraint Case',
 'District Court Tax Claim',
 'Employee Compensation Case',
 'Equal Opportunitie Action',
 'Intended Action',
 'Miscellaneou Proceeding',
 'Occupational Deafne (Compensation) Appeal',
 'Personal Injurie Action',
 'Stamp Duty Appeal']

In [8]:
def parse_sub_courts(courts, parent_courts_set):
    """Parse sub-courts information.
    Args:
        courts (dict) : parent_courts -> url
        parent_courts_set 
    Return:
        sub_dict (dict) : parent_courts -> dictionary(sub_courts -> url) 
    
    """
    sub_dict = {} # initialize

    for c, url in courts.items():
        time.sleep(1) # request web slowly for benevolence
        root = url_to_root(url)
        
        # get sub courts names
        # get raw infomation
        info_list = root.find('script', string=re.compile("var myMenu")).text.split(",")
        # create bs4 object list for strings has tag "a" and attr name
        now = set([BeautifulSoup(i.strip("' ")).text.strip(" ") for i in info_list if len(re.findall('<a name=', i))!=0])
        sub_courts = sorted(list(now - parent_courts_set)) # notice order in alphbets
        sub_courts_cleared = [clear_name(s) for s in sub_courts] # clear names
        print('subcourts:',len(sub_courts))
        
        # get sub courts urls
        # re-get raw infomation
        info_list = root.find('script', string=re.compile("var myMenu")).text.split("'") # notice split by up-comma
        # observe sub-courts' urls are longer than parents', so find sub-courts' urls' length
        http_min_len = min(set([len(i) for i in info_list if i[:5]=='https']))
        # get sub-courts' urls
        sub_courts_urls = [i for i in info_list if i[:5]=='https' if len(i)>http_min_len]
        print('urls:', len(sub_courts_urls))
        # store in dict
        subs = {sc.strip(" "):(sc_, url) for sc, sc_, url in zip(sub_courts, sub_courts_cleared, sub_courts_urls)}
        # assign to parent courts
        print('dict length: ',len(subs))
        sub_dict[c] = subs
    return sub_dict

parent_courts_set = set(df_courts['parent_court'])
sub_dict = parse_sub_courts(courts, parent_courts_set)
sub_dict

200
subcourts: 5
urls: 5
dict length:  5
200
subcourts: 6
urls: 6
dict length:  6
200
subcourts: 31
urls: 31
dict length:  31
200
subcourts: 2
urls: 2
dict length:  2
200
subcourts: 11
urls: 11
dict length:  11
200
subcourts: 3
urls: 3
dict length:  3
200
subcourts: 19
urls: 19
dict length:  19
200
subcourts: 11
urls: 11
dict length:  11


{'Court of Final Appeal': {'Final Appeal (Civil)': ('Final Appeal (Civil)',
   'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=FA&L2=CV&AR=1#A1'),
  'Final Appeal (Criminal)': ('Final Appeal (Criminal)',
   'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=FA&L2=CC&AR=2#A2'),
  'Miscellaneous Proceedings': ('Miscellaneou Proceeding',
   'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=FA&L2=MP&AR=3#A3'),
  'Miscellaneous Proceedings (Civil)': ('Miscellaneou Proceeding (Civil)',
   'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=FA&L2=MV&AR=4#A4'),
  'Miscellaneous Proceedings (Criminal)': ('Miscellaneou Proceeding (Criminal)',
   'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=FA&L2=MC&AR=5#A5')},
 'Court of Appeal of the High Court': {'Application for Review': ('Application for Review',
   'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=CA&L2=AR&AR=1#A1'),
  "Attorney General\\'s Reference": ('Attorney General 

In [9]:
def df_for_sub(sub_dict):
    return pd.DataFrame([(k,v[0],v[1]) for k,v in sub_dict.items()], columns=['sub_court', 'sub_court_cleared', 'sub_court_url'])

dfs = []
for p_court, s_dict in sub_dict.items():
    sub_df = df_for_sub(s_dict)
    sub_df['parent_court'] = p_court
    dfs.append(sub_df)
    
df_all = pd.concat(dfs, axis=0)
df_all.index = range(len(df_all))
df_all = pd.merge(df_all, df_courts, how='left', on='parent_court')
df_all.head()

Unnamed: 0,sub_court,sub_court_cleared,sub_court_url,parent_court,parent_court_url
0,Final Appeal (Civil),Final Appeal (Civil),https://legalref.judiciary.hk/lrs/common/ju/ju...,Court of Final Appeal,https://legalref.judiciary.hk/lrs/common/ju/ju...
1,Final Appeal (Criminal),Final Appeal (Criminal),https://legalref.judiciary.hk/lrs/common/ju/ju...,Court of Final Appeal,https://legalref.judiciary.hk/lrs/common/ju/ju...
2,Miscellaneous Proceedings,Miscellaneou Proceeding,https://legalref.judiciary.hk/lrs/common/ju/ju...,Court of Final Appeal,https://legalref.judiciary.hk/lrs/common/ju/ju...
3,Miscellaneous Proceedings (Civil),Miscellaneou Proceeding (Civil),https://legalref.judiciary.hk/lrs/common/ju/ju...,Court of Final Appeal,https://legalref.judiciary.hk/lrs/common/ju/ju...
4,Miscellaneous Proceedings (Criminal),Miscellaneou Proceeding (Criminal),https://legalref.judiciary.hk/lrs/common/ju/ju...,Court of Final Appeal,https://legalref.judiciary.hk/lrs/common/ju/ju...


In [10]:
df_all.sub_court_cleared

0                                  Final Appeal (Civil)
1                               Final Appeal (Criminal)
2                               Miscellaneou Proceeding
3                       Miscellaneou Proceeding (Civil)
4                    Miscellaneou Proceeding (Criminal)
5                                Application for Review
6                            Attorney General Reference
7                                          Civil Appeal
8                                       Criminal Appeal
9                               Miscellaneou Proceeding
10                       Reservation of Question of Law
11                                     Admiralty Action
12                                 Adoption Application
13                                Application for Grant
14    Application to et aside a Statutory Demand (un...
15        Application under the Mental Health Ordinance
16                                Bankruptcy Proceeding
17                                              

## Hiearchy 3: years

In [11]:
# for test case
sub_ = "West Kowloon Magistrate Court Charge Case"

In [12]:
list(df_all[df_all['sub_court_cleared']==sub_]['sub_court_url'])

['https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?L1=WK&L2=CC&AR=11#A11']

In [206]:
def check_special(sc_cleared, df_all, courts_set, urls_set):
    """Check if sub-court has Pre****"""
    years = subCourt_get_years(sc_cleared, df_all, courts_set)
    urls = subCourt_get_urls(sc_cleared, df_all, urls_set) 
    print(years)
    special_ = [years[idx] for idx, y in enumerate(years) if len(re.findall('Pre', y))!=0]
    if len(special_) != 0:
        has_special = True
        special_year = special_[0]
        special_url = [u for idx, u in enumerate(urls) if len(re.findall('L3=....', u))==0][0]
    else: 
        has_special = False
        special_year = None
        special_url = None
    return has_special, special_year, special_url

        
courts_set = set(df_all['parent_court']) | set(df_all['sub_court'])
urls_set = set(df_all['sub_court_url']) | set(df_all['parent_court_url'])

special_courts = {}
for sc_ in tqdm(df_all['sub_court_cleared']):
    has_special, special_year, special_url = check_special(sc_, df_all, courts_set, urls_set)
    if has_special:
        special_courts[sc_] = special_url
special_courts


  0%|                                                   | 0/88 [00:00<?, ?it/s]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


  1%|▍                                          | 1/88 [00:04<07:01,  4.85s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


  2%|▉                                          | 2/88 [00:10<07:08,  4.98s/it]

200
200
['2009', '2004', '2013', '2010', '2003', '2014']


  3%|█▍                                         | 3/88 [00:14<06:47,  4.79s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


  5%|█▉                                         | 4/88 [00:19<06:45,  4.82s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


  6%|██▍                                        | 5/88 [00:25<07:00,  5.07s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2017', '2018', '2014']


  7%|██▉                                        | 6/88 [00:29<06:48,  4.99s/it]

200
200
['1986', '1994']


  8%|███▍                                       | 7/88 [00:33<06:14,  4.63s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


  9%|███▉                                       | 8/88 [00:50<11:06,  8.33s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 10%|████▍                                      | 9/88 [00:58<10:40,  8.11s/it]

200
200
['2009', '2004', '2013', '2010', '2003', '2014']


 11%|████▊                                     | 10/88 [01:11<12:40,  9.75s/it]

200
200
['2018', '1997', '1994']


 12%|█████▎                                    | 11/88 [01:15<10:10,  7.93s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


 14%|█████▋                                    | 12/88 [01:19<08:27,  6.68s/it]

200
200
['1970', '1973', '1985', '1978', '1992', '1971', '1988']


 15%|██████▏                                   | 13/88 [01:23<07:21,  5.89s/it]

200
200
['2011', '2015', '2012', '2009', '2010', 'Pre2009', '2014']


 16%|██████▋                                   | 14/88 [01:27<06:35,  5.35s/it]

200
200
['2011', '2012', '2013', '2016', '2010', 'Pre2010', '2014']


 17%|███████▏                                  | 15/88 [01:43<10:36,  8.72s/it]

200
200
['2008', '2011', '2015', '2012', '2016', '2010', '2014']


 18%|███████▋                                  | 16/88 [01:47<08:39,  7.22s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


 19%|████████                                  | 17/88 [01:51<07:21,  6.22s/it]

200
200
['Pre2012', '2015', '2012', '2013', '2016', '2017', '2014']


 20%|████████▌                                 | 18/88 [01:55<06:23,  5.47s/it]

200
200
['2012', '2008']


 22%|█████████                                 | 19/88 [01:58<05:40,  4.94s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 23%|█████████▌                                | 20/88 [02:02<05:11,  4.58s/it]

200
200
['Pre2012', '2015', '2012', '2013', '2016', '2017', '2014']


 24%|██████████                                | 21/88 [02:06<04:58,  4.46s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 25%|██████████▌                               | 22/88 [02:14<05:57,  5.42s/it]

200
200
['2018', '2009', '2003']


 26%|██████████▉                               | 23/88 [02:36<11:11, 10.33s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 27%|███████████▍                              | 24/88 [02:40<09:01,  8.47s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


 28%|███████████▉                              | 25/88 [02:48<08:36,  8.20s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


 30%|████████████▍                             | 26/88 [02:53<07:40,  7.43s/it]

200
200
['2011', '1974', '2013', '2007', '2005']


 31%|████████████▉                             | 27/88 [02:57<06:28,  6.36s/it]

200
200
['2008', '2009', '2004', '2013', '2010', '2005']


 32%|█████████████▎                            | 28/88 [03:01<05:46,  5.77s/it]

200
200
['2002', '2008', '2012', '2009', '2004', '2001', '2003', '2006']


 33%|█████████████▊                            | 29/88 [03:11<06:40,  6.79s/it]

200
200
['2015', '2009', '2016', '2018', '2017', '2010', 'Pre2009']


 34%|██████████████▎                           | 30/88 [03:14<05:42,  5.90s/it]

200
200
['1986', '2000', '2012', '2015', '2009', '2017']


 35%|██████████████▊                           | 31/88 [03:18<04:55,  5.19s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 36%|███████████████▎                          | 32/88 [03:22<04:25,  4.74s/it]

200
200
['2017', '1999', '2013']


 38%|███████████████▊                          | 33/88 [03:30<05:14,  5.72s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 39%|████████████████▏                         | 34/88 [03:36<05:15,  5.84s/it]

200
200
['2011', '2012', '2009', '2016', '2018', '2010', 'Pre2009']


 40%|████████████████▋                         | 35/88 [03:40<04:37,  5.24s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 41%|█████████████████▏                        | 36/88 [03:46<04:53,  5.64s/it]

200
200
['2009', '2004', '2013', '2010', '2003', '2014']


 42%|█████████████████▋                        | 37/88 [03:56<05:50,  6.87s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 43%|██████████████████▏                       | 38/88 [04:00<04:58,  5.98s/it]

200
200
['1997', '1996', '1994', '1993', '1995', '1991']


 44%|██████████████████▌                       | 39/88 [04:04<04:26,  5.43s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


 45%|███████████████████                       | 40/88 [04:08<04:05,  5.11s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


 47%|███████████████████▌                      | 41/88 [04:13<03:49,  4.89s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 48%|████████████████████                      | 42/88 [04:22<04:49,  6.29s/it]

200
200
['2018']


 49%|████████████████████▌                     | 43/88 [04:30<05:02,  6.71s/it]

200
200
['2017']


 50%|█████████████████████                     | 44/88 [04:39<05:27,  7.45s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 51%|█████████████████████▍                    | 45/88 [04:43<04:34,  6.37s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


 52%|█████████████████████▉                    | 46/88 [04:48<04:15,  6.08s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 53%|██████████████████████▍                   | 47/88 [04:56<04:28,  6.54s/it]

200
200
['2011', '2015', '2012', 'Pre2011', '2013', '2016', '2017']


 55%|██████████████████████▉                   | 48/88 [05:06<04:59,  7.48s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


 56%|███████████████████████▍                  | 49/88 [05:55<12:56, 19.91s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


 57%|███████████████████████▊                  | 50/88 [06:13<12:22, 19.55s/it]

200
200
['1986', '2000', '2012', '2015', '2009', '2017']


 58%|████████████████████████▎                 | 51/88 [06:19<09:31, 15.44s/it]

200
200
['2009', '2004', '2013', '2010', '2003', '2014']


 59%|████████████████████████▊                 | 52/88 [06:23<07:16, 12.11s/it]

200
200
['2005']


 60%|█████████████████████████▎                | 53/88 [06:27<05:39,  9.69s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


 61%|█████████████████████████▊                | 54/88 [06:31<04:28,  7.91s/it]

200
200
['Pre2012', '2015', '2012', '2016', '2018', '2017', '2014']


 62%|██████████████████████████▎               | 55/88 [06:43<04:58,  9.04s/it]

200
200
['Pre2012', '2015', '2012', '2013', '2016', '2017', '2014']


 64%|██████████████████████████▋               | 56/88 [06:47<03:57,  7.43s/it]

200
200
['2011', '2012', '2009', '2016', '2018', '2010', 'Pre2009']


 65%|███████████████████████████▏              | 57/88 [06:55<03:55,  7.58s/it]

200
200
['2009', '2004', '2013', '2010', '2003', '2014']


 66%|███████████████████████████▋              | 58/88 [07:10<04:58,  9.95s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


 67%|████████████████████████████▏             | 59/88 [07:17<04:21,  9.01s/it]

200
200
['1981', '1982']


 68%|████████████████████████████▋             | 60/88 [07:21<03:29,  7.47s/it]

200
200
['1985']


 69%|█████████████████████████████             | 61/88 [07:31<03:42,  8.24s/it]

200
200
['1989', '1992']


 70%|█████████████████████████████▌            | 62/88 [07:35<02:59,  6.91s/it]

200
200
['2012', '2013', '2007', '2006', '2005', '2014', 'Pre2005']


 72%|██████████████████████████████            | 63/88 [07:39<02:30,  6.02s/it]

200
200
['1983', '2002', '2000', '2012', '2004', '2001', '2010', '2003']


 73%|██████████████████████████████▌           | 64/88 [07:51<03:10,  7.95s/it]

200
200
['Pre2013', '2015', '2013', '2016', '2018', '2017', '2014']


 74%|███████████████████████████████           | 65/88 [07:55<02:33,  6.66s/it]

200
200
['2011', '2015', '2012', 'Pre2011', '2013', '2016', '2014']


 75%|███████████████████████████████▌          | 66/88 [07:58<02:07,  5.78s/it]

200
200
['1997', 'Pre1987', '1987', '1995', '1988', '1989', '1991']


 76%|███████████████████████████████▉          | 67/88 [08:07<02:19,  6.64s/it]

200
200
['1984', '1983', '1981', '2004', '2001', '1988', '1982']


 77%|████████████████████████████████▍         | 68/88 [08:27<03:31, 10.58s/it]

200
200
['2009', '2016', '2018', '2017', '2010', '2003', 'Pre2003']


 78%|████████████████████████████████▉         | 69/88 [08:31<02:42,  8.56s/it]

200
200
['2002', '2012', 'Pre2002', '2004', '2003', '2006', '2005']


 80%|█████████████████████████████████▍        | 70/88 [08:36<02:18,  7.70s/it]

200
200
['2002', '2000', '1999', 'Pre1999', '2004', '2001', '2003']


 81%|█████████████████████████████████▉        | 71/88 [08:41<01:53,  6.66s/it]

200
200
['1986', '1983', '1981', '1985', '1995', '1982']


 82%|██████████████████████████████████▎       | 72/88 [08:52<02:08,  8.05s/it]

200
200
['1997', '1998', '2000', '1994', '1995']


 83%|██████████████████████████████████▊       | 73/88 [08:56<01:42,  6.83s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 84%|███████████████████████████████████▎      | 74/88 [08:59<01:22,  5.89s/it]

200
200
['2015', '2019', 'Pre2014', '2016', '2017', '2018', '2014']


 85%|███████████████████████████████████▊      | 75/88 [09:05<01:13,  5.68s/it]

200
200
['2015', '2004', '2001', '2003', '2005']


 86%|████████████████████████████████████▎     | 76/88 [09:08<01:00,  5.06s/it]

200
200
['2015', '2009', '2007', '2006', '2005', '2014', 'Pre2005']


 88%|████████████████████████████████████▊     | 77/88 [09:17<01:06,  6.06s/it]

200
200
['2013']


 89%|█████████████████████████████████████▏    | 78/88 [09:21<00:54,  5.44s/it]

200
200
['2018', '2015', '2011', '2016']


 90%|█████████████████████████████████████▋    | 79/88 [09:25<00:44,  4.97s/it]

200
200
['2013', '2016']


 91%|██████████████████████████████████████▏   | 80/88 [09:29<00:39,  4.92s/it]

200
200
['2017', '2018', '2002', '2016']


 92%|██████████████████████████████████████▋   | 81/88 [09:33<00:32,  4.58s/it]

200
200
['2013']


 93%|███████████████████████████████████████▏  | 82/88 [09:37<00:25,  4.25s/it]

200
200
['2012', '2008']


 94%|███████████████████████████████████████▌  | 83/88 [09:41<00:22,  4.43s/it]

200
200
['2006']


 95%|████████████████████████████████████████  | 84/88 [09:45<00:16,  4.22s/it]

200
200
['2016']


 97%|████████████████████████████████████████▌ | 85/88 [09:49<00:12,  4.07s/it]

200
200
['2012', '2015', '2001']


 98%|█████████████████████████████████████████ | 86/88 [09:53<00:07,  3.96s/it]

200
200
['2005']


 99%|█████████████████████████████████████████▌| 87/88 [09:57<00:04,  4.18s/it]

200
200
['2017']


100%|██████████████████████████████████████████| 88/88 [10:05<00:00,  5.31s/it]


{'Final Appeal (Civil)': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&AR=1#A1',
 'Final Appeal (Criminal)': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CC&AR=2#A2',
 'Miscellaneou Proceeding (Civil)': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=MV&AR=4#A4',
 'Miscellaneou Proceeding (Criminal)': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=MC&AR=5#A5',
 'Application for Review': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=CA&L2=AR&AR=1#A1',
 'Civil Appeal': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=CA&L2=CV&AR=3#A3',
 'Criminal Appeal': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=CA&L2=CC&AR=4#A4',
 'Admiralty Action': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=AJ&AR=1#A1',
 'Application for Grant': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=AG&AR=3#A

In [209]:
def subCourt_get_years(sc_cleared, url, df_all, all_courts):
    time.sleep(1)
    root = url_to_root(url)
    info_list = root.find('script', string=re.compile("var myMenu")).text.split(",")
    
    now_set = set([BeautifulSoup(i.strip("' ")).text.strip(" ") for i in info_list if len(re.findall('<a name=', i))!=0])
    years = now_set - all_courts
    return list(years)

courts_set = set(df_all['parent_court']) | set(df_all['sub_court'])
url = list(df_all[df_all['sub_court_cleared']==sub_]['sub_court_url'])[0]
years = subCourt_get_years(sub_, url, df_all, courts_set)
years

200


['2017']

In [210]:
def subCourt_get_urls(sub_court, url, df_all, urls_set):
    
    time.sleep(1)
    root = url_to_root(url)
    info_list = root.find('script', string=re.compile("var myMenu")).text.split(",")
    
    potential_urls = set([i.strip("' ") for i in info_list if i.strip("' ")[:5]=='https'])
    urls = potential_urls - urls_set
    return list(urls)

urls_set = set(df_all['sub_court_url']) | set(df_all['parent_court_url'])
url = list(df_all[df_all['sub_court_cleared']==sub_]['sub_court_url'])[0]
urls = subCourt_get_urls(sub_, url, df_all, urls_set)  
urls

200


['https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=WK&L2=CC&L3=2017&AR=11_1#A11_1']

In [211]:
def match_year_urls(years, urls):
    match_dict = {}
    url_pairs = [(re.findall('L3=....', i)[0][-4:], i) for i in urls]
    for y in years:
        for y_url, url in url_pairs:
            if y == y_url:
                match_dict[y] = url
    return match_dict

match_year_urls(years, urls)

{'2017': 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=WK&L2=CC&L3=2017&AR=11_1#A11_1'}

In [213]:
def get_year_urls(sub_court, url, df_all, courts_set, urls_set):
    # get years
    years = subCourt_get_years(sub_court, url, df_all, courts_set)
    # get urls
    urls = subCourt_get_urls(sub_court, url, df_all, urls_set)  
    # match them
    match_dict = match_year_urls(years, urls)
    # convert to DataFrame
    data = [(k,v) for k, v in match_dict.items()]
    match = pd.DataFrame(data, columns=['year', 'year_url'])
    match['sub_court_cleared'] = sub_court
    return match

# test case 1
courts_set = set(df_all['parent_court']) | set(df_all['sub_court'])
urls_set = set(df_all['sub_court_url']) | set(df_all['parent_court_url'])
url = list(df_all[df_all['sub_court_cleared']==sub_]['sub_court_url'])[0]
df_test = get_year_urls(sub_, url, df_all, courts_set, urls_set)
df_test

200
200


Unnamed: 0,year,year_url,sub_court_cleared
0,2017,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=WK&L2=CC&L3=2017&AR=11_1#A11_1,West Kowloon Magistrate Court Charge Case


In [217]:
# test case2
sub_ = 'Magistracy Appeal'
url = special_courts[sub_]
print(url)
df_test = get_year_urls(sub_, url, df_all, courts_set, urls_set)
df_test

https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=MA&AR=23#A23
200
200


Unnamed: 0,year,year_url,sub_court_cleared
0,1986,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=MA&L3=1986&AR=23_34#A23_34,Magistracy Appeal
1,1984,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=MA&L3=1984&AR=23_36#A23_36,Magistracy Appeal
2,1983,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=MA&L3=1983&AR=23_37#A23_37,Magistracy Appeal
3,2011,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=MA&L3=2011&AR=23_9#A23_9,Magistracy Appeal
4,2000,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=MA&L3=2000&AR=23_20#A23_20,Magistracy Appeal
5,2012,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=MA&L3=2012&AR=23_8#A23_8,Magistracy Appeal
6,1998,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=MA&L3=1998&AR=23_22#A23_22,Magistracy Appeal
7,1992,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=MA&L3=1992&AR=23_28#A23_28,Magistracy Appeal
8,2001,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=MA&L3=2001&AR=23_19#A23_19,Magistracy Appeal
9,2007,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=HC&L2=MA&L3=2007&AR=23_13#A23_13,Magistracy Appeal


In [218]:
dfs = []
for sc in tqdm(df_all['sub_court_cleared']):
    print('for sub_court: ',sc)
    # check if special sc
    if sc in special_courts:
        url = special_courts[sc]
    else:
        url = list(df_all[df_all['sub_court_cleared']==sc]['sub_court_url'])[0]
    match = get_year_urls(sc, url, df_all, courts_set, urls_set)
    dfs.append(match)
    
df_years = pd.concat(dfs, axis=0)
df_years.index = range(len(df_years))
print(df_years.shape)
df_years


  0%|                                                   | 0/88 [00:00<?, ?it/s]

for sub_court:  Final Appeal (Civil)
200
200



  1%|▍                                          | 1/88 [00:07<10:35,  7.30s/it]

for sub_court:  Final Appeal (Criminal)
200
200



  2%|▉                                          | 2/88 [00:20<13:04,  9.13s/it]

for sub_court:  Miscellaneou Proceeding
200
200



  3%|█▍                                         | 3/88 [00:34<14:46, 10.42s/it]

for sub_court:  Miscellaneou Proceeding (Civil)
200
200



  5%|█▉                                         | 4/88 [00:39<12:23,  8.85s/it]

for sub_court:  Miscellaneou Proceeding (Criminal)
200
200



  6%|██▍                                        | 5/88 [00:44<10:37,  7.68s/it]

for sub_court:  Application for Review
200
200



  7%|██▉                                        | 6/88 [00:48<08:58,  6.57s/it]

for sub_court:  Attorney General Reference
200
200



  8%|███▍                                       | 7/88 [00:51<07:36,  5.64s/it]

for sub_court:  Civil Appeal
200
200



  9%|███▉                                       | 8/88 [01:14<14:30, 10.88s/it]

for sub_court:  Criminal Appeal
200
200



 10%|████▍                                      | 9/88 [01:18<11:38,  8.84s/it]

for sub_court:  Miscellaneou Proceeding
200
200



 11%|████▊                                     | 10/88 [01:22<09:33,  7.35s/it]

for sub_court:  Reservation of Question of Law
200
200



 12%|█████▎                                    | 11/88 [01:27<08:33,  6.66s/it]

for sub_court:  Admiralty Action
200
200



 14%|█████▋                                    | 12/88 [01:31<07:20,  5.79s/it]

for sub_court:  Adoption Application
200
200



 15%|██████▏                                   | 13/88 [01:35<06:24,  5.12s/it]

for sub_court:  Application for Grant
200
200



 16%|██████▋                                   | 14/88 [01:42<06:58,  5.65s/it]

for sub_court:  Application to et aside a Statutory Demand (under Bankruptcy Ordinance)
200
200



 17%|███████▏                                  | 15/88 [01:46<06:29,  5.34s/it]

for sub_court:  Application under the Mental Health Ordinance
200
200



 18%|███████▋                                  | 16/88 [01:50<05:53,  4.90s/it]

for sub_court:  Bankruptcy Proceeding
200
200



 19%|████████                                  | 17/88 [01:55<05:40,  4.80s/it]

for sub_court:  Caveat
200
200



 20%|████████▌                                 | 18/88 [02:10<09:14,  7.92s/it]

for sub_court:  Citation Application
200
200



 22%|█████████                                 | 19/88 [02:13<07:38,  6.64s/it]

for sub_court:  Civil Action
200
200



 23%|█████████▌                                | 20/88 [02:18<06:39,  5.88s/it]

for sub_court:  Commercial Action
200
200



 24%|██████████                                | 21/88 [02:25<06:58,  6.25s/it]

for sub_court:  Companie Winding-up Proceeding
200
200



 25%|██████████▌                               | 22/88 [02:31<06:49,  6.20s/it]

for sub_court:  Confidential Miscellaneou Proceeding
200
200



 26%|██████████▉                               | 23/88 [02:35<05:55,  5.47s/it]

for sub_court:  Constitutional and Administrative Law Proceeding
200
200



 27%|███████████▍                              | 24/88 [02:41<06:17,  5.89s/it]

for sub_court:  Construction and Arbitration Proceeding
200
200



 28%|███████████▉                              | 25/88 [02:45<05:35,  5.32s/it]

for sub_court:  Criminal Case
200
200



 30%|████████████▍                             | 26/88 [03:01<08:37,  8.35s/it]

for sub_court:  Estate Duty Appeal
200
200



 31%|████████████▉                             | 27/88 [03:07<07:58,  7.84s/it]

for sub_court:  Ex-parte Application
200
200



 32%|█████████████▎                            | 28/88 [03:16<08:11,  8.18s/it]

for sub_court:  High Court Bankruptcy Interim Order
200
200



 33%|█████████████▊                            | 29/88 [03:21<06:54,  7.02s/it]

for sub_court:  Inland Revenue Appeal
200
200



 34%|██████████████▎                           | 30/88 [03:41<10:29, 10.85s/it]

for sub_court:  Intended Action
200
200



 35%|██████████████▊                           | 31/88 [03:44<08:15,  8.69s/it]

for sub_court:  Labour Tribunal Appeal
200
200



 36%|███████████████▎                          | 32/88 [03:48<06:45,  7.25s/it]

for sub_court:  Legal Aid Appeal
200
200



 38%|███████████████▊                          | 33/88 [03:57<07:00,  7.65s/it]

for sub_court:  Magistracy Appeal
200
200



 39%|████████████████▏                         | 34/88 [04:01<06:04,  6.75s/it]

for sub_court:  Matrimonial Cause
200
200



 40%|████████████████▋                         | 35/88 [04:05<05:13,  5.92s/it]

for sub_court:  Minor Employment Claim Appeal
200
200



 41%|█████████████████▏                        | 36/88 [04:21<07:42,  8.90s/it]

for sub_court:  Miscellaneou Proceeding
200
200



 42%|█████████████████▋                        | 37/88 [04:29<07:23,  8.69s/it]

for sub_court:  Miscellaneou Proceeding (Criminal)
200
200



 43%|██████████████████▏                       | 38/88 [04:33<06:04,  7.30s/it]

for sub_court:  Obscene Article Tribunal Appeal
200
200



 44%|██████████████████▌                       | 39/88 [04:37<05:02,  6.18s/it]

for sub_court:  Personal Injurie Action
200
200



 45%|███████████████████                       | 40/88 [04:43<04:47,  5.99s/it]

for sub_court:  Probate Action
200
200



 47%|███████████████████▌                      | 41/88 [04:46<04:12,  5.38s/it]

for sub_court:  Small Claim Tribunal Appeal
200
200



 48%|████████████████████                      | 42/88 [04:55<04:46,  6.22s/it]

for sub_court:  Competition Tribunal Action
200
200



 49%|████████████████████▌                     | 43/88 [04:59<04:12,  5.60s/it]

for sub_court:  Competition Tribunal Enforcement Action
200
200



 50%|█████████████████████                     | 44/88 [05:03<03:53,  5.31s/it]

for sub_court:  Civil Action
200
200



 51%|█████████████████████▍                    | 45/88 [05:13<04:43,  6.60s/it]

for sub_court:  Criminal Case
200
200



 52%|█████████████████████▉                    | 46/88 [05:17<04:04,  5.81s/it]

for sub_court:  Distraint Case
200
200



 53%|██████████████████████▍                   | 47/88 [05:26<04:35,  6.73s/it]

for sub_court:  District Court Tax Claim
200
200



 55%|██████████████████████▉                   | 48/88 [05:33<04:31,  6.80s/it]

for sub_court:  Employee Compensation Case
200
200



 56%|███████████████████████▍                  | 49/88 [05:37<03:50,  5.91s/it]

for sub_court:  Equal Opportunitie Action
200
200



 57%|███████████████████████▊                  | 50/88 [05:47<04:34,  7.21s/it]

for sub_court:  Intended Action
200
200



 58%|████████████████████████▎                 | 51/88 [05:51<03:53,  6.31s/it]

for sub_court:  Miscellaneou Proceeding
200
200



 59%|████████████████████████▊                 | 52/88 [05:55<03:19,  5.54s/it]

for sub_court:  Occupational Deafne (Compensation) Appeal
200
200



 60%|█████████████████████████▎                | 53/88 [05:59<02:54,  4.98s/it]

for sub_court:  Personal Injurie Action
200
200



 61%|█████████████████████████▊                | 54/88 [06:10<03:56,  6.95s/it]

for sub_court:  Stamp Duty Appeal
200
200



 62%|██████████████████████████▎               | 55/88 [06:20<04:18,  7.85s/it]

for sub_court:  Joint application
200
200



 64%|██████████████████████████▋               | 56/88 [06:28<04:12,  7.89s/it]

for sub_court:  Matrimonial Cause
200
200



 65%|███████████████████████████▏              | 57/88 [06:42<04:58,  9.63s/it]

for sub_court:  Miscellaneou Proceeding
200
200



 66%|███████████████████████████▋              | 58/88 [06:48<04:20,  8.69s/it]

for sub_court:  Building Management Application
200
200



 67%|████████████████████████████▏             | 59/88 [06:55<03:54,  8.08s/it]

for sub_court:  Building Ordinance Application
200
200



 68%|████████████████████████████▋             | 60/88 [07:06<04:11,  8.97s/it]

for sub_court:  Demolished Building Appeal
200
200



 69%|█████████████████████████████             | 61/88 [07:22<05:00, 11.11s/it]

for sub_court:  Demolished Building Application
200
200



 70%|█████████████████████████████▌            | 62/88 [07:27<04:04,  9.40s/it]

for sub_court:  Government Rent Appeal
200
200



 72%|██████████████████████████████            | 63/88 [07:37<03:56,  9.46s/it]

for sub_court:  Housing Ordinance Appeal
200
200



 73%|██████████████████████████████▌           | 64/88 [07:46<03:40,  9.19s/it]

for sub_court:  Land Compulsory Sale Application
200
200



 74%|███████████████████████████████           | 65/88 [07:50<02:55,  7.64s/it]

for sub_court:  Land Resumption Application
200
200



 75%|███████████████████████████████▌          | 66/88 [07:56<02:38,  7.23s/it]

for sub_court:  Landlord  Appeal
200
200



 76%|███████████████████████████████▉          | 67/88 [08:00<02:11,  6.25s/it]

for sub_court:  MTR Ordinance Application
200
200



 77%|████████████████████████████████▍         | 68/88 [08:04<01:50,  5.52s/it]

for sub_court:  Miscellaneou Proceeding Application
200
200



 78%|████████████████████████████████▉         | 69/88 [08:08<01:36,  5.10s/it]

for sub_court:  Miscellaneou Reference Application
200
200



 80%|█████████████████████████████████▍        | 70/88 [08:13<01:29,  4.99s/it]

for sub_court:  New Tenancy Application
200
200



 81%|█████████████████████████████████▉        | 71/88 [08:17<01:22,  4.88s/it]

for sub_court:  Part I Possession Application
200
200



 82%|██████████████████████████████████▎       | 72/88 [08:22<01:16,  4.80s/it]

for sub_court:  Part II Possession Application
200
200



 83%|██████████████████████████████████▊       | 73/88 [08:26<01:08,  4.55s/it]

for sub_court:  Part IV Possession Application
200
200



 84%|███████████████████████████████████▎      | 74/88 [08:35<01:24,  6.04s/it]

for sub_court:  Part V Possession Application
200
200



 85%|███████████████████████████████████▊      | 75/88 [08:39<01:11,  5.50s/it]

for sub_court:  Railway Ordinance Application
200
200



 86%|████████████████████████████████████▎     | 76/88 [08:49<01:21,  6.81s/it]

for sub_court:  Rating Appeal
200
200



 88%|████████████████████████████████████▊     | 77/88 [08:53<01:05,  5.99s/it]

for sub_court:  Coroner Court Death Inquest
200
200



 89%|█████████████████████████████████████▏    | 78/88 [08:59<00:57,  5.74s/it]

for sub_court:  Eastern Magistrate Court Charge Case
200
200



 90%|█████████████████████████████████████▋    | 79/88 [09:03<00:49,  5.48s/it]

for sub_court:  Eastern Magistrate Court Summon Case
200
200



 91%|██████████████████████████████████████▏   | 80/88 [09:08<00:41,  5.20s/it]

for sub_court:  Kowloon City Magistrate Court Charge Case
200
200



 92%|██████████████████████████████████████▋   | 81/88 [09:12<00:34,  4.97s/it]

for sub_court:  Kowloon City Magistrates’ Court Summon Case
200
200



 93%|███████████████████████████████████████▏  | 82/88 [09:30<00:53,  8.90s/it]

for sub_court:  Labour Tribunal Claim
200
200



 94%|███████████████████████████████████████▌  | 83/88 [09:38<00:41,  8.37s/it]

for sub_court:  Obscene Article Tribunal Case
200
200



 95%|████████████████████████████████████████  | 84/88 [09:42<00:28,  7.19s/it]

for sub_court:  Shatin Magistrate Court Charge Case
200
200



 97%|████████████████████████████████████████▌ | 85/88 [09:47<00:19,  6.54s/it]

for sub_court:  Small Claim Tribunal Claim
200
200



 98%|█████████████████████████████████████████ | 86/88 [09:51<00:11,  5.76s/it]

for sub_court:  Tuen Mun Magistrate Court Charge Case
200
200



 99%|█████████████████████████████████████████▌| 87/88 [09:55<00:05,  5.20s/it]

for sub_court:  West Kowloon Magistrate Court Charge Case
200
200



100%|██████████████████████████████████████████| 88/88 [09:59<00:00,  4.78s/it]


(1641, 3)


Unnamed: 0,year,year_url,sub_court_cleared
0,2011,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2011&AR=1_8#A1_8,Final Appeal (Civil)
1,1998,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=1998&AR=1_21#A1_21,Final Appeal (Civil)
2,2012,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2012&AR=1_7#A1_7,Final Appeal (Civil)
3,2000,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2000&AR=1_19#A1_19,Final Appeal (Civil)
4,2001,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2001&AR=1_18#A1_18,Final Appeal (Civil)
5,2007,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2007&AR=1_12#A1_12,Final Appeal (Civil)
6,2006,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2006&AR=1_13#A1_13,Final Appeal (Civil)
7,2005,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2005&AR=1_14#A1_14,Final Appeal (Civil)
8,2014,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2014&AR=1_5#A1_5,Final Appeal (Civil)
9,2008,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2008&AR=1_11#A1_11,Final Appeal (Civil)


In [222]:
df_years.head()

Unnamed: 0,year,year_url,sub_court_cleared
0,2011,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2011&AR=1_8#A1_8,Final Appeal (Civil)
1,1998,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=1998&AR=1_21#A1_21,Final Appeal (Civil)
2,2012,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2012&AR=1_7#A1_7,Final Appeal (Civil)
3,2000,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2000&AR=1_19#A1_19,Final Appeal (Civil)
4,2001,https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=FA&L2=CV&L3=2001&AR=1_18#A1_18,Final Appeal (Civil)


In [220]:
# store data
with open('data.pickle', 'wb') as ww:
    pickle.dump(df_years, ww)


In [4]:
f = open('data.pickle', 'rb')
df_years = pickle.load(f)

## Hiearchy 4: details

In [5]:
def get_onePage(url, r):
    data_onepage = [] # initialize
    root = url_to_root(url)
    info = root.find('script', string=re.compile("var myMenu")).text.split(",[")
    B_s = [BeautifulSoup(i) for i in info]
    for B in B_s:
        try:
            sub_court_cleared = r['sub_court_cleared']
            year = r['year']
            outcome = B.find('td', valign="top", width="5%").find('img')['src']
            name = B.find('a', class_="ThemeXPRowAnchor").text

            url = B.find('a', class_="ThemeXPRowAnchor")['href']
            url = url.split(',')[0]
            detail_url = url[re.search("https", url).span()[0]:-2]
            date = B.find('font',color="#006633").text
            persons = B.find('td', valign="top", width="55%").text
            data_onepage.append((sub_court_cleared, year, outcome, name, detail_url, date, persons))
        except AttributeError:
            continue
    return data_onepage


In [6]:
def store_onePage(idx, data_onepage):
    with open(str(idx)+'.pickle', 'wb') as ww:
        pickle.dump(data_onepage, ww)
    return 


In [7]:
def get_detail(df, x, y):
    for (idx, r) in tqdm(df.iterrows(), total=len(df)):      
        if idx >= x:
            url = r['year_url'] 
            print('now in row: ', idx)
            data_onepage = get_onePage(url, r)
            _ = store_onePage(idx, data_onepage)
            if idx >=y:
                break
    return      
        
df_detail = get_detail(df_years, 501, 1000)      

  0%|                                                 | 0/1641 [00:00<?, ?it/s]

now in row:  501


 31%|███████████▌                          | 502/1641 [00:01<00:04, 281.79it/s]

now in row:  502
now in row:  503
now in row:  504
now in row:  505
now in row:  506
now in row:  507
now in row:  508
now in row:  509
now in row:  510


 31%|████████████▏                          | 511/1641 [00:14<08:08,  2.31it/s]

now in row:  511


 31%|████████████▏                          | 512/1641 [00:15<12:11,  1.54it/s]

now in row:  512


 31%|████████████▏                          | 513/1641 [00:16<14:33,  1.29it/s]

now in row:  513


 31%|████████████▏                          | 514/1641 [00:18<16:44,  1.12it/s]

now in row:  514


 31%|████████████▏                          | 515/1641 [00:19<20:05,  1.07s/it]

now in row:  515


 31%|████████████▎                          | 516/1641 [00:20<19:27,  1.04s/it]

now in row:  516


 32%|████████████▎                          | 517/1641 [00:21<19:23,  1.03s/it]

now in row:  517


 32%|████████████▎                          | 518/1641 [00:22<20:58,  1.12s/it]

now in row:  518


 32%|████████████▎                          | 519/1641 [00:23<21:01,  1.12s/it]

now in row:  519


 32%|████████████▎                          | 520/1641 [00:24<20:15,  1.08s/it]

now in row:  520


 32%|████████████▍                          | 521/1641 [00:25<19:05,  1.02s/it]

now in row:  521


 32%|████████████▍                          | 522/1641 [00:26<19:26,  1.04s/it]

now in row:  522


 32%|████████████▍                          | 523/1641 [00:28<21:42,  1.16s/it]

now in row:  523


 32%|████████████▍                          | 524/1641 [00:29<19:48,  1.06s/it]

now in row:  524


 32%|████████████▍                          | 525/1641 [00:33<35:09,  1.89s/it]

now in row:  525


 32%|████████████▌                          | 526/1641 [00:34<34:02,  1.83s/it]

now in row:  526


 32%|████████████▌                          | 527/1641 [00:35<29:06,  1.57s/it]

now in row:  527


 32%|████████████▌                          | 528/1641 [00:36<25:49,  1.39s/it]

now in row:  528


 32%|████████████▌                          | 529/1641 [00:37<24:17,  1.31s/it]

now in row:  529


 32%|████████████▌                          | 530/1641 [00:38<21:43,  1.17s/it]

now in row:  530


 32%|████████████▌                          | 531/1641 [00:39<20:25,  1.10s/it]

now in row:  531


 32%|████████████▋                          | 532/1641 [00:40<19:41,  1.06s/it]

now in row:  532


 32%|████████████▋                          | 533/1641 [00:41<18:48,  1.02s/it]

now in row:  533


 33%|████████████▋                          | 534/1641 [00:43<24:06,  1.31s/it]

now in row:  534


 33%|████████████▋                          | 535/1641 [00:44<24:15,  1.32s/it]

now in row:  535


 33%|████████████▋                          | 536/1641 [00:45<23:18,  1.27s/it]

now in row:  536


 33%|████████████                         | 537/1641 [00:56<1:14:12,  4.03s/it]

now in row:  537


 33%|████████████▏                        | 538/1641 [00:58<1:01:15,  3.33s/it]

now in row:  538


 33%|████████████▊                          | 539/1641 [01:00<54:45,  2.98s/it]

now in row:  539


 33%|████████████▏                        | 540/1641 [01:07<1:18:30,  4.28s/it]

now in row:  540


 33%|████████████▏                        | 541/1641 [01:09<1:04:21,  3.51s/it]

now in row:  541


 33%|████████████▉                          | 542/1641 [01:10<50:12,  2.74s/it]

now in row:  542


 33%|████████████▉                          | 543/1641 [01:11<43:47,  2.39s/it]

now in row:  543


 33%|████████████▉                          | 544/1641 [01:13<41:34,  2.27s/it]

now in row:  544


 33%|████████████▉                          | 545/1641 [01:14<35:05,  1.92s/it]

now in row:  545


 33%|████████████▉                          | 546/1641 [01:15<29:57,  1.64s/it]

now in row:  546


 33%|█████████████                          | 547/1641 [01:18<35:20,  1.94s/it]

now in row:  547


 33%|█████████████                          | 548/1641 [01:20<35:38,  1.96s/it]

now in row:  548


 33%|█████████████                          | 549/1641 [01:22<34:01,  1.87s/it]

now in row:  549


 34%|█████████████                          | 550/1641 [01:23<28:15,  1.55s/it]

now in row:  550


 34%|█████████████                          | 551/1641 [01:23<24:13,  1.33s/it]

now in row:  551


 34%|█████████████                          | 552/1641 [01:25<26:53,  1.48s/it]

now in row:  552


 34%|█████████████▏                         | 553/1641 [01:27<28:58,  1.60s/it]

now in row:  553


 34%|█████████████▏                         | 554/1641 [01:28<24:17,  1.34s/it]

now in row:  554


 34%|█████████████▏                         | 555/1641 [01:29<21:42,  1.20s/it]

now in row:  555


 34%|█████████████▏                         | 556/1641 [01:30<24:05,  1.33s/it]

now in row:  556


 34%|█████████████▏                         | 557/1641 [01:31<23:05,  1.28s/it]

now in row:  557


 34%|█████████████▎                         | 558/1641 [01:38<50:33,  2.80s/it]

now in row:  558


 34%|█████████████▎                         | 559/1641 [01:40<45:05,  2.50s/it]

now in row:  559


 34%|█████████████▎                         | 560/1641 [01:43<50:00,  2.78s/it]

now in row:  560


 34%|█████████████▎                         | 561/1641 [01:45<45:36,  2.53s/it]

now in row:  561


 34%|█████████████▎                         | 562/1641 [01:46<39:16,  2.18s/it]

now in row:  562


 34%|█████████████▍                         | 563/1641 [01:47<32:03,  1.78s/it]

now in row:  563


 34%|█████████████▍                         | 564/1641 [01:49<29:31,  1.65s/it]

now in row:  564


 34%|█████████████▍                         | 565/1641 [01:50<30:52,  1.72s/it]

now in row:  565


 34%|█████████████▍                         | 566/1641 [01:51<26:44,  1.49s/it]

now in row:  566


 35%|█████████████▍                         | 567/1641 [01:52<23:43,  1.33s/it]

now in row:  567


 35%|█████████████▍                         | 568/1641 [01:54<22:53,  1.28s/it]

now in row:  568


 35%|█████████████▌                         | 569/1641 [02:00<52:13,  2.92s/it]

now in row:  569


 35%|█████████████▌                         | 570/1641 [02:02<43:32,  2.44s/it]

now in row:  570


 35%|█████████████▌                         | 571/1641 [02:05<48:32,  2.72s/it]

now in row:  571


 35%|█████████████▌                         | 572/1641 [02:06<38:25,  2.16s/it]

now in row:  572


 35%|█████████████▌                         | 573/1641 [02:07<31:34,  1.77s/it]

now in row:  573


 35%|█████████████▋                         | 574/1641 [02:08<26:40,  1.50s/it]

now in row:  574


 35%|█████████████▋                         | 575/1641 [02:09<25:20,  1.43s/it]

now in row:  575


 35%|█████████████▋                         | 576/1641 [02:10<24:18,  1.37s/it]

now in row:  576


 35%|█████████████▋                         | 577/1641 [02:11<23:02,  1.30s/it]

now in row:  577


 35%|█████████████▋                         | 578/1641 [02:12<22:16,  1.26s/it]

now in row:  578


 35%|█████████████▊                         | 579/1641 [02:13<20:22,  1.15s/it]

now in row:  579


 35%|█████████████▊                         | 580/1641 [02:15<23:29,  1.33s/it]

now in row:  580


 35%|█████████████▊                         | 581/1641 [02:16<20:16,  1.15s/it]

now in row:  581


 35%|█████████████▊                         | 582/1641 [02:17<22:42,  1.29s/it]

now in row:  582


 36%|█████████████▊                         | 583/1641 [02:18<20:31,  1.16s/it]

now in row:  583


 36%|█████████████▉                         | 584/1641 [02:19<19:02,  1.08s/it]

now in row:  584


 36%|█████████████▉                         | 585/1641 [02:20<17:37,  1.00s/it]

now in row:  585


 36%|█████████████▉                         | 586/1641 [02:21<16:39,  1.06it/s]

now in row:  586


 36%|█████████████▉                         | 587/1641 [02:25<36:10,  2.06s/it]

now in row:  587


 36%|█████████████▉                         | 588/1641 [02:27<35:55,  2.05s/it]

now in row:  588


 36%|█████████████▉                         | 589/1641 [02:29<34:42,  1.98s/it]

now in row:  589


 36%|██████████████                         | 590/1641 [02:31<34:17,  1.96s/it]

now in row:  590


 36%|██████████████                         | 591/1641 [02:34<40:47,  2.33s/it]

now in row:  591


 36%|██████████████                         | 592/1641 [02:36<37:28,  2.14s/it]

now in row:  592


 36%|██████████████                         | 593/1641 [02:38<37:05,  2.12s/it]

now in row:  593


 36%|██████████████                         | 594/1641 [02:40<34:06,  1.95s/it]

now in row:  594


 36%|██████████████▏                        | 595/1641 [02:41<32:24,  1.86s/it]

now in row:  595


 36%|██████████████▏                        | 596/1641 [02:43<32:15,  1.85s/it]

now in row:  596


 36%|██████████████▏                        | 597/1641 [02:45<31:59,  1.84s/it]

now in row:  597


 36%|██████████████▏                        | 598/1641 [02:46<27:51,  1.60s/it]

now in row:  598


 37%|██████████████▏                        | 599/1641 [02:49<34:13,  1.97s/it]

now in row:  599


 37%|██████████████▎                        | 600/1641 [02:51<33:15,  1.92s/it]

now in row:  600


 37%|██████████████▎                        | 601/1641 [02:52<31:22,  1.81s/it]

now in row:  601


 37%|█████████████▌                       | 602/1641 [03:00<1:02:00,  3.58s/it]

now in row:  602


 37%|██████████████▎                        | 603/1641 [03:02<53:43,  3.11s/it]

now in row:  603


 37%|█████████████▌                       | 604/1641 [03:09<1:16:09,  4.41s/it]

now in row:  604


 37%|█████████████▋                       | 605/1641 [03:11<1:03:17,  3.67s/it]

now in row:  605


 37%|█████████████▋                       | 606/1641 [03:14<1:00:16,  3.49s/it]

now in row:  606


 37%|██████████████▍                        | 607/1641 [03:16<50:56,  2.96s/it]

now in row:  607


 37%|██████████████▍                        | 608/1641 [03:18<44:49,  2.60s/it]

now in row:  608


 37%|██████████████▍                        | 609/1641 [03:20<43:28,  2.53s/it]

now in row:  609


 37%|██████████████▍                        | 610/1641 [03:22<39:08,  2.28s/it]

now in row:  610


 37%|██████████████▌                        | 611/1641 [03:23<35:02,  2.04s/it]

now in row:  611


 37%|██████████████▌                        | 612/1641 [03:25<32:45,  1.91s/it]

now in row:  612


 37%|██████████████▌                        | 613/1641 [03:27<32:16,  1.88s/it]

now in row:  613


 37%|██████████████▌                        | 614/1641 [03:28<27:29,  1.61s/it]

now in row:  614


 37%|██████████████▌                        | 615/1641 [03:29<26:23,  1.54s/it]

now in row:  615


 38%|██████████████▋                        | 616/1641 [03:30<25:06,  1.47s/it]

now in row:  616


 38%|██████████████▋                        | 617/1641 [03:31<21:44,  1.27s/it]

now in row:  617


 38%|██████████████▋                        | 618/1641 [03:32<19:33,  1.15s/it]

now in row:  618


 38%|██████████████▋                        | 619/1641 [03:34<21:43,  1.28s/it]

now in row:  619


 38%|██████████████▋                        | 620/1641 [03:35<22:15,  1.31s/it]

now in row:  620


 38%|██████████████▊                        | 621/1641 [03:36<20:37,  1.21s/it]

now in row:  621


 38%|██████████████▊                        | 622/1641 [03:37<20:52,  1.23s/it]

now in row:  622


 38%|██████████████▊                        | 623/1641 [03:39<23:37,  1.39s/it]

now in row:  623


 38%|██████████████▊                        | 624/1641 [03:40<22:35,  1.33s/it]

now in row:  624


 38%|██████████████▊                        | 625/1641 [03:42<22:53,  1.35s/it]

now in row:  625


 38%|██████████████▉                        | 626/1641 [03:48<47:08,  2.79s/it]

now in row:  626


 38%|██████████████▉                        | 627/1641 [03:49<39:36,  2.34s/it]

now in row:  627


 38%|██████████████▉                        | 628/1641 [03:50<32:51,  1.95s/it]

now in row:  628


 38%|██████████████▉                        | 629/1641 [03:52<34:44,  2.06s/it]

now in row:  629


 38%|██████████████▉                        | 630/1641 [03:53<29:11,  1.73s/it]

now in row:  630


 38%|██████████████▉                        | 631/1641 [03:55<25:44,  1.53s/it]

now in row:  631


 39%|███████████████                        | 632/1641 [03:56<24:27,  1.45s/it]

now in row:  632


 39%|███████████████                        | 633/1641 [03:57<21:04,  1.25s/it]

now in row:  633


 39%|███████████████                        | 634/1641 [03:57<18:47,  1.12s/it]

now in row:  634


 39%|███████████████                        | 635/1641 [03:59<22:54,  1.37s/it]

now in row:  635


 39%|███████████████                        | 636/1641 [04:01<22:55,  1.37s/it]

now in row:  636


 39%|███████████████▏                       | 637/1641 [04:02<22:40,  1.36s/it]

now in row:  637


 39%|███████████████▏                       | 638/1641 [04:03<22:37,  1.35s/it]

now in row:  638


 39%|███████████████▏                       | 639/1641 [04:04<20:47,  1.24s/it]

now in row:  639


 39%|███████████████▏                       | 640/1641 [04:06<22:23,  1.34s/it]

now in row:  640


 39%|███████████████▏                       | 641/1641 [04:09<31:24,  1.88s/it]

now in row:  641


 39%|███████████████▎                       | 642/1641 [04:10<26:16,  1.58s/it]

now in row:  642


 39%|███████████████▎                       | 643/1641 [04:11<22:39,  1.36s/it]

now in row:  643


 39%|███████████████▎                       | 644/1641 [04:12<19:53,  1.20s/it]

now in row:  644


 39%|███████████████▎                       | 645/1641 [04:13<18:30,  1.11s/it]

now in row:  645


 39%|███████████████▎                       | 646/1641 [04:13<17:45,  1.07s/it]

now in row:  646


 39%|███████████████▍                       | 647/1641 [04:14<16:36,  1.00s/it]

now in row:  647


 39%|███████████████▍                       | 648/1641 [04:15<16:15,  1.02it/s]

now in row:  648


 40%|███████████████▍                       | 649/1641 [04:16<15:45,  1.05it/s]

now in row:  649


 40%|███████████████▍                       | 650/1641 [04:17<15:58,  1.03it/s]

now in row:  650


 40%|███████████████▍                       | 651/1641 [04:18<15:18,  1.08it/s]

now in row:  651


 40%|███████████████▍                       | 652/1641 [04:21<25:40,  1.56s/it]

now in row:  652


 40%|███████████████▌                       | 653/1641 [04:22<21:53,  1.33s/it]

now in row:  653


 40%|███████████████▌                       | 654/1641 [04:23<19:55,  1.21s/it]

now in row:  654


 40%|███████████████▌                       | 655/1641 [04:24<19:38,  1.20s/it]

now in row:  655


 40%|███████████████▌                       | 656/1641 [04:25<17:50,  1.09s/it]

now in row:  656


 40%|███████████████▌                       | 657/1641 [04:26<19:04,  1.16s/it]

now in row:  657


 40%|███████████████▋                       | 658/1641 [04:27<18:59,  1.16s/it]

now in row:  658


 40%|███████████████▋                       | 659/1641 [04:28<17:32,  1.07s/it]

now in row:  659


 40%|███████████████▋                       | 660/1641 [04:29<17:03,  1.04s/it]

now in row:  660


 40%|███████████████▋                       | 661/1641 [04:30<16:30,  1.01s/it]

now in row:  661


 40%|███████████████▋                       | 662/1641 [04:31<16:58,  1.04s/it]

now in row:  662


 40%|███████████████▊                       | 663/1641 [04:33<22:56,  1.41s/it]

now in row:  663


 40%|███████████████▊                       | 664/1641 [04:36<29:41,  1.82s/it]

now in row:  664


 41%|███████████████▊                       | 665/1641 [04:37<26:48,  1.65s/it]

now in row:  665


 41%|███████████████▊                       | 666/1641 [04:38<23:00,  1.42s/it]

now in row:  666


 41%|███████████████▊                       | 667/1641 [04:39<20:43,  1.28s/it]

now in row:  667


 41%|███████████████▉                       | 668/1641 [04:40<19:22,  1.19s/it]

now in row:  668


 41%|███████████████▉                       | 669/1641 [04:41<17:34,  1.08s/it]

now in row:  669


 41%|███████████████▉                       | 670/1641 [04:42<17:35,  1.09s/it]

now in row:  670


 41%|███████████████▉                       | 671/1641 [04:43<16:27,  1.02s/it]

now in row:  671


 41%|███████████████▉                       | 672/1641 [04:44<17:19,  1.07s/it]

now in row:  672


 41%|███████████████▉                       | 673/1641 [04:45<16:30,  1.02s/it]

now in row:  673


 41%|████████████████                       | 674/1641 [04:46<15:30,  1.04it/s]

now in row:  674


 41%|████████████████                       | 675/1641 [04:47<14:50,  1.08it/s]

now in row:  675


 41%|████████████████                       | 676/1641 [04:48<17:37,  1.10s/it]

now in row:  676


 41%|████████████████                       | 677/1641 [04:49<17:26,  1.09s/it]

now in row:  677


 41%|████████████████                       | 678/1641 [04:50<16:38,  1.04s/it]

now in row:  678


 41%|████████████████▏                      | 679/1641 [04:51<15:57,  1.00it/s]

now in row:  679


 41%|████████████████▏                      | 680/1641 [04:52<16:01,  1.00s/it]

now in row:  680


 41%|████████████████▏                      | 681/1641 [04:53<14:42,  1.09it/s]

now in row:  681


 42%|████████████████▏                      | 682/1641 [04:54<13:48,  1.16it/s]

now in row:  682


 42%|████████████████▏                      | 683/1641 [04:54<13:09,  1.21it/s]

now in row:  683


 42%|████████████████▎                      | 684/1641 [04:55<12:58,  1.23it/s]

now in row:  684


 42%|████████████████▎                      | 685/1641 [04:56<12:42,  1.25it/s]

now in row:  685


 42%|████████████████▎                      | 686/1641 [04:57<12:28,  1.28it/s]

now in row:  686


 42%|████████████████▎                      | 687/1641 [04:57<12:12,  1.30it/s]

now in row:  687


 42%|████████████████▎                      | 688/1641 [04:58<12:33,  1.27it/s]

now in row:  688


 42%|████████████████▎                      | 689/1641 [04:59<12:43,  1.25it/s]

now in row:  689


 42%|████████████████▍                      | 690/1641 [05:00<12:35,  1.26it/s]

now in row:  690


 42%|████████████████▍                      | 691/1641 [05:01<13:08,  1.21it/s]

now in row:  691


 42%|████████████████▍                      | 692/1641 [05:02<13:02,  1.21it/s]

now in row:  692


 42%|████████████████▍                      | 693/1641 [05:02<12:36,  1.25it/s]

now in row:  693


 42%|████████████████▍                      | 694/1641 [05:03<12:13,  1.29it/s]

now in row:  694


 42%|████████████████▌                      | 695/1641 [05:07<28:34,  1.81s/it]

now in row:  695


 42%|████████████████▌                      | 696/1641 [05:08<25:22,  1.61s/it]

now in row:  696


 42%|████████████████▌                      | 697/1641 [05:10<23:07,  1.47s/it]

now in row:  697


 43%|████████████████▌                      | 698/1641 [05:10<20:00,  1.27s/it]

now in row:  698


 43%|████████████████▌                      | 699/1641 [05:12<19:27,  1.24s/it]

now in row:  699


 43%|████████████████▋                      | 700/1641 [05:12<17:30,  1.12s/it]

now in row:  700


 43%|████████████████▋                      | 701/1641 [05:13<16:38,  1.06s/it]

now in row:  701


 43%|████████████████▋                      | 702/1641 [05:14<15:48,  1.01s/it]

now in row:  702


 43%|████████████████▋                      | 703/1641 [05:15<15:00,  1.04it/s]

now in row:  703


 43%|████████████████▋                      | 704/1641 [05:16<15:19,  1.02it/s]

now in row:  704


 43%|████████████████▊                      | 705/1641 [05:17<15:10,  1.03it/s]

now in row:  705


 43%|████████████████▊                      | 706/1641 [05:18<14:54,  1.05it/s]

now in row:  706


 43%|████████████████▊                      | 707/1641 [05:19<15:02,  1.04it/s]

now in row:  707


 43%|████████████████▊                      | 708/1641 [05:20<14:22,  1.08it/s]

now in row:  708


 43%|████████████████▊                      | 709/1641 [05:21<14:51,  1.05it/s]

now in row:  709


 43%|████████████████▊                      | 710/1641 [05:22<13:57,  1.11it/s]

now in row:  710


 43%|████████████████▉                      | 711/1641 [05:22<13:27,  1.15it/s]

now in row:  711


 43%|████████████████▉                      | 712/1641 [05:23<13:36,  1.14it/s]

now in row:  712


 43%|████████████████▉                      | 713/1641 [05:24<13:38,  1.13it/s]

now in row:  713


 44%|████████████████▉                      | 714/1641 [05:25<13:24,  1.15it/s]

now in row:  714


 44%|████████████████▉                      | 715/1641 [05:26<13:07,  1.18it/s]

now in row:  715


 44%|█████████████████                      | 716/1641 [05:27<17:03,  1.11s/it]

now in row:  716


 44%|█████████████████                      | 717/1641 [05:29<21:13,  1.38s/it]

now in row:  717


 44%|█████████████████                      | 718/1641 [05:31<20:19,  1.32s/it]

now in row:  718


 44%|█████████████████                      | 719/1641 [05:32<18:44,  1.22s/it]

now in row:  719


 44%|█████████████████                      | 720/1641 [05:33<17:12,  1.12s/it]

now in row:  720


 44%|█████████████████▏                     | 721/1641 [05:33<16:20,  1.07s/it]

now in row:  721


 44%|█████████████████▏                     | 722/1641 [05:35<17:14,  1.13s/it]

now in row:  722


 44%|█████████████████▏                     | 723/1641 [05:36<16:17,  1.06s/it]

now in row:  723


 44%|█████████████████▏                     | 724/1641 [05:37<15:10,  1.01it/s]

now in row:  724


 44%|█████████████████▏                     | 725/1641 [05:37<14:33,  1.05it/s]

now in row:  725


 44%|█████████████████▎                     | 726/1641 [05:38<14:10,  1.08it/s]

now in row:  726


 44%|█████████████████▎                     | 727/1641 [05:39<13:54,  1.10it/s]

now in row:  727


 44%|█████████████████▎                     | 728/1641 [05:40<13:37,  1.12it/s]

now in row:  728


 44%|█████████████████▎                     | 729/1641 [05:41<13:19,  1.14it/s]

now in row:  729


 44%|█████████████████▎                     | 730/1641 [05:42<14:30,  1.05it/s]

now in row:  730


 45%|█████████████████▎                     | 731/1641 [05:43<14:25,  1.05it/s]

now in row:  731


 45%|█████████████████▍                     | 732/1641 [05:44<14:12,  1.07it/s]

now in row:  732


 45%|█████████████████▍                     | 733/1641 [05:49<31:30,  2.08s/it]

now in row:  733


 45%|█████████████████▍                     | 734/1641 [05:50<29:21,  1.94s/it]

now in row:  734


 45%|█████████████████▍                     | 735/1641 [05:51<24:21,  1.61s/it]

now in row:  735


 45%|█████████████████▍                     | 736/1641 [05:52<21:43,  1.44s/it]

now in row:  736


 45%|█████████████████▌                     | 737/1641 [05:53<18:56,  1.26s/it]

now in row:  737


 45%|█████████████████▌                     | 738/1641 [05:54<17:14,  1.15s/it]

now in row:  738


 45%|█████████████████▌                     | 739/1641 [05:55<15:43,  1.05s/it]

now in row:  739


 45%|█████████████████▌                     | 740/1641 [05:55<14:30,  1.03it/s]

now in row:  740


 45%|█████████████████▌                     | 741/1641 [05:56<13:43,  1.09it/s]

now in row:  741


 45%|█████████████████▋                     | 742/1641 [05:57<13:17,  1.13it/s]

now in row:  742


 45%|█████████████████▋                     | 743/1641 [05:58<12:59,  1.15it/s]

now in row:  743


 45%|█████████████████▋                     | 744/1641 [05:59<13:22,  1.12it/s]

now in row:  744


 45%|█████████████████▋                     | 745/1641 [06:00<12:57,  1.15it/s]

now in row:  745


 45%|█████████████████▋                     | 746/1641 [06:00<12:20,  1.21it/s]

now in row:  746


 46%|█████████████████▊                     | 747/1641 [06:01<11:53,  1.25it/s]

now in row:  747


 46%|█████████████████▊                     | 748/1641 [06:02<12:00,  1.24it/s]

now in row:  748


 46%|█████████████████▊                     | 749/1641 [06:03<13:10,  1.13it/s]

now in row:  749


 46%|█████████████████▊                     | 750/1641 [06:04<13:35,  1.09it/s]

now in row:  750


 46%|█████████████████▊                     | 751/1641 [06:05<13:45,  1.08it/s]

now in row:  751


 46%|█████████████████▊                     | 752/1641 [06:06<15:10,  1.02s/it]

now in row:  752


 46%|█████████████████▉                     | 753/1641 [06:07<15:02,  1.02s/it]

now in row:  753


 46%|█████████████████▉                     | 754/1641 [06:09<18:42,  1.27s/it]

now in row:  754


 46%|█████████████████▉                     | 755/1641 [06:10<18:19,  1.24s/it]

now in row:  755


 46%|█████████████████▉                     | 756/1641 [06:11<16:43,  1.13s/it]

now in row:  756


 46%|█████████████████▉                     | 757/1641 [06:12<17:57,  1.22s/it]

now in row:  757


 46%|██████████████████                     | 758/1641 [06:14<17:52,  1.21s/it]

now in row:  758


 46%|██████████████████                     | 759/1641 [06:15<17:51,  1.22s/it]

now in row:  759


 46%|██████████████████                     | 760/1641 [06:16<16:35,  1.13s/it]

now in row:  760


 46%|██████████████████                     | 761/1641 [06:17<18:10,  1.24s/it]

now in row:  761


 46%|██████████████████                     | 762/1641 [06:19<18:40,  1.28s/it]

now in row:  762


 46%|██████████████████▏                    | 763/1641 [06:20<18:54,  1.29s/it]

now in row:  763


 47%|██████████████████▏                    | 764/1641 [06:22<21:20,  1.46s/it]

now in row:  764


 47%|██████████████████▏                    | 765/1641 [06:23<21:32,  1.48s/it]

now in row:  765


 47%|██████████████████▏                    | 766/1641 [06:25<21:34,  1.48s/it]

now in row:  766


 47%|██████████████████▏                    | 767/1641 [06:26<20:04,  1.38s/it]

now in row:  767


 47%|██████████████████▎                    | 768/1641 [06:27<17:36,  1.21s/it]

now in row:  768


 47%|██████████████████▎                    | 769/1641 [06:28<17:55,  1.23s/it]

now in row:  769


 47%|██████████████████▎                    | 770/1641 [06:29<18:23,  1.27s/it]

now in row:  770


 47%|██████████████████▎                    | 771/1641 [06:31<20:21,  1.40s/it]

now in row:  771


 47%|██████████████████▎                    | 772/1641 [06:33<20:10,  1.39s/it]

now in row:  772


 47%|██████████████████▎                    | 773/1641 [06:34<18:49,  1.30s/it]

now in row:  773


 47%|██████████████████▍                    | 774/1641 [06:35<19:31,  1.35s/it]

now in row:  774


 47%|██████████████████▍                    | 775/1641 [06:37<21:40,  1.50s/it]

now in row:  775


 47%|██████████████████▍                    | 776/1641 [06:39<25:56,  1.80s/it]

now in row:  776


 47%|██████████████████▍                    | 777/1641 [06:41<25:53,  1.80s/it]

now in row:  777


 47%|██████████████████▍                    | 778/1641 [06:42<22:57,  1.60s/it]

now in row:  778


 47%|██████████████████▌                    | 779/1641 [06:44<25:01,  1.74s/it]

now in row:  779


 48%|██████████████████▌                    | 780/1641 [06:46<22:59,  1.60s/it]

now in row:  780


 48%|██████████████████▌                    | 781/1641 [06:47<22:49,  1.59s/it]

now in row:  781


 48%|██████████████████▌                    | 782/1641 [06:49<22:27,  1.57s/it]

now in row:  782


 48%|██████████████████▌                    | 783/1641 [06:50<22:09,  1.55s/it]

now in row:  783


 48%|██████████████████▋                    | 784/1641 [06:51<19:20,  1.35s/it]

now in row:  784


 48%|██████████████████▋                    | 785/1641 [06:52<18:03,  1.27s/it]

now in row:  785


 48%|██████████████████▋                    | 786/1641 [06:53<16:16,  1.14s/it]

now in row:  786


 48%|██████████████████▋                    | 787/1641 [06:54<14:16,  1.00s/it]

now in row:  787


 48%|██████████████████▋                    | 788/1641 [06:55<13:55,  1.02it/s]

now in row:  788


 48%|██████████████████▊                    | 789/1641 [06:55<12:35,  1.13it/s]

now in row:  789


 48%|██████████████████▊                    | 790/1641 [06:57<16:00,  1.13s/it]

now in row:  790


 48%|██████████████████▊                    | 791/1641 [06:59<20:29,  1.45s/it]

now in row:  791


 48%|██████████████████▊                    | 792/1641 [07:01<19:42,  1.39s/it]

now in row:  792


 48%|██████████████████▊                    | 793/1641 [07:02<21:37,  1.53s/it]

now in row:  793


 48%|██████████████████▊                    | 794/1641 [07:04<23:14,  1.65s/it]

now in row:  794


 48%|██████████████████▉                    | 795/1641 [07:07<26:15,  1.86s/it]

now in row:  795


 49%|██████████████████▉                    | 796/1641 [07:08<25:46,  1.83s/it]

now in row:  796


 49%|██████████████████▉                    | 797/1641 [07:11<27:14,  1.94s/it]

now in row:  797


 49%|██████████████████▉                    | 798/1641 [07:12<26:55,  1.92s/it]

now in row:  798


 49%|██████████████████▉                    | 799/1641 [07:14<26:35,  1.90s/it]

now in row:  799


 49%|███████████████████                    | 800/1641 [07:16<27:49,  1.99s/it]

now in row:  800


 49%|███████████████████                    | 801/1641 [07:18<26:52,  1.92s/it]

now in row:  801


 49%|███████████████████                    | 802/1641 [07:20<27:06,  1.94s/it]

now in row:  802


 49%|███████████████████                    | 803/1641 [07:22<26:56,  1.93s/it]

now in row:  803


 49%|███████████████████                    | 804/1641 [07:24<27:39,  1.98s/it]

now in row:  804


 49%|███████████████████▏                   | 805/1641 [07:27<29:16,  2.10s/it]

now in row:  805


 49%|███████████████████▏                   | 806/1641 [07:28<28:16,  2.03s/it]

now in row:  806


 49%|███████████████████▏                   | 807/1641 [07:30<27:46,  2.00s/it]

now in row:  807


 49%|███████████████████▏                   | 808/1641 [07:32<27:21,  1.97s/it]

now in row:  808


 49%|███████████████████▏                   | 809/1641 [07:34<24:38,  1.78s/it]

now in row:  809


 49%|███████████████████▎                   | 810/1641 [07:35<24:19,  1.76s/it]

now in row:  810


 49%|███████████████████▎                   | 811/1641 [07:38<27:45,  2.01s/it]

now in row:  811


 49%|███████████████████▎                   | 812/1641 [07:40<27:39,  2.00s/it]

now in row:  812


 50%|███████████████████▎                   | 813/1641 [07:42<26:30,  1.92s/it]

now in row:  813


 50%|███████████████████▎                   | 814/1641 [07:44<26:52,  1.95s/it]

now in row:  814


 50%|███████████████████▎                   | 815/1641 [07:45<26:01,  1.89s/it]

now in row:  815


 50%|███████████████████▍                   | 816/1641 [07:48<30:38,  2.23s/it]

now in row:  816


 50%|███████████████████▍                   | 817/1641 [07:50<28:47,  2.10s/it]

now in row:  817


 50%|███████████████████▍                   | 818/1641 [07:52<27:21,  1.99s/it]

now in row:  818


 50%|███████████████████▍                   | 819/1641 [07:54<27:04,  1.98s/it]

now in row:  819


 50%|███████████████████▍                   | 820/1641 [07:56<26:45,  1.96s/it]

now in row:  820


 50%|███████████████████▌                   | 821/1641 [07:58<26:49,  1.96s/it]

now in row:  821


 50%|███████████████████▌                   | 822/1641 [08:00<25:59,  1.90s/it]

now in row:  822


 50%|██████████████████▌                  | 823/1641 [08:11<1:04:30,  4.73s/it]

now in row:  823


 50%|██████████████████▌                  | 824/1641 [08:25<1:41:12,  7.43s/it]

now in row:  824


 50%|██████████████████▌                  | 825/1641 [08:27<1:21:16,  5.98s/it]

now in row:  825


 50%|██████████████████▌                  | 826/1641 [08:29<1:03:13,  4.65s/it]

now in row:  826


 50%|███████████████████▋                   | 827/1641 [08:30<47:27,  3.50s/it]

now in row:  827


 50%|███████████████████▋                   | 828/1641 [08:31<37:48,  2.79s/it]

now in row:  828


 51%|███████████████████▋                   | 829/1641 [08:32<29:46,  2.20s/it]

now in row:  829


 51%|███████████████████▋                   | 830/1641 [08:32<24:18,  1.80s/it]

now in row:  830


 51%|███████████████████▋                   | 831/1641 [08:34<21:48,  1.62s/it]

now in row:  831


 51%|███████████████████▊                   | 832/1641 [08:35<18:57,  1.41s/it]

now in row:  832


 51%|███████████████████▊                   | 833/1641 [08:35<16:51,  1.25s/it]

now in row:  833


 51%|███████████████████▊                   | 834/1641 [08:36<15:11,  1.13s/it]

now in row:  834


 51%|███████████████████▊                   | 835/1641 [08:37<14:26,  1.07s/it]

now in row:  835


 51%|███████████████████▊                   | 836/1641 [08:38<13:20,  1.01it/s]

now in row:  836


 51%|███████████████████▉                   | 837/1641 [08:39<13:13,  1.01it/s]

now in row:  837


 51%|███████████████████▉                   | 838/1641 [08:40<12:26,  1.08it/s]

now in row:  838


 51%|███████████████████▉                   | 839/1641 [08:41<12:11,  1.10it/s]

now in row:  839


 51%|███████████████████▉                   | 840/1641 [08:42<15:21,  1.15s/it]

now in row:  840


 51%|███████████████████▉                   | 841/1641 [08:43<14:06,  1.06s/it]

now in row:  841


 51%|████████████████████                   | 842/1641 [08:44<14:55,  1.12s/it]

now in row:  842


 51%|████████████████████                   | 843/1641 [08:45<14:09,  1.06s/it]

now in row:  843


 51%|████████████████████                   | 844/1641 [08:46<12:56,  1.03it/s]

now in row:  844


 51%|████████████████████                   | 845/1641 [08:47<12:12,  1.09it/s]

now in row:  845


 52%|████████████████████                   | 846/1641 [08:54<38:19,  2.89s/it]

now in row:  846


 52%|████████████████████▏                  | 847/1641 [08:55<30:19,  2.29s/it]

now in row:  847


 52%|████████████████████▏                  | 848/1641 [08:56<24:58,  1.89s/it]

now in row:  848


 52%|████████████████████▏                  | 849/1641 [08:57<20:59,  1.59s/it]

now in row:  849


 52%|████████████████████▏                  | 850/1641 [08:58<18:47,  1.43s/it]

now in row:  850


 52%|████████████████████▏                  | 851/1641 [09:00<19:01,  1.45s/it]

now in row:  851


 52%|████████████████████▏                  | 852/1641 [09:01<16:52,  1.28s/it]

now in row:  852


 52%|████████████████████▎                  | 853/1641 [09:02<15:53,  1.21s/it]

now in row:  853


 52%|████████████████████▎                  | 854/1641 [09:03<14:22,  1.10s/it]

now in row:  854


 52%|████████████████████▎                  | 855/1641 [09:04<14:20,  1.10s/it]

now in row:  855


 52%|████████████████████▎                  | 856/1641 [09:04<13:20,  1.02s/it]

now in row:  856


 52%|████████████████████▎                  | 857/1641 [09:05<12:31,  1.04it/s]

now in row:  857


 52%|████████████████████▍                  | 858/1641 [09:06<12:34,  1.04it/s]

now in row:  858


 52%|████████████████████▍                  | 859/1641 [09:07<12:05,  1.08it/s]

now in row:  859


 52%|████████████████████▍                  | 860/1641 [09:08<11:41,  1.11it/s]

now in row:  860


 52%|████████████████████▍                  | 861/1641 [09:09<11:40,  1.11it/s]

now in row:  861


 53%|████████████████████▍                  | 862/1641 [09:10<14:25,  1.11s/it]

now in row:  862


 53%|████████████████████▌                  | 863/1641 [09:12<14:32,  1.12s/it]

now in row:  863


 53%|████████████████████▌                  | 864/1641 [09:18<33:22,  2.58s/it]

now in row:  864


 53%|████████████████████▌                  | 865/1641 [09:18<26:39,  2.06s/it]

now in row:  865


 53%|████████████████████▌                  | 866/1641 [09:19<21:56,  1.70s/it]

now in row:  866


 53%|████████████████████▌                  | 867/1641 [09:20<18:34,  1.44s/it]

now in row:  867


 53%|████████████████████▋                  | 868/1641 [09:21<16:17,  1.26s/it]

now in row:  868


 53%|████████████████████▋                  | 869/1641 [09:22<14:50,  1.15s/it]

now in row:  869


 53%|████████████████████▋                  | 870/1641 [09:23<13:37,  1.06s/it]

now in row:  870


 53%|████████████████████▋                  | 871/1641 [09:24<12:53,  1.00s/it]

now in row:  871


 53%|████████████████████▋                  | 872/1641 [09:24<12:20,  1.04it/s]

now in row:  872


 53%|████████████████████▋                  | 873/1641 [09:25<11:56,  1.07it/s]

now in row:  873


 53%|████████████████████▊                  | 874/1641 [09:26<12:34,  1.02it/s]

now in row:  874


 53%|████████████████████▊                  | 875/1641 [09:27<12:04,  1.06it/s]

now in row:  875


 53%|████████████████████▊                  | 876/1641 [09:28<11:46,  1.08it/s]

now in row:  876


 53%|████████████████████▊                  | 877/1641 [09:30<15:54,  1.25s/it]

now in row:  877


 54%|████████████████████▊                  | 878/1641 [09:31<14:27,  1.14s/it]

now in row:  878


 54%|████████████████████▉                  | 879/1641 [09:32<13:55,  1.10s/it]

now in row:  879


 54%|████████████████████▉                  | 880/1641 [09:33<13:11,  1.04s/it]

now in row:  880


 54%|████████████████████▉                  | 881/1641 [09:34<12:16,  1.03it/s]

now in row:  881


 54%|████████████████████▉                  | 882/1641 [09:35<11:43,  1.08it/s]

now in row:  882


 54%|████████████████████▉                  | 883/1641 [09:35<11:41,  1.08it/s]

now in row:  883


 54%|█████████████████████                  | 884/1641 [09:36<11:36,  1.09it/s]

now in row:  884


 54%|█████████████████████                  | 885/1641 [09:37<12:13,  1.03it/s]

now in row:  885


 54%|█████████████████████                  | 886/1641 [09:38<11:28,  1.10it/s]

now in row:  886


 54%|█████████████████████                  | 887/1641 [09:39<11:14,  1.12it/s]

now in row:  887


 54%|█████████████████████                  | 888/1641 [09:40<11:05,  1.13it/s]

now in row:  888


 54%|█████████████████████▏                 | 889/1641 [09:41<11:38,  1.08it/s]

now in row:  889


 54%|█████████████████████▏                 | 890/1641 [09:42<11:12,  1.12it/s]

now in row:  890


 54%|█████████████████████▏                 | 891/1641 [09:43<12:00,  1.04it/s]

now in row:  891


 54%|█████████████████████▏                 | 892/1641 [09:44<10:59,  1.14it/s]

now in row:  892


 54%|█████████████████████▏                 | 893/1641 [09:44<10:21,  1.20it/s]

now in row:  893


 54%|█████████████████████▏                 | 894/1641 [09:45<10:07,  1.23it/s]

now in row:  894


 55%|█████████████████████▎                 | 895/1641 [09:46<10:32,  1.18it/s]

now in row:  895


 55%|█████████████████████▎                 | 896/1641 [09:48<16:32,  1.33s/it]

now in row:  896


 55%|█████████████████████▎                 | 897/1641 [09:49<14:19,  1.15s/it]

now in row:  897


 55%|█████████████████████▎                 | 898/1641 [09:50<14:00,  1.13s/it]

now in row:  898


 55%|█████████████████████▎                 | 899/1641 [09:51<13:40,  1.11s/it]

now in row:  899


 55%|█████████████████████▍                 | 900/1641 [09:52<13:50,  1.12s/it]

now in row:  900


 55%|█████████████████████▍                 | 901/1641 [09:53<13:17,  1.08s/it]

now in row:  901


 55%|█████████████████████▍                 | 902/1641 [09:54<13:03,  1.06s/it]

now in row:  902


 55%|█████████████████████▍                 | 903/1641 [09:56<13:29,  1.10s/it]

now in row:  903


 55%|█████████████████████▍                 | 904/1641 [09:57<14:49,  1.21s/it]

now in row:  904


 55%|█████████████████████▌                 | 905/1641 [09:59<15:57,  1.30s/it]

now in row:  905


 55%|█████████████████████▌                 | 906/1641 [10:00<15:19,  1.25s/it]

now in row:  906


 55%|█████████████████████▌                 | 907/1641 [10:01<15:20,  1.25s/it]

now in row:  907


 55%|█████████████████████▌                 | 908/1641 [10:05<25:45,  2.11s/it]

now in row:  908


 55%|█████████████████████▌                 | 909/1641 [10:06<20:41,  1.70s/it]

now in row:  909


 55%|█████████████████████▋                 | 910/1641 [10:07<18:24,  1.51s/it]

now in row:  910


 56%|█████████████████████▋                 | 911/1641 [10:08<16:38,  1.37s/it]

now in row:  911


 56%|█████████████████████▋                 | 912/1641 [10:09<15:44,  1.30s/it]

now in row:  912


 56%|█████████████████████▋                 | 913/1641 [10:11<16:52,  1.39s/it]

now in row:  913


 56%|█████████████████████▋                 | 914/1641 [10:12<15:36,  1.29s/it]

now in row:  914


 56%|█████████████████████▋                 | 915/1641 [10:13<16:02,  1.33s/it]

now in row:  915


 56%|█████████████████████▊                 | 916/1641 [10:15<16:18,  1.35s/it]

now in row:  916


 56%|█████████████████████▊                 | 917/1641 [10:16<17:33,  1.46s/it]

now in row:  917


 56%|█████████████████████▊                 | 918/1641 [10:17<15:44,  1.31s/it]

now in row:  918


 56%|█████████████████████▊                 | 919/1641 [10:18<15:21,  1.28s/it]

now in row:  919


 56%|█████████████████████▊                 | 920/1641 [10:20<17:42,  1.47s/it]

now in row:  920


 56%|█████████████████████▉                 | 921/1641 [10:21<15:02,  1.25s/it]

now in row:  921


 56%|█████████████████████▉                 | 922/1641 [10:22<13:52,  1.16s/it]

now in row:  922


 56%|█████████████████████▉                 | 923/1641 [10:23<12:22,  1.03s/it]

now in row:  923


 56%|█████████████████████▉                 | 924/1641 [10:24<11:29,  1.04it/s]

now in row:  924


 56%|█████████████████████▉                 | 925/1641 [10:24<11:09,  1.07it/s]

now in row:  925


 56%|██████████████████████                 | 926/1641 [10:25<10:31,  1.13it/s]

now in row:  926


 56%|██████████████████████                 | 927/1641 [10:27<13:06,  1.10s/it]

now in row:  927


 57%|██████████████████████                 | 928/1641 [10:29<16:22,  1.38s/it]

now in row:  928


 57%|██████████████████████                 | 929/1641 [10:31<17:37,  1.49s/it]

now in row:  929


 57%|██████████████████████                 | 930/1641 [10:31<14:59,  1.26s/it]

now in row:  930


 57%|██████████████████████▏                | 931/1641 [10:33<16:49,  1.42s/it]

now in row:  931


 57%|██████████████████████▏                | 932/1641 [10:35<18:12,  1.54s/it]

now in row:  932


 57%|██████████████████████▏                | 933/1641 [10:37<19:04,  1.62s/it]

now in row:  933


 57%|██████████████████████▏                | 934/1641 [10:40<24:17,  2.06s/it]

now in row:  934


 57%|██████████████████████▏                | 935/1641 [10:42<23:28,  1.99s/it]

now in row:  935


 57%|██████████████████████▏                | 936/1641 [10:44<23:47,  2.02s/it]

now in row:  936


 57%|██████████████████████▎                | 937/1641 [10:50<36:54,  3.15s/it]

now in row:  937


 57%|██████████████████████▎                | 938/1641 [10:50<28:41,  2.45s/it]

now in row:  938


 57%|██████████████████████▎                | 939/1641 [10:56<39:48,  3.40s/it]

now in row:  939


 57%|██████████████████████▎                | 940/1641 [11:00<41:05,  3.52s/it]

now in row:  940


 57%|██████████████████████▎                | 941/1641 [11:02<35:45,  3.06s/it]

now in row:  941


 57%|██████████████████████▍                | 942/1641 [11:03<28:03,  2.41s/it]

now in row:  942


 57%|██████████████████████▍                | 943/1641 [11:04<22:42,  1.95s/it]

now in row:  943


 58%|██████████████████████▍                | 944/1641 [11:05<21:25,  1.84s/it]

now in row:  944


 58%|██████████████████████▍                | 945/1641 [11:07<20:44,  1.79s/it]

now in row:  945


 58%|██████████████████████▍                | 946/1641 [11:08<20:14,  1.75s/it]

now in row:  946


 58%|██████████████████████▌                | 947/1641 [11:10<19:44,  1.71s/it]

now in row:  947


 58%|██████████████████████▌                | 948/1641 [11:12<19:47,  1.71s/it]

now in row:  948


 58%|██████████████████████▌                | 949/1641 [11:14<20:22,  1.77s/it]

now in row:  949


 58%|██████████████████████▌                | 950/1641 [11:14<16:53,  1.47s/it]

now in row:  950


 58%|██████████████████████▌                | 951/1641 [11:15<14:25,  1.25s/it]

now in row:  951


 58%|██████████████████████▋                | 952/1641 [11:17<16:25,  1.43s/it]

now in row:  952


 58%|██████████████████████▋                | 953/1641 [11:18<14:23,  1.26s/it]

now in row:  953


 58%|██████████████████████▋                | 954/1641 [11:19<13:30,  1.18s/it]

now in row:  954


 58%|██████████████████████▋                | 955/1641 [11:20<12:39,  1.11s/it]

now in row:  955


 58%|██████████████████████▋                | 956/1641 [11:26<30:15,  2.65s/it]

now in row:  956


 58%|██████████████████████▋                | 957/1641 [11:28<28:01,  2.46s/it]

now in row:  957


 58%|██████████████████████▊                | 958/1641 [11:35<43:14,  3.80s/it]

now in row:  958


 58%|██████████████████████▊                | 959/1641 [11:37<36:26,  3.21s/it]

now in row:  959


 59%|██████████████████████▊                | 960/1641 [11:38<28:42,  2.53s/it]

now in row:  960


 59%|██████████████████████▊                | 961/1641 [11:39<22:46,  2.01s/it]

now in row:  961


 59%|██████████████████████▊                | 962/1641 [11:39<18:42,  1.65s/it]

now in row:  962


 59%|██████████████████████▉                | 963/1641 [11:41<17:40,  1.56s/it]

now in row:  963


 59%|██████████████████████▉                | 964/1641 [11:42<17:54,  1.59s/it]

now in row:  964


 59%|██████████████████████▉                | 965/1641 [11:43<15:16,  1.36s/it]

now in row:  965


 59%|██████████████████████▉                | 966/1641 [11:44<14:04,  1.25s/it]

now in row:  966


 59%|██████████████████████▉                | 967/1641 [11:45<12:40,  1.13s/it]

now in row:  967


 59%|███████████████████████                | 968/1641 [11:46<13:14,  1.18s/it]

now in row:  968


 59%|███████████████████████                | 969/1641 [11:51<24:02,  2.15s/it]

now in row:  969


 59%|███████████████████████                | 970/1641 [11:52<20:37,  1.84s/it]

now in row:  970


 59%|███████████████████████                | 971/1641 [11:53<17:36,  1.58s/it]

now in row:  971


 59%|███████████████████████                | 972/1641 [11:55<19:39,  1.76s/it]

now in row:  972


 59%|███████████████████████                | 973/1641 [11:56<17:21,  1.56s/it]

now in row:  973


 59%|███████████████████████▏               | 974/1641 [11:57<15:09,  1.36s/it]

now in row:  974


 59%|███████████████████████▏               | 975/1641 [11:58<13:13,  1.19s/it]

now in row:  975


 59%|███████████████████████▏               | 976/1641 [11:59<13:24,  1.21s/it]

now in row:  976


 60%|███████████████████████▏               | 977/1641 [12:00<12:10,  1.10s/it]

now in row:  977


 60%|███████████████████████▏               | 978/1641 [12:01<12:12,  1.10s/it]

now in row:  978


 60%|███████████████████████▎               | 979/1641 [12:02<11:15,  1.02s/it]

now in row:  979


 60%|███████████████████████▎               | 980/1641 [12:03<11:29,  1.04s/it]

now in row:  980


 60%|███████████████████████▎               | 981/1641 [12:04<11:09,  1.01s/it]

now in row:  981


 60%|███████████████████████▎               | 982/1641 [12:05<11:12,  1.02s/it]

now in row:  982


 60%|███████████████████████▎               | 983/1641 [12:06<12:08,  1.11s/it]

now in row:  983


 60%|███████████████████████▍               | 984/1641 [12:07<11:11,  1.02s/it]

now in row:  984


 60%|███████████████████████▍               | 985/1641 [12:08<10:39,  1.03it/s]

now in row:  985


 60%|███████████████████████▍               | 986/1641 [12:09<10:43,  1.02it/s]

now in row:  986


 60%|███████████████████████▍               | 987/1641 [12:10<11:13,  1.03s/it]

now in row:  987


 60%|███████████████████████▍               | 988/1641 [12:11<11:17,  1.04s/it]

now in row:  988


 60%|███████████████████████▌               | 989/1641 [12:12<11:05,  1.02s/it]

now in row:  989


 60%|███████████████████████▌               | 990/1641 [12:13<10:29,  1.03it/s]

now in row:  990


 60%|███████████████████████▌               | 991/1641 [12:14<09:56,  1.09it/s]

now in row:  991


 60%|███████████████████████▌               | 992/1641 [12:16<12:47,  1.18s/it]

now in row:  992


 61%|███████████████████████▌               | 993/1641 [12:17<12:30,  1.16s/it]

now in row:  993


 61%|███████████████████████▌               | 994/1641 [12:18<12:33,  1.16s/it]

now in row:  994


 61%|███████████████████████▋               | 995/1641 [12:19<12:16,  1.14s/it]

now in row:  995


 61%|███████████████████████▋               | 996/1641 [12:20<12:17,  1.14s/it]

now in row:  996


 61%|███████████████████████▋               | 997/1641 [12:21<11:09,  1.04s/it]

now in row:  997


 61%|███████████████████████▋               | 998/1641 [12:22<12:10,  1.14s/it]

now in row:  998


 61%|███████████████████████▋               | 999/1641 [12:23<11:21,  1.06s/it]

now in row:  999


 61%|███████████████████████▏              | 1000/1641 [12:25<12:29,  1.17s/it]

now in row:  1000


In [21]:
def collect_data(length):
    data_ = []
    for i in range(length):
        f = open(str(i)+'.pickle', 'rb')
        one = pickle.load(f)
        data_.extend(one)
    return data_

length = 11
data_ = collect_data(length)
data_

[('Final Appeal (Civil)',
  '2011',
  "\\'https://legalref.judiciary.hk/lrs/images/ThemeXP/Epage.gif\\'",
  'FACV1/2011',
  'https://legalref.judiciary.hk/lrs/common/ju/ju_frame.jsp?DIS=79800',
  '(06/01/2012)',
  'THE CATHOLIC DIOCESE OF HONG KONG ALSO KNOWN AS THE BISHOP OF THE ROMAN CATHOLIC CHURCH IN HONG KONG INCORPORATION  v. SECRETARY FOR JUSTICE \xa0'),
 ('Final Appeal (Civil)',
  '2011',
  "\\'https://legalref.judiciary.hk/lrs/images/ThemeXP/Epage.gif\\'",
  'FACV1/2011',
  'https://legalref.judiciary.hk/lrs/common/ju/ju_frame.jsp?DIS=78563',
  '(13/10/2011)',
  'THE CATHOLIC DIOCESE OF HONG KONG ALSO KNOWN AS THE BISHOP OF THE ROMAN CATHOLIC CHURCH IN HONG KONG INCORPORATION v. SECRETARY FOR JUSTICE \xa0Reported in :(2011) 14 HKCFAR 754'),
 ('Final Appeal (Civil)',
  '2011',
  "\\'https://legalref.judiciary.hk/lrs/images/ThemeXP/ETpage.gif\\'",
  'FACV2/2011',
  'https://legalref.judiciary.hk/lrs/common/ju/ju_frame.jsp?DIS=79162',
  '(23/11/2011)',
  'WEALTH DUKE LTD AND OTHE

In [22]:
def get_dataframe(data_):
    df = pd.DataFrame(data_, columns=['sub_court_cleared', 'year', 'outcome', \
                                      'name', 'detail_url', 'date', 'persons'])
    return df
df_details = get_dataframe(data_)
print(df_details.shape)
df_details.head(3)

(356, 7)


Unnamed: 0,sub_court_cleared,year,outcome,name,detail_url,date,persons
0,Final Appeal (Civil),2011,\'https://legalref.judiciary.hk/lrs/images/ThemeXP/Epage.gif\',FACV1/2011,https://legalref.judiciary.hk/lrs/common/ju/ju_frame.jsp?DIS=79800,(06/01/2012),THE CATHOLIC DIOCESE OF HONG KONG ALSO KNOWN AS THE BISHOP OF THE ROMAN CATHOLIC CHURCH IN HONG KONG INCORPORATION v. SECRETARY FOR JUSTICE
1,Final Appeal (Civil),2011,\'https://legalref.judiciary.hk/lrs/images/ThemeXP/Epage.gif\',FACV1/2011,https://legalref.judiciary.hk/lrs/common/ju/ju_frame.jsp?DIS=78563,(13/10/2011),THE CATHOLIC DIOCESE OF HONG KONG ALSO KNOWN AS THE BISHOP OF THE ROMAN CATHOLIC CHURCH IN HONG KONG INCORPORATION v. SECRETARY FOR JUSTICE Reported in :(2011) 14 HKCFAR 754
2,Final Appeal (Civil),2011,\'https://legalref.judiciary.hk/lrs/images/ThemeXP/ETpage.gif\',FACV2/2011,https://legalref.judiciary.hk/lrs/common/ju/ju_frame.jsp?DIS=79162,(23/11/2011),WEALTH DUKE LTD AND OTHERS v. BANK OF CHINA (HONG KONG) LTD Reported in :(2011) 14 HKCFAR 863


In [None]:
with open('detail.pickle', 'wb') as ww:
    pickle.dump(df_detail, ww)