# Scrabu Prototype (for a List of Shipment Numbers)

This notebook is for creating a prototype of the Scrabu project. The goal is to download the DHL page for each shipment number in a list and scrape the shipment information from it.

#### Configure Logger

In [4]:
import concurrent
import concurrent.futures  # a bare `import concurrent` does not load the `futures` submodule

from scrabu import generate_shipment_numbers #340434188193324407|340434188193323500
from scrabu import request
from scrabu import html_to_json
from scrabu import shipment_details
from scrabu import save_dictionary
from scrabu import process_shipment_number

In [5]:
# Multi-threading
def main(shipment_number=None, size=None, max_workers=None, start_url="https://www.dhl.de/int-verfolgen/search?language=de&lang=de&domain=de&piececode="):
    """Process a batch of generated shipment numbers on a thread pool.

    The shipment numbers come from ``generate_shipment_numbers``; each one is
    submitted to ``process_shipment_number`` on a worker thread. An exception
    raised for one shipment number is printed and does not stop the others.

    Args:
        shipment_number: Forwarded to ``generate_shipment_numbers``.
        size: Forwarded to ``generate_shipment_numbers``.
        max_workers: Passed to ``ThreadPoolExecutor``; ``None`` selects the
            executor's own default.
        start_url: Currently not used by this function.
            NOTE(review): presumably intended for ``process_shipment_number``;
            confirm that function's signature before forwarding it.

    Returns:
        None. Worker return values are discarded.
    """
    shipment_numbers = generate_shipment_numbers(shipment_number=shipment_number, size=size)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map every future back to its shipment number so a failure can be
        # attributed to the input that caused it.
        future_to_number = {executor.submit(process_shipment_number, sn): sn for sn in shipment_numbers}
        for future in concurrent.futures.as_completed(future_to_number):
            number = future_to_number[future]
            try:
                # result() is called only to re-raise any exception from the worker.
                future.result()
            except Exception as exc:
                print('%r generated an exception: %s' % (number, exc))

In [6]:
# Run the crawl: 1000 shipment numbers generated from the seed below, 4 worker threads.
# Numbers noted by the original author (presumably alternative seeds):
#   340434188193324407 | 00340434188193333560
main(
    shipment_number=340434188193333560,
    size=1000,
    max_workers=4,
)

01:24:21 INFO: Generating shipment numbers with seed: 340434188193333560
01:24:22 INFO: Generated 1000 unique shipment numbers
01:24:23 INFO: Writing file ../data/00340434188193333584.json
01:24:23 INFO: Writing file ../data/00340434188193333591.json
01:24:23 INFO: Writing file ../data/00340434188193333577.json
01:24:23 INFO: Writing file ../data/00340434188193333607.json
01:24:24 INFO: Writing file ../data/00340434188193333614.json
01:24:24 INFO: Writing file ../data/00340434188193333621.json
01:24:24 INFO: Writing file ../data/00340434188193333645.json
01:24:24 INFO: Writing file ../data/00340434188193333638.json
01:24:26 INFO: Writing file ../data/00340434188193333669.json
01:24:26 INFO: Writing file ../data/00340434188193333676.json
01:24:26 INFO: Writing file ../data/00340434188193333652.json
01:24:26 INFO: Writing file ../data/00340434188193333683.json
01:24:27 INFO: Writing file ../data/00340434188193333690.json
01:24:27 INFO: Writing file ../data/00340434188193333706.json
01:24

01:25:07 INFO: Writing file ../data/00340434188193334888.json
01:25:08 INFO: Writing file ../data/00340434188193334895.json
01:25:08 INFO: Writing file ../data/00340434188193334901.json
01:25:08 INFO: Writing file ../data/00340434188193334918.json
01:25:08 INFO: Writing file ../data/00340434188193334925.json
01:25:09 INFO: Writing file ../data/00340434188193334949.json
01:25:09 INFO: Writing file ../data/00340434188193334932.json
01:25:09 INFO: Writing file ../data/00340434188193334956.json
01:25:10 INFO: Writing file ../data/00340434188193334963.json
01:25:11 INFO: Writing file ../data/00340434188193334970.json
01:25:11 INFO: Writing file ../data/00340434188193334987.json
01:25:11 INFO: Writing file ../data/00340434188193334994.json
01:25:11 INFO: Writing file ../data/00340434188193335007.json
01:25:12 INFO: Writing file ../data/00340434188193335021.json
01:25:12 INFO: Writing file ../data/00340434188193335014.json
01:25:12 INFO: Writing file ../data/00340434188193335038.json
01:25:12

01:25:53 INFO: Writing file ../data/00340434188193336219.json
01:25:54 INFO: Writing file ../data/00340434188193336226.json
01:25:54 INFO: Writing file ../data/00340434188193336233.json
01:25:54 INFO: Writing file ../data/00340434188193336240.json
01:25:55 INFO: Writing file ../data/00340434188193336257.json
01:25:55 INFO: Writing file ../data/00340434188193336264.json
01:25:55 INFO: Writing file ../data/00340434188193336271.json
01:25:55 INFO: Writing file ../data/00340434188193336288.json
01:25:56 INFO: Writing file ../data/00340434188193336295.json
01:25:56 INFO: Writing file ../data/00340434188193336301.json
01:25:56 INFO: Writing file ../data/00340434188193336318.json
01:25:56 INFO: Writing file ../data/00340434188193336325.json
01:25:57 INFO: Writing file ../data/00340434188193336332.json
01:25:58 INFO: Writing file ../data/00340434188193336349.json
01:25:58 INFO: Writing file ../data/00340434188193336356.json
01:25:58 INFO: Writing file ../data/00340434188193336363.json
01:25:59

01:26:39 INFO: Writing file ../data/00340434188193337544.json
01:26:39 INFO: Writing file ../data/00340434188193337551.json
01:26:40 INFO: Writing file ../data/00340434188193337568.json
01:26:40 INFO: Writing file ../data/00340434188193337575.json
01:26:40 INFO: Writing file ../data/00340434188193337582.json
01:26:40 INFO: Writing file ../data/00340434188193337599.json
01:26:41 INFO: Writing file ../data/00340434188193337605.json
01:26:41 INFO: Writing file ../data/00340434188193337612.json
01:26:42 INFO: Writing file ../data/00340434188193337629.json
01:26:42 INFO: Writing file ../data/00340434188193337636.json
01:26:43 INFO: Writing file ../data/00340434188193337643.json
01:26:43 INFO: Writing file ../data/00340434188193337650.json
01:26:43 INFO: Writing file ../data/00340434188193337667.json
01:26:43 INFO: Writing file ../data/00340434188193337674.json
01:26:44 INFO: Writing file ../data/00340434188193337681.json
01:26:44 INFO: Writing file ../data/00340434188193337698.json
01:26:44

01:27:26 INFO: Writing file ../data/00340434188193338879.json
01:27:27 INFO: Writing file ../data/00340434188193338886.json
01:27:27 INFO: Writing file ../data/00340434188193338909.json
01:27:28 INFO: Writing file ../data/00340434188193338893.json
01:27:28 INFO: Writing file ../data/00340434188193338916.json
01:27:28 INFO: Writing file ../data/00340434188193338923.json
01:27:29 INFO: Writing file ../data/00340434188193338930.json
01:27:29 INFO: Writing file ../data/00340434188193338954.json
01:27:29 INFO: Writing file ../data/00340434188193338947.json
01:27:30 INFO: Writing file ../data/00340434188193338961.json
01:27:30 INFO: Writing file ../data/00340434188193338978.json
01:27:30 INFO: Writing file ../data/00340434188193338992.json
01:27:30 INFO: Writing file ../data/00340434188193338985.json
01:27:31 INFO: Writing file ../data/00340434188193339005.json
01:27:31 INFO: Writing file ../data/00340434188193339012.json
01:27:32 INFO: Writing file ../data/00340434188193339029.json
01:27:32

01:28:11 INFO: Writing file ../data/00340434188193340209.json
01:28:11 INFO: Writing file ../data/00340434188193340216.json
01:28:12 INFO: Writing file ../data/00340434188193340223.json
01:28:12 INFO: Writing file ../data/00340434188193340230.json
01:28:12 INFO: Writing file ../data/00340434188193340254.json
01:28:12 INFO: Writing file ../data/00340434188193340247.json
01:28:13 INFO: Writing file ../data/00340434188193340261.json
01:28:13 INFO: Writing file ../data/00340434188193340278.json
01:28:13 INFO: Writing file ../data/00340434188193340285.json
01:28:13 INFO: Writing file ../data/00340434188193340292.json
01:28:14 INFO: Writing file ../data/00340434188193340308.json
01:28:14 INFO: Writing file ../data/00340434188193340315.json
01:28:15 INFO: Writing file ../data/00340434188193340322.json
01:28:15 INFO: Writing file ../data/00340434188193340339.json
01:28:16 INFO: Writing file ../data/00340434188193340346.json
01:28:16 INFO: Writing file ../data/00340434188193340353.json
01:28:16

01:28:55 INFO: Writing file ../data/00340434188193341534.json
01:28:55 INFO: Writing file ../data/00340434188193341541.json
01:28:55 INFO: Writing file ../data/00340434188193341558.json
01:28:56 INFO: Writing file ../data/00340434188193341565.json
01:28:57 INFO: Writing file ../data/00340434188193341572.json
01:28:57 INFO: Writing file ../data/00340434188193341589.json
01:28:57 INFO: Writing file ../data/00340434188193341596.json
01:28:58 INFO: Writing file ../data/00340434188193341602.json
01:28:58 INFO: Writing file ../data/00340434188193341619.json
01:28:58 INFO: Writing file ../data/00340434188193341626.json
01:28:58 INFO: Writing file ../data/00340434188193341633.json
01:28:59 INFO: Writing file ../data/00340434188193341640.json
01:28:59 INFO: Writing file ../data/00340434188193341657.json
01:29:00 INFO: Writing file ../data/00340434188193341664.json
01:29:00 INFO: Writing file ../data/00340434188193341688.json
01:29:00 INFO: Writing file ../data/00340434188193341671.json
01:29:01

01:29:41 INFO: Writing file ../data/00340434188193342876.json
01:29:41 INFO: Writing file ../data/00340434188193342883.json
01:29:41 INFO: Writing file ../data/00340434188193342869.json
01:29:41 INFO: Writing file ../data/00340434188193342890.json
01:29:42 INFO: Writing file ../data/00340434188193342906.json
01:29:42 INFO: Writing file ../data/00340434188193342913.json
01:29:42 INFO: Writing file ../data/00340434188193342937.json
01:29:42 INFO: Writing file ../data/00340434188193342920.json
01:29:44 INFO: Writing file ../data/00340434188193342944.json
01:29:44 INFO: Writing file ../data/00340434188193342951.json
01:29:44 INFO: Writing file ../data/00340434188193342975.json
01:29:44 INFO: Writing file ../data/00340434188193342968.json
01:29:45 INFO: Writing file ../data/00340434188193342982.json
01:29:45 INFO: Writing file ../data/00340434188193342999.json
01:29:45 INFO: Writing file ../data/00340434188193343019.json
01:29:45 INFO: Writing file ../data/00340434188193343002.json
01:29:46