# Scrabu Prototype (for a List of Shipment Numbers)

This notebook is a prototype of the Scrabu project. The goal is to download DHL pages for a specific shipment number and scrape the shipment information from them.

#### Configure Logger

In [1]:
from scrabu import generate_shipment_numbers #340434188193324407|340434188193323500
from scrabu import request
from scrabu import html_to_json
from scrabu import shipment_details
from scrabu import save_dictionary
from scrabu import process_shipment_number
import concurrent

In [2]:
# Multi-threading
def main(shipment_number=None, size=None, max_workers=None, start_url="https://www.dhl.de/int-verfolgen/search?language=de&lang=de&domain=de&piececode="):
    """Process a batch of generated shipment numbers on a thread pool.

    The shipment numbers are obtained from ``generate_shipment_numbers`` and
    each one is submitted to ``process_shipment_number`` as its own task.
    A task that raises is reported on stdout and does not stop the
    remaining tasks.

    Args:
        shipment_number: Forwarded to ``generate_shipment_numbers`` as its
            ``shipment_number`` keyword argument.
        size: Forwarded to ``generate_shipment_numbers`` as its ``size``
            keyword argument.
        max_workers: Passed to ``ThreadPoolExecutor``; ``None`` lets the
            executor choose its default thread count.
        start_url: Not used by this function; kept in the signature so
            existing callers that pass it keep working.

    Returns:
        None. Results of successful tasks are discarded here.
    """
    # ``import concurrent`` alone does not load the ``futures`` submodule, so
    # ``concurrent.futures`` is only reachable if something else imported it
    # first. Import it explicitly so the function works on a fresh interpreter.
    import concurrent.futures

    shipment_numbers = generate_shipment_numbers(shipment_number=shipment_number, size=size)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to the shipment number it was submitted for,
        # so a failure can be reported against the right number.
        future_to_number = {executor.submit(process_shipment_number, sn): sn for sn in shipment_numbers}
        for future in concurrent.futures.as_completed(future_to_number):
            number = future_to_number[future]
            try:
                # Called only to surface an exception raised inside the task.
                future.result()
            except Exception as exc:
                print('%r generated an exception: %s' % (number, exc))

In [3]:
# Run the pipeline for 1000 shipment numbers seeded from the given number.
# max_workers=1 limits the thread pool to a single worker thread.
main(
    shipment_number=340434188193324407,
    size=1000,
    max_workers=1,
)

09:56:17 INFO: Generating shipment numbers with seed: 340434188193324407
09:56:17 INFO: Generated 1000 unique shipment numbers
09:56:19 INFO: Writing file ../data/00340434188193324414.json
09:56:20 INFO: Writing file ../data/00340434188193324421.json
09:56:22 INFO: Writing file ../data/00340434188193324438.json
09:56:23 INFO: Writing file ../data/00340434188193324445.json
09:56:25 INFO: Writing file ../data/00340434188193324452.json
09:56:26 INFO: Writing file ../data/00340434188193324469.json
09:56:27 INFO: Writing file ../data/00340434188193324476.json
09:56:29 INFO: Writing file ../data/00340434188193324483.json
09:56:30 INFO: Writing file ../data/00340434188193324490.json
09:56:32 INFO: Writing file ../data/00340434188193324506.json
09:56:33 INFO: Writing file ../data/00340434188193324513.json
09:56:34 INFO: Writing file ../data/00340434188193324520.json
09:56:36 INFO: Writing file ../data/00340434188193324537.json
09:56:37 INFO: Writing file ../data/00340434188193324544.json
09:56

09:59:28 INFO: Writing file ../data/00340434188193325725.json
09:59:29 INFO: Writing file ../data/00340434188193325732.json
09:59:30 INFO: Writing file ../data/00340434188193325749.json
09:59:32 INFO: Writing file ../data/00340434188193325756.json
09:59:33 INFO: Writing file ../data/00340434188193325763.json
09:59:35 INFO: Writing file ../data/00340434188193325770.json
09:59:36 INFO: Writing file ../data/00340434188193325787.json
09:59:37 INFO: Writing file ../data/00340434188193325794.json
09:59:39 INFO: Writing file ../data/00340434188193325800.json
09:59:40 INFO: Writing file ../data/00340434188193325817.json
09:59:41 INFO: Writing file ../data/00340434188193325824.json
09:59:43 INFO: Writing file ../data/00340434188193325831.json
09:59:44 INFO: Writing file ../data/00340434188193325848.json
09:59:45 INFO: Writing file ../data/00340434188193325855.json
09:59:47 INFO: Writing file ../data/00340434188193325862.json
09:59:48 INFO: Writing file ../data/00340434188193325879.json
09:59:49

10:02:33 INFO: Writing file ../data/00340434188193327057.json
10:02:34 INFO: Writing file ../data/00340434188193327064.json
10:02:35 INFO: Writing file ../data/00340434188193327071.json
10:02:37 INFO: Writing file ../data/00340434188193327088.json
10:02:38 INFO: Writing file ../data/00340434188193327095.json
10:02:40 INFO: Writing file ../data/00340434188193327101.json
10:02:41 INFO: Writing file ../data/00340434188193327118.json
10:02:42 INFO: Writing file ../data/00340434188193327125.json
10:02:44 INFO: Writing file ../data/00340434188193327132.json
10:02:45 INFO: Writing file ../data/00340434188193327149.json
10:02:46 INFO: Writing file ../data/00340434188193327156.json
10:02:48 INFO: Writing file ../data/00340434188193327163.json
10:02:49 INFO: Writing file ../data/00340434188193327170.json
10:02:51 INFO: Writing file ../data/00340434188193327187.json
10:02:52 INFO: Writing file ../data/00340434188193327194.json
10:02:54 INFO: Writing file ../data/00340434188193327200.json
10:02:55

10:05:36 INFO: Writing file ../data/00340434188193328375.json
10:05:38 INFO: Writing file ../data/00340434188193328382.json
10:05:39 INFO: Writing file ../data/00340434188193328399.json
10:05:41 INFO: Writing file ../data/00340434188193328405.json
10:05:42 INFO: Writing file ../data/00340434188193328412.json
10:05:44 INFO: Writing file ../data/00340434188193328429.json
10:05:45 INFO: Writing file ../data/00340434188193328436.json
10:05:47 INFO: Writing file ../data/00340434188193328443.json
10:05:48 INFO: Writing file ../data/00340434188193328450.json
10:05:49 INFO: Writing file ../data/00340434188193328467.json
10:05:51 INFO: Writing file ../data/00340434188193328474.json
10:05:52 INFO: Writing file ../data/00340434188193328481.json
10:05:54 INFO: Writing file ../data/00340434188193328498.json
10:05:55 INFO: Writing file ../data/00340434188193328504.json
10:05:56 INFO: Writing file ../data/00340434188193328511.json
10:05:58 INFO: Writing file ../data/00340434188193328528.json
10:05:59

10:08:42 INFO: Writing file ../data/00340434188193329709.json
10:08:44 INFO: Writing file ../data/00340434188193329716.json
10:08:45 INFO: Writing file ../data/00340434188193329723.json
10:08:46 INFO: Writing file ../data/00340434188193329730.json
10:08:48 INFO: Writing file ../data/00340434188193329747.json
10:08:49 INFO: Writing file ../data/00340434188193329754.json
10:08:50 INFO: Writing file ../data/00340434188193329761.json
10:08:52 INFO: Writing file ../data/00340434188193329778.json
10:08:53 INFO: Writing file ../data/00340434188193329785.json
10:08:55 INFO: Writing file ../data/00340434188193329792.json
10:08:56 INFO: Writing file ../data/00340434188193329808.json
10:08:57 INFO: Writing file ../data/00340434188193329815.json
10:08:59 INFO: Writing file ../data/00340434188193329822.json
10:09:00 INFO: Writing file ../data/00340434188193329839.json
10:09:01 INFO: Writing file ../data/00340434188193329846.json
10:09:03 INFO: Writing file ../data/00340434188193329853.json
10:09:04

10:11:44 INFO: Writing file ../data/00340434188193331030.json
10:11:46 INFO: Writing file ../data/00340434188193331047.json
10:11:47 INFO: Writing file ../data/00340434188193331054.json
10:11:49 INFO: Writing file ../data/00340434188193331061.json
10:11:50 INFO: Writing file ../data/00340434188193331078.json
10:11:51 INFO: Writing file ../data/00340434188193331085.json
10:11:53 INFO: Writing file ../data/00340434188193331092.json
10:11:54 INFO: Writing file ../data/00340434188193331108.json
10:11:55 INFO: Writing file ../data/00340434188193331115.json
10:11:57 INFO: Writing file ../data/00340434188193331122.json
10:11:58 INFO: Writing file ../data/00340434188193331139.json
10:11:59 INFO: Writing file ../data/00340434188193331146.json
10:12:01 INFO: Writing file ../data/00340434188193331153.json
10:12:02 INFO: Writing file ../data/00340434188193331160.json
10:12:03 INFO: Writing file ../data/00340434188193331177.json
10:12:05 INFO: Writing file ../data/00340434188193331184.json
10:12:06

10:14:46 INFO: Writing file ../data/00340434188193332365.json
10:14:47 INFO: Writing file ../data/00340434188193332372.json
10:14:48 INFO: Writing file ../data/00340434188193332389.json
10:14:50 INFO: Writing file ../data/00340434188193332396.json
10:14:51 INFO: Writing file ../data/00340434188193332402.json
10:14:53 INFO: Writing file ../data/00340434188193332419.json
10:14:54 INFO: Writing file ../data/00340434188193332426.json
10:14:55 INFO: Writing file ../data/00340434188193332433.json
10:14:57 INFO: Writing file ../data/00340434188193332440.json
10:14:58 INFO: Writing file ../data/00340434188193332457.json
10:14:59 INFO: Writing file ../data/00340434188193332464.json
10:15:01 INFO: Writing file ../data/00340434188193332471.json
10:15:03 INFO: Writing file ../data/00340434188193332488.json
10:15:05 INFO: Writing file ../data/00340434188193332495.json
10:15:06 INFO: Writing file ../data/00340434188193332501.json
10:15:07 INFO: Writing file ../data/00340434188193332518.json
10:15:09

10:17:48 INFO: Writing file ../data/00340434188193333690.json
10:17:49 INFO: Writing file ../data/00340434188193333706.json
10:17:50 INFO: Writing file ../data/00340434188193333713.json
10:17:52 INFO: Writing file ../data/00340434188193333720.json
10:17:53 INFO: Writing file ../data/00340434188193333737.json
10:17:54 INFO: Writing file ../data/00340434188193333744.json
10:17:56 INFO: Writing file ../data/00340434188193333751.json
10:17:57 INFO: Writing file ../data/00340434188193333768.json
10:17:59 INFO: Writing file ../data/00340434188193333775.json
10:18:00 INFO: Writing file ../data/00340434188193333782.json
10:18:01 INFO: Writing file ../data/00340434188193333799.json
10:18:03 INFO: Writing file ../data/00340434188193333805.json
10:18:04 INFO: Writing file ../data/00340434188193333812.json
10:18:05 INFO: Writing file ../data/00340434188193333829.json
10:18:07 INFO: Writing file ../data/00340434188193333836.json
10:18:08 INFO: Writing file ../data/00340434188193333843.json
10:18:09