# Scrabu Prototype (for a List of Shipment Numbers)

This notebook is a prototype of the Scrabu project. The goal is to download the DHL tracking pages for a list of shipment numbers and scrape the shipment information from them.

#### Configure Logger

In [1]:
import concurrent
import concurrent.futures

from scrabu import generate_shipment_numbers #340434188193324407|340434188193323500
from scrabu import request
from scrabu import html_to_json
from scrabu import shipment_details
from scrabu import save_dictionary
from scrabu import process_shipment_number

In [2]:
# Multi-threading
def main(shipment_number=None, size=None, max_workers=None, start_url="https://www.dhl.de/int-verfolgen/search?language=de&lang=de&domain=de&piececode="):
    """Process a batch of generated shipment numbers on a thread pool.

    Shipment numbers are produced by ``generate_shipment_numbers`` and each
    one is handed to ``process_shipment_number`` in a worker thread. A failure
    for one shipment number is printed and does not stop the remaining ones.

    Args:
        shipment_number: Seed forwarded to ``generate_shipment_numbers``.
        size: Number of shipment numbers requested from
            ``generate_shipment_numbers``.
        max_workers: Thread count for the ``ThreadPoolExecutor``; ``None``
            lets the executor choose its default.
        start_url: Currently unused by this function; kept so existing
            callers that pass it keep working.
            NOTE(review): presumably meant to be forwarded to the request
            step -- confirm against ``process_shipment_number``.

    Returns:
        None. Results of ``process_shipment_number`` are discarded here.
    """
    shipment_numbers = generate_shipment_numbers(shipment_number=shipment_number, size=size)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to the shipment number it was submitted for so
        # a failure can be reported against the right number.
        future_to_number = {
            executor.submit(process_shipment_number, number): number
            for number in shipment_numbers
        }
        for future in concurrent.futures.as_completed(future_to_number):
            failed_number = future_to_number[future]
            try:
                # result() re-raises any exception raised inside the worker.
                future.result()
            except Exception as exc:
                print('%r generated an exception: %s' % (failed_number, exc))

In [3]:
# Generate 10 shipment numbers from the seed and process them with a single
# worker thread, so requests are issued one at a time.
main(
    shipment_number=340434188193324407,
    size=10,
    max_workers=1,
)

05:49:49 INFO: Generating shipment numbers with seed: 340434188193324407
05:49:49 INFO: Generated 10 unique shipment numbers
05:49:50 INFO: Writing file ../data/00340434188193324414.json
05:49:51 INFO: Writing file ../data/00340434188193324421.json
05:49:52 INFO: Writing file ../data/00340434188193324438.json
05:49:54 INFO: Writing file ../data/00340434188193324445.json
05:49:55 INFO: Writing file ../data/00340434188193324452.json
05:49:56 INFO: Writing file ../data/00340434188193324469.json
05:49:57 INFO: Writing file ../data/00340434188193324476.json
05:49:59 INFO: Writing file ../data/00340434188193324483.json
05:50:00 INFO: Writing file ../data/00340434188193324490.json
05:50:02 INFO: Writing file ../data/00340434188193324506.json
