# Scrabu Prototype (for a List of Shipment Numbers)

This notebook is a prototype of the Scrabu project. The goal is to download the DHL tracking page for each shipment number in a list and scrape the shipment information from it.

#### Imports

In [None]:
import concurrent
import concurrent.futures  # submodules are not loaded by `import concurrent` alone

from scrabu import generate_shipment_numbers
from scrabu import request
from scrabu import html_to_json
from scrabu import shipment_details
from scrabu import save_dictionary
from scrabu import process_shipment_number

In [None]:
# Multi-threading
def main(shipment_number=None, size=None, max_workers=None, start_url="https://www.dhl.de/int-verfolgen/search?language=de&lang=de&domain=de&piececode="):
    """Process a batch of shipment numbers concurrently in a thread pool.

    The batch comes from ``generate_shipment_numbers(shipment_number=..., size=...)``;
    each entry is submitted to ``process_shipment_number`` on a worker thread.

    Args:
        shipment_number: Forwarded unchanged to ``generate_shipment_numbers``.
        size: Forwarded unchanged to ``generate_shipment_numbers``.
        max_workers: Size of the thread pool; ``None`` lets
            ``ThreadPoolExecutor`` pick its own default.
        start_url: Not used by this function; kept so callers that pass it
            keep working.
            NOTE(review): presumably the base tracking URL -- confirm whether
            ``process_shipment_number`` is supposed to receive it.

    Returns:
        dict: Maps each successfully processed shipment number to the value
        returned by ``process_shipment_number``. Shipment numbers whose
        worker raised are reported on stdout and left out of the dict.
    """
    shipment_numbers = generate_shipment_numbers(shipment_number=shipment_number, size=size)

    results = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Remember which shipment number each future belongs to, because
        # as_completed() yields futures in completion order, not submit order.
        future_to_number = {
            executor.submit(process_shipment_number, sn): sn
            for sn in shipment_numbers
        }
        for future in concurrent.futures.as_completed(future_to_number):
            number = future_to_number[future]
            try:
                results[number] = future.result()
            except Exception as exc:
                # Best effort: one failing shipment must not abort the batch.
                print('%r generated an exception: %s' % (number, exc))
    return results

In [None]:
# Seed shipment numbers; main() is invoked once for each of them.
list_of_shipment_numbers = [
    340434174857037035,
    340434188193324407,
    340434188193323500,
    340434188193333560,
    340434188193343569,
    340434154352820677,
    340434174857780306,
    340434174857972756,
    340434154847504129,
    340434193824427518,
    340434193824499577,
    340434193824500372,
    384279070327316725,
    340434311100776215,
    340434188193365875,
]

# Keyword arguments shared by every run: main() forwards `size` to
# generate_shipment_numbers and uses `max_workers` for its thread pool.
run_options = {"size": 40, "max_workers": 4}

for shipment_number in list_of_shipment_numbers:
    main(shipment_number=shipment_number, **run_options)