# Scrabu Prototype (for a List of Shipment Numbers)

This notebook is a prototype of the Scrabu project. The goal is to download the DHL tracking page for each shipment number and scrape the shipment information from it.

#### Configure Logger

In [1]:
from scrabu import generate_shipment_numbers #340434188193324407|340434188193323500
from scrabu import request
from scrabu import html_to_json
from scrabu import shipment_details
from scrabu import save_dictionary
from scrabu import process_shipment_number
import concurrent

In [2]:
# Multi-threading
# Seed shipment numbers; each one is passed to generate_shipment_numbers().
DEFAULT_SEED_SHIPMENT_NUMBERS = (
    340434174857037035,
    340434188193324407,
    340434188193323500,
    340434188193333560,
    340434188193343569,
)


def main(shipment_number=None, size=None, max_workers=None, start_url="https://www.dhl.de/int-verfolgen/search?language=de&lang=de&domain=de&piececode="):
    """Process the shipment numbers generated from every seed, using a thread pool.

    For each seed in ``DEFAULT_SEED_SHIPMENT_NUMBERS`` this calls
    ``generate_shipment_numbers`` and submits every generated number to
    ``process_shipment_number`` on a fresh ``ThreadPoolExecutor``.

    Args:
        shipment_number: Currently ignored. The seeds always come from
            ``DEFAULT_SEED_SHIPMENT_NUMBERS``; the parameter is kept so that
            existing calls keep working.
        size: Forwarded unchanged to ``generate_shipment_numbers``.
        max_workers: Forwarded unchanged to ``ThreadPoolExecutor``.
        start_url: Currently unused; kept for backward compatibility.

    Returns:
        None. Results of ``process_shipment_number`` are not collected here.

    Any exception raised by a worker is caught and printed together with the
    shipment number that triggered it, so one failure does not stop the run.
    """
    # A bare `import concurrent` does not load the `futures` submodule, so
    # import it explicitly instead of relying on another module having done so.
    import concurrent.futures

    # A distinct loop name keeps the `shipment_number` argument from being shadowed.
    for seed in DEFAULT_SEED_SHIPMENT_NUMBERS:
        shipment_numbers = generate_shipment_numbers(shipment_number=seed, size=size)

        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_number = {
                executor.submit(process_shipment_number, sn): sn
                for sn in shipment_numbers
            }
            for future in concurrent.futures.as_completed(future_to_number):
                number = future_to_number[future]
                try:
                    # Called only to surface an exception raised inside the worker.
                    future.result()
                except Exception as exc:
                    print('%r generated an exception: %s' % (number, exc))

In [None]:
# NOTE: main() iterates over its own hard-coded list of seed shipment numbers,
# so the `shipment_number` passed here does not affect which seeds are processed.
main(shipment_number=340434188193333560, size=1000, max_workers=4) #340434188193324407 | 00340434188193333560

05:09:38 INFO: Generating shipment numbers with seed: 340434174857037035
05:09:38 INFO: Generated 1000 unique shipment numbers
05:09:39 INFO: No events found for shipment number 00340434174857037073
05:09:39 INFO: No events found for shipment number 00340434174857037059
05:09:39 INFO: No events found for shipment number 00340434174857037042
05:09:39 INFO: No events found for shipment number 00340434174857037066
05:09:41 INFO: No events found for shipment number 00340434174857037103
05:09:41 INFO: No events found for shipment number 00340434174857037097
05:09:41 INFO: No events found for shipment number 00340434174857037110
05:09:41 INFO: No events found for shipment number 00340434174857037080
05:09:42 INFO: No events found for shipment number 00340434174857037127
05:09:42 INFO: No events found for shipment number 00340434174857037134
05:09:42 INFO: No events found for shipment number 00340434174857037158
05:09:42 INFO: No events found for shipment number 00340434174857037141
05:09:43 

05:10:18 INFO: No events found for shipment number 00340434174857038186
05:10:18 INFO: No events found for shipment number 00340434174857038209
05:10:18 INFO: No events found for shipment number 00340434174857038155
05:10:18 INFO: No events found for shipment number 00340434174857038193
05:10:19 INFO: No events found for shipment number 00340434174857038216
05:10:20 INFO: No events found for shipment number 00340434174857038247
05:10:20 INFO: No events found for shipment number 00340434174857038223
05:10:20 INFO: No events found for shipment number 00340434174857038230
05:10:20 INFO: No events found for shipment number 00340434174857038254
05:10:21 INFO: No events found for shipment number 00340434174857038261
05:10:21 INFO: No events found for shipment number 00340434174857038278
05:10:21 INFO: No events found for shipment number 00340434174857038285
05:10:21 INFO: No events found for shipment number 00340434174857038292
05:10:22 INFO: No events found for shipment number 0034043417485