# Copyright Disclaimer

We, the authors of this work, hereby disclaim all copyright interest in the notebooks and code for estimating aircraft take-off mass submitted as part of the PRC Data Challenge 2024.

Signed, 

Antonio P. Barata <br>
Bernard Bronmans <br>
Victor Ciulei <br>

28.10.2024

In [1]:
import os
from pathlib import Path
from pyopensky.s3 import S3Client
from concurrent.futures import ThreadPoolExecutor

def download_object(s3, obj, data_folder):
    """Fetch a single object through the given client.

    Args:
        s3: Client exposing a ``download_object(obj, filename=...)`` method.
        obj: The object to fetch; handed to the client unchanged.
        data_folder: Destination folder (str or path-like); converted to a
            ``Path`` before being passed as ``filename``.
    """
    # NOTE(review): a folder, not a file path, is passed as `filename`;
    # presumably the client stores the object inside it — confirm against pyopensky.
    destination = Path(data_folder)
    s3.download_object(obj, filename=destination)

def parallel_download(s3=None, objects=None, data_folder=None, max_workers=None):
    """Download several objects concurrently using a thread pool.

    Each object is handed to ``download_object`` on a worker thread. Every
    download is attempted; failures are collected and reported together once
    the pool has drained, rather than being lost with the un-inspected futures.

    Args:
        s3: Client passed through to ``download_object``.
        objects: Iterable of objects to download. ``None`` is treated as empty.
        data_folder: Destination folder passed through to ``download_object``.
        max_workers: Thread-pool size; ``None`` uses the executor's default.

    Raises:
        RuntimeError: If one or more downloads raised. The first underlying
            exception is chained as ``__cause__``.
    """
    if objects is None:
        return

    submitted = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for obj in objects:
            future = executor.submit(download_object, s3, obj, data_folder)
            submitted.append((obj, future))

    # Leaving the `with` block waits for all workers, so every future is
    # finished here and `.exception()` returns immediately.
    failures = []
    for obj, future in submitted:
        error = future.exception()
        if error is not None:
            failures.append((obj, error))

    if failures:
        details = "; ".join(
            f"{getattr(obj, 'object_name', obj)!r}: {error!r}"
            for obj, error in failures
        )
        raise RuntimeError(
            f"{len(failures)} of {len(submitted)} downloads failed: {details}"
        ) from failures[0][1]

s3 = S3Client()
data_folder = os.path.join(os.getcwd(), "data")
# Create the folder on a first run; os.listdir below would otherwise raise
# FileNotFoundError before anything has been downloaded.
os.makedirs(data_folder, exist_ok=True)
# Names already present locally are skipped so re-running the cell does not
# fetch them again.
# NOTE(review): os.listdir only returns top-level entries; if any object name
# in the bucket contains a "/" prefix it will never match — confirm the bucket is flat.
already_downloaded = os.listdir(data_folder)
objects_to_download = [
    obj for obj in s3.s3client.list_objects("competition-data", recursive=True)
    if obj.object_name not in already_downloaded
]
parallel_download(
    s3=s3, 
    objects=objects_to_download, 
    data_folder=data_folder, 
    max_workers=8 # larger numbers may choke the server
)


100%|██████████████████████████████████████████████████████████████████████████████████| 28/28 [00:02<00:00, 11.24Mb/s]
