# Downloading data

In [1]:
# !pip install "cloudpathlib[azure]" loguru tqdm typer

In [1]:
from pathlib import Path

from cloudpathlib import AzureBlobClient, AzureBlobPath
from loguru import logger
from tqdm.contrib.concurrent import process_map

def download_path(path: AzureBlobPath):
    """Fetch one cloud path into the local cache and report the outcome.

    Args:
        path: The cloud path to download. Only paths for which
            ``is_file()`` is true trigger a download; anything else is
            reported as a success without doing any work.

    Returns:
        A dict with the keys ``"path"`` and ``"status"``. ``"status"`` is
        ``"success"`` when no exception was raised and ``"failed"``
        otherwise; failed results additionally carry the exception text
        under ``"message"``.

    Any exception is caught, logged at debug level and turned into a
    ``"failed"`` result, so this function does not raise on download errors.
    """
    try:
        is_blob = path.is_file()
        if is_blob:
            # Reading ``fspath`` is what downloads the cloud asset to
            # local_cache_dir; the returned value itself is not needed.
            path.fspath
    except Exception as exc:
        logger.debug(f"Failed to download {path}. {exc}")
        return {"path": path, "status": "failed", "message": str(exc)}
    else:
        return {"path": path, "status": "success"}


# Configuration for downloading the challenge dataset to your local machine.
#
# NOTE(review): the URL below embeds a signed SAS token (the `sig=` query
# parameter). It appears to be a public, time-limited token (`se=` looks like
# an expiry timestamp) -- confirm before reuse, and prefer supplying the URL
# via a local file (see below) over committing tokens to the notebook.
sas_url = "https://cloudcoverdatawesteurope.blob.core.windows.net/public?se=2022-08-01T12%3A00Z&sp=rl&sv=2018-11-09&sr=c&sig=DrqaBLSI9t1nnx1sekyPaMgsqMiO9%2BBzjU/JwDhfQ64%3D"
cloud_directory = "az://."
local_directory = "data"

# `sas_url` may instead hold the path of a local text file that contains the
# SAS URL; when such a file exists, the URL is read from it.
if Path(sas_url).exists():
    logger.info(f"Loading SAS URL from {sas_url}")
    sas_url = Path(sas_url).read_text().strip()

# Files fetched through this client are cached under `local_directory`.
client = AzureBlobClient(account_url=sas_url, local_cache_dir=local_directory)
directory = client.CloudPath(cloud_directory)
logger.info("Retrieving path list.")
# Walk the whole cloud directory and keep only the imagery (.tif) and
# metadata (.geojson) entries. The public `suffix` property is used instead
# of reaching into the private `_path` attribute.
path_list = [
    path
    for path in directory.rglob("*")
    if path.suffix.lower() in (".tif", ".geojson")
]

2021-12-20 22:21:03.727 | INFO     | __main__:<module>:28 - Retrieving path list.


In [2]:
import os

# Map each cloud URI to the location it would occupy in the local cache and
# keep only the blobs that are not on disk yet. The prefixes are derived from
# `cloud_directory` / `local_directory` so this check stays in sync with the
# configuration instead of duplicating the literal values.
cloud_prefix = f"{cloud_directory}/"
local_prefix = f"{local_directory}/"
missing_path_list = [
    blob
    for blob in path_list
    if not os.path.exists(blob.as_uri().replace(cloud_prefix, local_prefix))
]
len(missing_path_list)

10

In [3]:
# Download every missing file in parallel worker processes, showing a
# progress bar, then report how many downloads failed.
logger.info(f"Downloading {len(missing_path_list)} files.")
results = process_map(
    download_path,
    missing_path_list,
    total=len(missing_path_list),
    chunksize=10,
)
# `download_path` never raises; failures are signalled through the "status" key.
failures = [result for result in results if result["status"] == "failed"]
if failures:
    logger.warning(f"{len(failures)} files failed to download.")

2021-12-20 22:22:31.175 | INFO     | __main__:<module>:1 - Downloading 10 files.


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


