In [1]:
from tqdm.auto import tqdm
import requests
from pathlib import Path
import zipfile

In [2]:
# Local folder that receives the downloaded archives and their extracted contents.
dataset_dir = Path("dataset")
# exist_ok=True keeps this cell re-runnable when the folder is already present.
dataset_dir.mkdir(exist_ok=True)

In [3]:
# Download descriptors for the archives published at
# https://zenodo.org/records/5040271 -- each entry pairs the direct download
# link with the file name the archive is stored under locally.
scenes = dict(
    url="https://zenodo.org/records/5040271/files/Landsat8_L1.zip?download=1",
    filename="Landsat8_L1.zip",
)
val_data = dict(
    url="https://zenodo.org/records/5040271/files/PixBox-L8-CMIX.zip?download=1",
    filename="PixBox-L8-CMIX.zip",
)

In [4]:
def download_file(url: str, filepath: Path, force_download=False):
    """
    Stream the content at ``url`` into ``filepath`` while showing a progress bar.

    The data is first written to a sibling ``<name>.part`` file and only moved
    to ``filepath`` once the transfer finished, so a failed or interrupted
    download never leaves a truncated file that a later run would mistake for
    a complete one.

    Args:
        url: Address fetched with an HTTP GET request.
        filepath: Destination file. Missing parent directories are created.
        force_download: When true, download even if ``filepath`` already exists.

    Returns:
        None. When ``filepath`` exists and ``force_download`` is false a
        message is printed and nothing is fetched.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    if filepath.exists() and not force_download:
        print(f"File {filepath} already exists. Skipping download.")
        return

    filepath.parent.mkdir(parents=True, exist_ok=True)
    partial_path = filepath.with_name(filepath.name + ".part")
    block_size = 1024 * 1024  # 1 MiB chunks; 1 KiB is very chatty for large archives

    try:
        # (connect, read) timeouts: the read timeout applies per chunk, not to
        # the whole transfer, so large archives are fine but a stalled server
        # no longer blocks the notebook forever.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            # Fail loudly instead of saving an HTML error page as the archive.
            response.raise_for_status()

            # A missing Content-Length becomes None, which tqdm treats as
            # "unknown total" rather than a bar with total 0.
            total_size = int(response.headers.get("content-length", 0)) or None

            with tqdm(
                total=total_size,
                unit="B",
                unit_scale=True,
                desc=f"Downloading {filepath.name}",
            ) as progress_bar:
                with open(partial_path, "wb") as file:
                    for data in response.iter_content(block_size):
                        file.write(data)
                        progress_bar.update(len(data))

        # Publish the finished download under its final name in one step.
        partial_path.replace(filepath)
    finally:
        # Only still present when something above failed part-way through.
        if partial_path.exists():
            partial_path.unlink()

In [None]:
def download_and_extract(url: str, file_path: Path, force_download=False):
    """
    Download a zip archive from a URL and extract it under ``dataset_dir``.

    ``Landsat8_L1.zip`` is unpacked into ``dataset_dir / "Landsat8_L1"``;
    every other archive is unpacked directly into ``dataset_dir``. If the
    local file turns out not to be a valid zip archive, it is downloaded
    again once (with ``force_download=True``) and extraction is retried.

    Args:
        url: Address of the archive.
        file_path: Where the archive is (or will be) stored locally.
        force_download: Passed on to ``download_file``; when true the archive
            is fetched even if ``file_path`` already exists.

    Raises:
        zipfile.BadZipFile: If the archive is still invalid after a fresh
            download. A real exception is used instead of ``assert`` so the
            guard also holds under ``python -O``, where asserts are removed
            and the retry would otherwise recurse without bound.
    """
    download_file(url, file_path, force_download)

    # The Landsat scenes get their own sub-folder; other archives are
    # extracted straight into the dataset folder.
    if file_path.name == "Landsat8_L1.zip":
        target_dir = dataset_dir / "Landsat8_L1"
    else:
        target_dir = dataset_dir

    try:
        with zipfile.ZipFile(file_path, "r") as zip_ref:
            print(f"Extracting {file_path.name}...")
            zip_ref.extractall(target_dir)
    except zipfile.BadZipFile as err:
        if force_download:
            # This attempt already used a fresh download, so retrying again
            # would not help.
            raise zipfile.BadZipFile(
                f"Failed to unzip {file_path} even after a fresh download."
            ) from err
        print(f"Error: {file_path} is not a valid zip file.")
        print("Trying to download again...")
        download_and_extract(url, file_path, force_download=True)

In [6]:
# Fetch and unpack each archive; the zip file itself is stored inside dataset_dir.
for data in [val_data, scenes]:
    url = data["url"]
    filename = data["filename"]
    file_path = dataset_dir / filename
    download_and_extract(url, file_path)

File dataset/PixBox-L8-CMIX.zip already exists. Skipping download.
File dataset/Landsat8_L1.zip already exists. Skipping download.
