## Download files

This script will download the data required to reproduce the main figures.

> To download the files, you will need the base URL that is provided with the manuscript submission.
>
> Once the paper is accepted, the files will be made publicly available. 


In [None]:
import getpass

# The base URL is read with getpass so that it is not echoed in the notebook output.
# Surrounding whitespace (easily picked up when pasting) is stripped, because it
# would otherwise become part of every request URL built from this value.
base_url = getpass.getpass("Please enter the base URL").strip()

In [None]:
import os
import requests
import gzip
import shutil
from tqdm import tqdm


def download_and_extract_file(file: str, base_url: str, force: bool = False) -> None:
    """Download a file from a base URL and decompress it if it is a ``.gz`` archive.

    The file is stored locally under the same relative path as ``file``. For a
    ``.gz`` archive only the decompressed file (path without the ``.gz`` suffix)
    is kept. Data is first written to temporary ``.part`` / ``.extracting``
    files and moved into place only on success, so an interrupted run never
    leaves a truncated file at the final path.

    Args:
        file (str): Path of the file relative to ``base_url``; also used as the
            local destination path.
        base_url (str): Base URL where the file is located.
        force (bool, optional): If True, re-downloads the file even if
                                it exists locally. Defaults to False.

    Returns:
        None. Progress and errors are reported with ``print``. Network errors
        (``requests.exceptions.RequestException``) and file-system or
        decompression errors raised while downloading/extracting are caught
        and printed, not re-raised.

    Raises:
        OSError: If the stale local file cannot be removed or the destination
            directory cannot be created.
    """
    import zlib  # local import: only needed to catch zlib.error during extraction

    print(f"Checking file {file}")
    # Ensure base_url ends with a slash
    if not base_url.endswith("/"):
        base_url += "/"
    full_url = f"{base_url}{file}"
    download_path = file
    is_archive = os.path.basename(file).endswith(".gz")
    download_path_extracted = download_path[:-3] if is_archive else download_path
    # Seconds to wait for the server (connect / between reads); without a
    # timeout a stalled connection would block the notebook indefinitely.
    request_timeout = 60

    local_file_exists = os.path.exists(download_path_extracted)

    if local_file_exists and not force:
        if is_archive:
            # The server reports the size of the compressed archive while the
            # local file is decompressed, so the sizes are not comparable.
            print(f"   File {file} is already downloaded and up to date.")
            return

        try:
            # Follow redirects so that the reported size belongs to the same
            # resource that the GET request below would download.
            response = requests.head(
                full_url, allow_redirects=True, timeout=request_timeout
            )
            response.raise_for_status()
            online_size = int(response.headers.get("content-length", 0))
        except requests.exceptions.RequestException as e:
            print(f"   Error checking online file: {e}")
            return

        local_size = os.path.getsize(download_path_extracted)
        if online_size == local_size:
            print(f"   File {file} is already downloaded and has the correct size.")
            return
        print(
            f"   Local file size ({local_size} bytes) differs from online file size ({online_size} bytes)."
        )

    # Reaching this point means the file is missing, outdated, or force=True.
    if local_file_exists:
        os.remove(download_path_extracted)
        print(f"      Removing existing file: {download_path_extracted}")
    print(f"   Downloading file: {file}")

    # Create the destination folder if it doesn't exist. A bare file name has
    # an empty dirname, which os.makedirs would reject.
    destination_dir = os.path.dirname(download_path)
    if destination_dir:
        os.makedirs(destination_dir, exist_ok=True)

    partial_path = f"{download_path}.part"
    extracting_path = f"{download_path_extracted}.extracting"

    try:
        # The context manager releases the streamed connection when done.
        with requests.get(
            full_url, stream=True, timeout=request_timeout
        ) as response:
            response.raise_for_status()  # Raise an exception for error status codes
            total_size = int(response.headers.get("content-length", 0))

            with open(partial_path, "wb") as f, tqdm(
                desc=file,
                total=total_size or None,  # unknown size -> open-ended progress bar
                unit="iB",
                unit_scale=True,
                unit_divisor=1024,
            ) as progress_bar:
                for chunk in response.iter_content(chunk_size=8192):
                    progress_bar.update(f.write(chunk))

        if is_archive:
            print(f"Extracting {download_path}")
            with gzip.open(partial_path, "rb") as f_in:
                with open(extracting_path, "wb") as f_out:
                    shutil.copyfileobj(f_in, f_out)
            os.replace(extracting_path, download_path_extracted)
            print(f"Extraction complete: {download_path_extracted}")
        else:
            os.replace(partial_path, download_path)

    except requests.exceptions.RequestException as e:
        print(f"   Error downloading file: {e}")
    except (OSError, ValueError, EOFError, zlib.error) as e:
        # EOFError / zlib.error are raised by gzip for truncated or corrupt archives.
        print(f"   Error extracting file: {e}")
    finally:
        # Remove temporary files (the compressed archive after a successful
        # extraction, or any leftovers after a failure).
        for leftover in (partial_path, extracting_path):
            if os.path.exists(leftover):
                os.remove(leftover)

In [None]:
# Files to fetch. Each entry is a path relative to the base URL and is also
# used as the local destination path by download_and_extract_file.
files = [
    # Xenium output folders
    "xenium_output/day8_r2/morphology_mip.ome.tif",
    "xenium_output/day8_r2/experiment.xenium",
    "xenium_output/human_09_r2/morphology_mip.ome.tif",
    "xenium_output/human_09_r2/experiment.xenium",
    # Transcript table
    "transcripts/transcripts_figure_5c.csv",
    # Image arrays (.npy)
    "images/day8_r2_h_and_e_alignment_gan.npy",
    "images/human_09_r2_IF_alignment.npy",
    "images/human_09_r2_h_and_e_alignment_gan.npy",
    "images/day8_r2_IF_alignment.npy",
    # .h5ad files
    "adata/human.h5ad",
    "adata/human_09_r2_with_transcripts.h5ad",
    "adata/tgfb.h5ad",
    "adata/day8_r2_with_transcripts.h5ad",
    "adata/timecourse.h5ad",
    "adata/uninfected.h5ad",
    "adata/perturb.h5ad",
    "adata/visium_hd.h5ad",
    # IF timecourse text files (file names contain a space)
    "IF/timecourse/day 120.txt",
    "IF/timecourse/day 060.txt",
    "IF/timecourse/day 006.txt",
    "IF/timecourse/day 007.txt",
    "IF/timecourse/day 005.txt",
]

In [None]:
# Fetch every listed file in order; force=False keeps files that are already
# present locally instead of unconditionally re-downloading them.
for f in files:
    download_and_extract_file(file=f, base_url=base_url, force=False)