# Dataset downloader for confocal demo

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Abhishek-Gupta-GitHub/confocal_microscopy-copilot/blob/main/notebooks/download_datasets.ipynb)

This notebook downloads and generates small confocal-style datasets into the `data/` folder:
- ImageJ: public confocal z-series sample (`confocal-series.zip`).
- DeepTrack2: tutorial 2D/3D example files, copied from a local clone of the DeepTrack2 repository.
- NumPy: synthetic Brownian-particle data (2D+t), saved as `.npy` and `.tif`.



Python (repo handling)

In [8]:
import os
from pathlib import Path

# Detect Colab by looking for "google.colab" in the string form of the active
# IPython shell. get_ipython() only exists inside IPython/Jupyter, so this cell
# cannot be run as a plain Python script.
IN_COLAB = "google.colab" in str(get_ipython())
ROOT = Path(".").resolve()

if IN_COLAB:
    print("Running in Colab. Cloning repo...")
    GITHUB_USER = "Abhishek-Gupta-GitHub"
    REPO_NAME = "confocal_microscopy-copilot"
    REPO_URL = f"https://github.com/{GITHUB_USER}/{REPO_NAME}.git"

    # Clone only when the checkout is missing, so re-running the cell is harmless.
    if not Path("/content", REPO_NAME).exists():
        !git clone {REPO_URL} /content/{REPO_NAME}
    %cd /content/{REPO_NAME}
    # Re-resolve ROOT because %cd changed the working directory.
    ROOT = Path(".").resolve()
else:
    print("Running locally. Make sure you run this from the repo root.")
    print("Current working directory:", os.getcwd())

# Create ROOT/data if needed. As the cell's last expression this tuple is also
# displayed as (ROOT, None), because mkdir() returns None.
ROOT, (ROOT / "data").mkdir(exist_ok=True)



Running locally. Make sure you run this from the repo root.
Current working directory: c:\Users\akumarg\Desktop\microscopy hackathon\confocal_microscopy-copilot


(WindowsPath('C:/Users/akumarg/Desktop/microscopy hackathon/confocal_microscopy-copilot'),
 None)

Python (imports and helpers)

In [4]:
import json
import urllib.request
from pathlib import Path

# Repo root is taken from the current working directory.
ROOT = Path(".").resolve()
# Every dataset in this notebook is written beneath ROOT/data.
DATA = ROOT / "data"
DATA.mkdir(exist_ok=True)
print("Data directory:", DATA)

def download_file(url: str, out_path: Path, *, use_wget: bool = False) -> None:
    """Download ``url`` to ``out_path`` unless the file is already present.

    The payload is first written to a sibling ``<name>.part`` file and only
    renamed to ``out_path`` after the transfer succeeded, so an interrupted
    download is never mistaken for a finished one by the "already exists"
    check on the next run.

    Args:
        url: Address of the file to fetch.
        out_path: Destination file; missing parent directories are created.
        use_wget: If true and a ``wget`` executable is found on ``PATH``,
            fetch with ``wget``; otherwise fall back to ``urllib``.

    Raises:
        urllib.error.URLError: If the ``urllib`` transfer fails.
        subprocess.CalledProcessError: If ``wget`` exits with an error.
    """
    import shutil
    import subprocess

    out_path.parent.mkdir(parents=True, exist_ok=True)
    if out_path.exists():
        print(f"Already exists: {out_path}")
        return
    print(f"Downloading {url} -> {out_path}")

    part_path = out_path.with_name(out_path.name + ".part")
    wget_exe = shutil.which("wget") if use_wget else None
    try:
        if wget_exe:
            subprocess.run([wget_exe, "-O", str(part_path), url], check=True)
        else:
            urllib.request.urlretrieve(url, part_path)
        part_path.replace(out_path)
    finally:
        # No-op after a successful rename; removes the partial file on failure.
        if part_path.exists():
            part_path.unlink()
    print("Done.")

def save_metadata(path: Path, meta: dict) -> None:
    """Write ``meta`` as ``metadata.json`` in the directory that contains ``path``.

    Only ``path.parent`` is used, so ``path`` may be any file located in the
    dataset directory (including the ``metadata.json`` path itself). An
    existing ``metadata.json`` is left untouched.

    Args:
        path: A file path inside the dataset directory.
        meta: JSON-serialisable metadata to store.
    """
    meta_path = path.parent / "metadata.json"
    if meta_path.exists():
        print(f"Metadata exists: {meta_path}")
        return
    # Create the dataset directory on demand so callers need not do it first.
    meta_path.parent.mkdir(parents=True, exist_ok=True)
    with open(meta_path, "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2)
    print(f"Saved metadata: {meta_path}")


Data directory: C:\Users\akumarg\Desktop\microscopy hackathon\confocal_microscopy-copilot\data


Python (ImageJ public confocal dataset)

In [25]:
def get_imagej_confocal_stack():
    """
    Fetch the example confocal z-series archive from the ImageJ sample images
    (real biological data) into data/imagej_confocal/ and write its metadata.
    """
    archive_path = DATA / "imagej_confocal" / "confocal-series.zip"

    # use_wget is forwarded as a keyword, so download_file must accept it.
    download_file(
        "https://wsr.imagej.net/ij/images/confocal-series.zip",
        archive_path,
        use_wget=True,
    )

    # Calibration values are placeholders until the true ones are known.
    save_metadata(
        archive_path,
        {
            "description": "Confocal z-series stack from ImageJ demo images (e.g., C. elegans embryo).",
            "source": "ImageJ public demo images.",
            "url": "https://imagej.net/plugins/public-data-sets",
            "pixel_size_xy_um": 1.0,
            "z_step_um": 1.0,
            "frame_interval_s": None,
            "approx_particle_diameter_um": 2.0,
            "sample_type": "confocal z-series (biology)",
            "dimensions": "3D",
            "notes": "Unzip to get individual TIFF slices; stack them in your loader.",
        },
    )

get_imagej_confocal_stack()





Already exists: C:\Users\akumarg\Desktop\microscopy hackathon\confocal_microscopy-copilot\data\imagej_confocal\confocal-series.zip
Metadata exists: C:\Users\akumarg\Desktop\microscopy hackathon\confocal_microscopy-copilot\data\imagej_confocal\metadata.json


Python (DeepTrack2 tutorial data: clone + copy)

In [15]:
# Scratch checkout of DeepTrack2 inside the repo root, and the tutorial folder
# that the prepare_* helpers read from.
DT_CLONE_DIR = ROOT / "_deeptrack2_tmp"
DEEPTRACK_REPO = "https://github.com/DeepTrackAI/DeepTrack2.git"
DT_TUTORIAL_DIR = DT_CLONE_DIR / "tutorials" / "1-getting-started"

def clone_deeptrack2():
    """Shallow-clone DeepTrack2 into ``DT_CLONE_DIR`` unless it already exists.

    Raises:
        FileNotFoundError: If the ``git`` executable cannot be found.
        subprocess.CalledProcessError: If ``git clone`` exits with an error.
    """
    import subprocess

    if DT_CLONE_DIR.exists():
        print(f"DeepTrack2 already cloned at {DT_CLONE_DIR}")
        return
    print(f"Cloning DeepTrack2 into {DT_CLONE_DIR} ...")
    cmd = ["git", "clone", "--depth", "1", DEEPTRACK_REPO, str(DT_CLONE_DIR)]
    print("Running:", " ".join(cmd))
    # check=True so a failed clone raises instead of reporting "Clone finished.".
    subprocess.run(cmd, check=True)
    print("Clone finished.")

def copy_file(src: Path, dst: Path) -> None:
    """Copy ``src`` to ``dst`` (with file metadata) unless ``dst`` already exists.

    Args:
        src: Existing file to copy.
        dst: Destination file; missing parent directories are created.
    """
    # Imported locally because the notebook never imports shutil at cell level.
    import shutil

    dst.parent.mkdir(parents=True, exist_ok=True)
    if dst.exists():
        print(f"Already exists: {dst}")
        return
    print(f"Copying {src} -> {dst}")
    shutil.copy2(src, dst)

def _copy_deeptrack_sample(pattern: str, out_dir: Path, meta: dict) -> None:
    """Copy one tutorial data file matching ``pattern`` into ``out_dir`` and save ``meta``."""
    data_dir = DT_TUTORIAL_DIR / "data"
    if not data_dir.exists():
        print(f"WARNING: tutorial data directory not found: {data_dir}")
        return
    # Keep regular files only: copy_file() cannot copy a directory.
    # If nothing matches the pattern, fall back to any file in the folder.
    candidates = sorted(p for p in data_dir.glob(pattern) if p.is_file()) or sorted(
        p for p in data_dir.glob("*") if p.is_file()
    )
    if not candidates:
        print(f"No files found in {data_dir}")
        return
    src_file = candidates[0]

    copy_file(src_file, out_dir / src_file.name)
    save_metadata(out_dir / "metadata.json", meta)

def prepare_deeptrack_2d():
    """Copy a 2D example file from the DeepTrack2 tutorial into data/deeptrack_2D/.

    Prefers files whose name contains "2D". Metadata values are placeholders.
    """
    meta = {
        "description": "2D particle example from DeepTrack2 'getting-started' tutorial.",
        "source": "DeepTrack2 tutorials/1-getting-started.",
        "url": "https://github.com/DeepTrackAI/DeepTrack2",
        "pixel_size_xy_um": 1.0,
        "z_step_um": None,
        "frame_interval_s": 1.0,
        "approx_particle_diameter_um": 2.0,
        "sample_type": "synthetic particle image (2D)",
        "dimensions": "2D or 2D+t (depending on file)",
        "notes": "Copied from DeepTrack2 tutorial data; refine metadata once inspected."
    }
    _copy_deeptrack_sample("*2D*", DATA / "deeptrack_2D", meta)

def prepare_deeptrack_3d():
    """Copy a 3D example file from the DeepTrack2 tutorial into data/deeptrack_3D/.

    Prefers files whose name contains "3D". Metadata values are placeholders.
    """
    meta = {
        "description": "3D (or multi-plane) particle example from DeepTrack2 'getting-started' tutorial.",
        "source": "DeepTrack2 tutorials/1-getting-started.",
        "url": "https://github.com/DeepTrackAI/DeepTrack2",
        "pixel_size_xy_um": 1.0,
        "z_step_um": 1.0,
        "frame_interval_s": 1.0,
        "approx_particle_diameter_um": 2.0,
        "sample_type": "synthetic particle volume (3D)",
        "dimensions": "3D or 3D+t (depending on file)",
        "notes": "Copied from DeepTrack2 tutorial data; refine metadata once inspected."
    }
    _copy_deeptrack_sample("*3D*", DATA / "deeptrack_3D", meta)

# Clone first: both prepare_* steps read from the tutorial folder inside the clone.
clone_deeptrack2()
prepare_deeptrack_2d()
prepare_deeptrack_3d()


Cloning DeepTrack2 into C:\Users\akumarg\Desktop\microscopy hackathon\confocal_microscopy-copilot\_deeptrack2_tmp ...
Running: git clone --depth 1 https://github.com/DeepTrackAI/DeepTrack2.git C:\Users\akumarg\Desktop\microscopy hackathon\confocal_microscopy-copilot\_deeptrack2_tmp
Clone finished.


Python (install DeepTrack2)

In [20]:
# Install the deeptrack package with pip (IPython shell magic).
# NOTE(review): no visible cell imports deeptrack -- confirm this install is still needed.
!pip install deeptrack

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 23.2.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Python (NumPy synthetic generator)

In [23]:
import numpy as np
import tifffile

def generate_synthetic_gaussian_2D_t(
    *,
    image_size: int = 64,
    n_frames: int = 50,
    n_particles: int = 10,
    diffusion_sigma_px: float = 1.0,
    blob_sigma_px: float = 1.5,
    seed: int = 42,
) -> None:
    """
    Generate a small 2D+t synthetic particle dataset:
    - T frames of size (Y, X)
    - A few diffusing Gaussian blobs
    Saved to data/synthetic_gaussian/ as synthetic_2D_t.npy (float32 in [0, 1])
    and synthetic_2D_t.tif (uint16), together with metadata.json.

    All parameters are keyword-only; the defaults reproduce the original
    hard-coded dataset.

    Args:
        image_size: Side length of each square frame, in pixels (must be > 16).
        n_frames: Number of time frames T (must be >= 1).
        n_particles: Number of Gaussian blobs.
        diffusion_sigma_px: Standard deviation of the per-frame random step.
        blob_sigma_px: Standard deviation of each Gaussian blob (must be > 0).
        seed: Seed for the NumPy random generator.

    Raises:
        ValueError: If ``image_size``, ``n_frames`` or ``blob_sigma_px`` is
            out of range.
    """
    if image_size <= 16:
        # Initial positions are drawn from [8, image_size - 8).
        raise ValueError("image_size must be greater than 16")
    if n_frames < 1:
        raise ValueError("n_frames must be at least 1")
    if blob_sigma_px <= 0:
        raise ValueError("blob_sigma_px must be positive")

    out_dir = DATA / "synthetic_gaussian"
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file_npy = out_dir / "synthetic_2D_t.npy"
    out_file_tif = out_dir / "synthetic_2D_t.tif"

    # Random initial positions, kept 8 px away from the frame border.
    rng = np.random.default_rng(seed)
    positions = rng.uniform(8, image_size - 8, size=(n_particles, 2))

    # The pixel coordinate grids are the same for every frame; build them once.
    y, x = np.indices((image_size, image_size))

    def draw_frame(pos):
        frame = np.zeros((image_size, image_size), dtype=np.float32)
        for (py, px) in pos:
            frame += np.exp(-((x - px) ** 2 + (y - py) ** 2) / (2 * blob_sigma_px ** 2))
        # Normalize and add small noise
        frame /= frame.max() + 1e-6
        frame += rng.normal(0, 0.02, size=frame.shape).astype(np.float32)
        return np.clip(frame, 0, 1)

    frames = []
    for _ in range(n_frames):
        frames.append(draw_frame(positions))
        # Brownian step, then keep particles at least 5 px inside the frame.
        positions += rng.normal(0, diffusion_sigma_px, size=positions.shape)
        positions = np.clip(positions, 5, image_size - 5)

    stack = np.stack(frames, axis=0)  # (T, Y, X)
    np.save(out_file_npy, stack)
    # Values are clipped to [0, 1], so scaling by 65535 fits in uint16.
    tifffile.imwrite(out_file_tif, (stack * 65535).astype("uint16"))

    meta = {
        "description": "Synthetic 2D+t Gaussian-blob particle dataset (Brownian motion).",
        "source": "Generated locally using NumPy (no DeepTrack generator).",
        "url": "https://soft-matter.github.io/trackpy",  # conceptually similar to Trackpy demos
        "pixel_size_xy_um": 0.1,
        "z_step_um": None,
        "frame_interval_s": 0.1,
        "approx_particle_diameter_um": 0.5,
        "sample_type": "synthetic Brownian particles (2D+t)",
        "dimensions": "2D+t",
        "notes": "Designed to be similar in spirit to Trackpy sample videos."
    }
    save_metadata(out_file_tif, meta)

    print("Generated synthetic stack:", stack.shape)
    print("Saved:", out_file_npy, "and", out_file_tif)

generate_synthetic_gaussian_2D_t()



Saved metadata: C:\Users\akumarg\Desktop\microscopy hackathon\confocal_microscopy-copilot\data\synthetic_gaussian\metadata.json
Generated synthetic stack: (50, 64, 64)
Saved: C:\Users\akumarg\Desktop\microscopy hackathon\confocal_microscopy-copilot\data\synthetic_gaussian\synthetic_2D_t.npy and C:\Users\akumarg\Desktop\microscopy hackathon\confocal_microscopy-copilot\data\synthetic_gaussian\synthetic_2D_t.tif


Python (quick tree + metadata check)

In [24]:
from pprint import pprint

# List everything under data/, relative to the repo root.
print("Data folder tree:\n")
for entry in sorted(DATA.rglob("*")):
    print(entry.relative_to(ROOT))

# Pretty-print every metadata.json found under data/.
print("\nExample metadata content:\n")
for meta_path in DATA.rglob("metadata.json"):
    print("----", meta_path.relative_to(ROOT))
    with open(meta_path) as handle:
        contents = json.load(handle)
    pprint(contents)
    print()


Data folder tree:

data\endosomes_2D_t
data\imagej_confocal
data\imagej_confocal\confocal-series.zip
data\imagej_confocal\metadata.json
data\ome_tiff_samples
data\synthetic_deeptrack
data\synthetic_gaussian
data\synthetic_gaussian\metadata.json
data\synthetic_gaussian\synthetic_2D_t.npy
data\synthetic_gaussian\synthetic_2D_t.tif

Example metadata content:

---- data\imagej_confocal\metadata.json
{'approx_particle_diameter_um': 2.0,
 'description': 'Confocal z-series stack from ImageJ demo images (e.g., C. '
                'elegans embryo).',
 'dimensions': '3D',
 'frame_interval_s': None,
 'notes': 'Unzip to get individual TIFF slices; stack them in your loader.',
 'pixel_size_xy_um': 1.0,
 'sample_type': 'confocal z-series (biology)',
 'source': 'ImageJ public demo images.',
 'url': 'https://imagej.net/plugins/public-data-sets',
 'z_step_um': 1.0}

---- data\synthetic_gaussian\metadata.json
{'approx_particle_diameter_um': 0.5,
 'description': 'Synthetic 2D+t Gaussian-blob particle da