In [1]:
import numpy as np
import h5py
from pathlib import Path
import imageio as iio
import re

In [2]:
def natural_sort(l):
    """Sort strings so that embedded digit runs are ordered by numeric value.

    For example, natural_sort(["img2.png", "img10.png", "img1.png"])
    will return ["img1.png", "img2.png", "img10.png"].

    ``pathlib.Path`` items are converted to POSIX-style strings before
    sorting, so the result always contains strings, never ``Path`` objects.
    Non-numeric parts are compared case-insensitively.

    Args:
        l (iterable): Strings and/or ``pathlib.Path`` objects to sort.

    Returns:
        list: New list of strings in natural order.
    """
    names = [x.as_posix() if isinstance(x, Path) else x for x in l]

    def alphanum_key(key):
        # re.split with a capturing group alternates text, digits, text, ...,
        # so odd positions are exactly the ASCII digit runs matched by [0-9]+.
        # Deciding by position (rather than str.isdigit()) keeps key types
        # aligned between items and avoids int() on characters such as "²",
        # which isdigit() accepts but int() rejects.
        parts = re.split('([0-9]+)', key)
        return [int(part) if idx % 2 else part.lower()
                for idx, part in enumerate(parts)]

    return sorted(names, key=alphanum_key)

In [3]:
# --- User-adjustable settings ------------------------------------------------
# Input and output locations.
base_src_dir = "Arabidopsis"  # Adjust to your source directory
base_dst_dir = "Arabidopsis_time_lapse_videos_20240807/h5s_preds_by_frame"  # Adjust to your destination directory

# When overwrite=True existing files are overwritten; when False they are skipped.
overwrite = False

# Ranges to process (range upper bounds are exclusive).
genotypes = range(1, 7)     # 1 to 6
img_numbers = range(1, 73)  # 1 to 72
days = range(1, 19)         # 1 to 18

# Echo the selected ranges so they are recorded in the notebook output.
print(
    f"genotypes: {genotypes}",
    f"image numbers: {img_numbers}",
    f"days: {days}",
    sep="\n",
)

genotypes: range(1, 7)
image numbers: range(1, 73)
days: range(1, 19)


In [4]:
# Build one HDF5 volume per (genotype, replicate) by stacking the images of
# every complete day in day order.
# Expected layout: <base_src_dir>/<genotype>/<day>d/<replicate>/<img_number>.png
base_src_dir = Path(base_src_dir)
base_dst_dir = Path(base_dst_dir)

# Create the destination directory if it doesn't exist
base_dst_dir.mkdir(parents=True, exist_ok=True)
print(f"Destination directory: {base_dst_dir}")

# Iterate over genotypes and replicates
for genotype in genotypes:
    genotype_dir = base_src_dir / str(genotype)

    # The glob yields one path per (day, replicate) pair, so the same replicate
    # name appears once for every day directory. Deduplicate the names so each
    # replicate is processed (and its file written) exactly once, and sort them
    # for a deterministic processing order.
    replicate_names = {p.name for p in genotype_dir.glob('*d/R*') if p.is_dir()}
    for replicate in natural_sort(sorted(replicate_names)):
        h5_name = base_dst_dir / f"g{genotype}_r{replicate}.h5"
        print(f"\nProcessing genotype {genotype}, replicate {replicate}")

        if not overwrite and h5_name.exists():
            print(f"Skipping {h5_name} as it already exists and overwrite is False.")
            continue

        images = []

        # Iterate over days
        for day in days:
            print(f"day: {day}")
            day_dir = genotype_dir / f"{day}d" / replicate
            if not day_dir.exists():
                continue

            # Collect image paths for the current day
            day_images = []
            missing_images = False
            for img_number in img_numbers:
                img_path = day_dir / f"{img_number}.png"
                if img_path.exists():
                    day_images.append(img_path)
                else:
                    print(f"  Missing image: {img_path}")
                    missing_images = True

            # Skip this day if any images are missing
            if missing_images:
                print(f"Skipping day {day} for genotype {genotype}, replicate {replicate} due to missing images.")
                continue

            # Sort the images for the current day (natural_sort returns path strings)
            day_images = natural_sort(day_images)

            # Read and append images to the list
            for img_path in day_images:
                images.append(iio.imread(img_path))

        if images:
            # Stack along a new leading axis: (slices, ...image shape).
            # NOTE(review): assumes all images share one shape; np.stack raises otherwise.
            vol = np.stack(images, axis=0)

            # Write to a temporary sibling file and rename it on success, so an
            # interrupted run never leaves a truncated file that a later run
            # would skip as "already exists" when overwrite is False.
            tmp_name = h5_name.with_name(h5_name.name + ".tmp")
            try:
                with h5py.File(tmp_name, "w") as f:
                    f.create_dataset(
                        "vol",
                        data=np.expand_dims(vol, axis=-1),  # Add trailing channel dimension
                        compression="gzip",  # Using gzip compression
                        compression_opts=1  # Compression level
                    )
                tmp_name.replace(h5_name)
            finally:
                # Only present here if the write or rename failed.
                if tmp_name.exists():
                    tmp_name.unlink()
            print(f"Time-lapse video for genotype {genotype}, replicate {replicate} saved successfully as {h5_name}.")
        else:
            print(f"No images found for genotype {genotype}, replicate {replicate}. Skipping.")



Destination directory: Arabidopsis_time_lapse_videos_20240807\h5s_preds_by_frame

Processing genotype 1, replicate R1
day: 1




day: 2
day: 3
day: 4
day: 5
day: 6
day: 7
day: 8
day: 9
day: 10
day: 11
day: 12
  Missing image: Arabidopsis\1\12d\R1\2.png
  Missing image: Arabidopsis\1\12d\R1\3.png
  Missing image: Arabidopsis\1\12d\R1\4.png
  Missing image: Arabidopsis\1\12d\R1\5.png
  Missing image: Arabidopsis\1\12d\R1\6.png
  Missing image: Arabidopsis\1\12d\R1\7.png
  Missing image: Arabidopsis\1\12d\R1\8.png
  Missing image: Arabidopsis\1\12d\R1\9.png
  Missing image: Arabidopsis\1\12d\R1\10.png
  Missing image: Arabidopsis\1\12d\R1\11.png
  Missing image: Arabidopsis\1\12d\R1\12.png
  Missing image: Arabidopsis\1\12d\R1\13.png
  Missing image: Arabidopsis\1\12d\R1\14.png
  Missing image: Arabidopsis\1\12d\R1\15.png
  Missing image: Arabidopsis\1\12d\R1\16.png
  Missing image: Arabidopsis\1\12d\R1\17.png
  Missing image: Arabidopsis\1\12d\R1\18.png
  Missing image: Arabidopsis\1\12d\R1\19.png
  Missing image: Arabidopsis\1\12d\R1\20.png
  Missing image: Arabidopsis\1\12d\R1\21.png
  Missing image: Arabidopsis