In [1]:
import numpy as np
import h5py
from pathlib import Path
import imageio as iio
import re
import logging

In [2]:
# Set up logging: configure the root logger with an INFO threshold so the
# logging.info / logging.warning / logging.error calls in later cells are shown.
logging.basicConfig(level=logging.INFO)

In [3]:
def natural_sort(l):
    """Sort a list of strings in a way that considers numerical values within the strings.

    For example, natural_sort(["img2.png", "img10.png", "img1.png"])
    will return ["img1.png", "img2.png", "img10.png"].

    Runs of ASCII digits are compared as integers; all other text is compared
    case-insensitively. ``Path`` items are converted to POSIX-style strings
    first, so the result always contains strings, never ``Path`` objects.

    Args:
        l (list): List of strings (or ``pathlib.Path`` objects) to sort.

    Returns:
        list: New list of sorted strings; the input list is not modified.
    """
    def alphanum_key(item):
        # re.split with a capturing group always yields
        # [text, digits, text, digits, ..., text]: odd positions are exactly the
        # ASCII digit runs. Converting by position (instead of str.isdigit())
        # keeps text such as "²" or "٣" as text, so int() cannot fail and ints
        # are never compared against strings.
        parts = re.split('([0-9]+)', item)
        return [int(part) if idx % 2 else part.lower() for idx, part in enumerate(parts)]

    names = [x.as_posix() if isinstance(x, Path) else x for x in l]
    return sorted(names, key=alphanum_key)

In [4]:
# Run configuration: everything a reader might need to tune lives in this cell.
# The directories are kept as plain strings here; the processing cell wraps them in Path.
base_src_dir = "H:/users/eberrigan/20250225_Elohim_Bello_Exp02_timelapse_images/Rice"  # Adjust to your source directory
base_dst_dir = "H:/users/eberrigan/20250225_Elohim_Bello_Exp02_timelapse_images/Rice_time_lapse_videos_20250225/h5s_preds_by_frame"  # Adjust to your destination directory
genotypes = range(1, 7)  # 1 to 6
img_numbers = range(1, 73)  # 1 to 72
days = range(1, 15)  # 1 to 14
replicates = range(1, 7)  # 1 to 6
overwrite = False  # When overwrite=True existing files are overwritten

# Record every setting that controls the run so the cell output documents it.
logging.info(f"genotypes: {genotypes}")
logging.info(f"image numbers: {img_numbers}")
logging.info(f"days: {days}")
logging.info(f"replicates: {replicates}")
logging.info(f"overwrite: {overwrite}")

INFO:root:genotypes: range(1, 7)
INFO:root:image numbers: range(1, 73)
INFO:root:days: range(1, 15)


In [5]:
# This dataset is organized as genotype -> replicate -> timepoint -> image
# genotypes are numbered 1 to 6
# replicates are numbered 1 to 6
# timepoints (days): only those listed in `days` are processed (configured as 1 to 14;
#   an earlier note here said 1 to 19 -- TODO confirm how many exist on disk)
# images are numbered 1 to 72
#
# For each genotype/replicate pair, every complete day of images is read in
# day order and image-number order, stacked into one array and written to
# <base_dst_dir>/g{genotype}_r{replicate}.h5 as the dataset "vol".

base_src_dir = Path(base_src_dir)
base_dst_dir = Path(base_dst_dir)

# Create the destination directory if it doesn't exist
base_dst_dir.mkdir(parents=True, exist_ok=True)
logging.info(f"Destination directory: {base_dst_dir}")

# Iterate over genotypes and replicates
for genotype in genotypes:
    for replicate in replicates:
        h5_name = base_dst_dir / f"g{genotype}_r{replicate}.h5"
        logging.info(f"\nProcessing genotype {genotype}, replicate {replicate}")

        if not overwrite and h5_name.exists():
            logging.info(f"Skipping {h5_name} as it already exists and overwrite is False.")
            continue

        images = []

        # Iterate over days
        for day in days:
            day_dir = base_src_dir / str(genotype) / str(replicate) / str(day)
            logging.info(f"Day directory: {day_dir}")
            if not day_dir.exists():
                continue

            # Collect image paths for the current day
            day_images = []
            missing_images = False
            for img_number in img_numbers:
                img_path = day_dir / f"{img_number}.png"
                if img_path.exists():
                    day_images.append(img_path)
                else:
                    logging.warning(f"Missing image: {img_path}")
                    missing_images = True

            # Skip this day if any images are missing
            if missing_images:
                logging.warning(f"Skipping day {day} for genotype {genotype}, replicate {replicate} due to missing images.")
                continue

            # Sort the images for the current day (natural_sort also turns the
            # Path objects into POSIX-style strings)
            day_images = natural_sort(day_images)

            # Read and append images to the list
            # NOTE(review): an unreadable image is logged and skipped, so that day
            # contributes fewer frames than the others -- confirm downstream code
            # does not assume a fixed number of frames per day.
            for img_path in day_images:
                try:
                    img = iio.imread(img_path)
                    images.append(img)
                except Exception as e:
                    logging.error(f"Error reading {img_path}: {e}")

        if images:
            try:
                # Stack images along a new leading axis: (frames, *image_shape).
                # np.stack raises ValueError when the images differ in shape.
                vol = np.stack(images, axis=0)

                # Save as h5. Write to a temporary sibling file first and rename it
                # only after the write succeeded: a failed or interrupted write must
                # not leave a partial file at h5_name, because with overwrite=False
                # later runs would skip it as "already exists".
                tmp_name = h5_name.with_name(h5_name.name + ".tmp")
                try:
                    with h5py.File(tmp_name, "w") as f:
                        f.create_dataset(
                            "vol",
                            # Add a trailing channel axis.
                            # NOTE(review): assumes single-channel (height, width) images;
                            # colour images would get an extra axis after their channel
                            # axis -- TODO confirm.
                            data=np.expand_dims(vol, axis=-1),
                            compression="gzip",  # Using gzip compression
                            compression_opts=1  # gzip level 1: lowest non-zero level (fastest, least compression)
                        )
                    tmp_name.replace(h5_name)
                except BaseException:
                    # Also covers KeyboardInterrupt (kernel interrupt): drop the
                    # partial temp file, then let the original exception propagate.
                    if tmp_name.exists():
                        tmp_name.unlink()
                    raise
                logging.info(f"Time-lapse video for genotype {genotype}, replicate {replicate} saved successfully as {h5_name}.")
            except ValueError as e:
                logging.error(f"Error stacking images for {h5_name}: {e}")
        else:
            logging.info(f"No images found for genotype {genotype}, replicate {replicate}. Skipping.")



INFO:root:Destination directory: H:\users\eberrigan\20250225_Elohim_Bello_Exp02_timelapse_images\Rice_time_lapse_videos_20250225\h5s_preds_by_frame
INFO:root:
Processing genotype 1, replicate 1
INFO:root:Day directory: H:\users\eberrigan\20250225_Elohim_Bello_Exp02_timelapse_images\Rice\1\1\1
  img = iio.imread(img_path)
INFO:root:Day directory: H:\users\eberrigan\20250225_Elohim_Bello_Exp02_timelapse_images\Rice\1\1\2
INFO:root:Day directory: H:\users\eberrigan\20250225_Elohim_Bello_Exp02_timelapse_images\Rice\1\1\3
INFO:root:Day directory: H:\users\eberrigan\20250225_Elohim_Bello_Exp02_timelapse_images\Rice\1\1\4
INFO:root:Day directory: H:\users\eberrigan\20250225_Elohim_Bello_Exp02_timelapse_images\Rice\1\1\5
INFO:root:Day directory: H:\users\eberrigan\20250225_Elohim_Bello_Exp02_timelapse_images\Rice\1\1\6
INFO:root:Day directory: H:\users\eberrigan\20250225_Elohim_Bello_Exp02_timelapse_images\Rice\1\1\7
INFO:root:Day directory: H:\users\eberrigan\20250225_Elohim_Bello_Exp02_timel