In [None]:
import numpy as np
import h5py
from pathlib import Path
import imageio as iio
import re

In [2]:
def natural_sort(l):
    """Sort a list of strings in a way that considers numerical values within the strings.
    
    For example, natural_sort(["img2.png", "img10.png", "img1.png"])
    will return ["img1.png", "img2.png", "img10.png"].
    
    Args:
        l (list): List of strings to sort.
    
    Returns:
        list: List of sorted strings.
    """
    l = [x.as_posix() if isinstance(x, Path) else x for x in l]
    convert = lambda text: int(text) if text.isdigit() else text.lower()
    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
    return sorted(l, key=alphanum_key)

In [3]:
base_src_dir = "Rice"  # Adjust to your source directory
base_dst_dir = "Rice_time_lapse_videos_one_frame_per_day_20240808/h5s_preds_by_frame"  # Adjust to your destination directory
genotypes = range(1,7) # 1 to 6
img_numbers = range(1, 73)  # 1 to 72
days = range(1, 15)  # 1 to 14
overwrite = False # When overwrite=True existing files are overwritten
print(f"genotypes: {genotypes}")
print(f"image numbers: {img_numbers}")
print(f"days: {days}")

genotypes: range(1, 7)
image numbers: range(1, 73)
days: range(1, 15)


In [4]:
base_src_dir = Path(base_src_dir)
base_dst_dir = Path(base_dst_dir)

# Create the destination directory if it doesn't exist
base_dst_dir.mkdir(parents=True, exist_ok=True)
print(f"Destination directory: {base_dst_dir}")

# Iterate over genotypes and replicates
for genotype in genotypes:
    for replicate_dir in (base_src_dir / str(genotype)).glob('*d/R*'):
        replicate = replicate_dir.name
        h5_name = base_dst_dir / f"g{genotype}_r{replicate}.h5"
        print(f"\nProcessing genotype {genotype}, replicate {replicate}")

        if not overwrite and h5_name.exists():
            print(f"Skipping {h5_name} as it already exists and overwrite is False.")
            continue

        available_frames = []
        selected_images = []

        # Iterate over days
        for day in days:
            print(f"day: {day}")
            day_dir = base_src_dir / str(genotype) / f"{day}d" / replicate
            if not day_dir.exists():
                continue
            
            # Collect available image indices for the current day
            day_images = []
            for img_number in img_numbers:
                img_path = day_dir / f"{img_number}.png"
                if img_path.exists():
                    day_images.append(img_number)

            if day_images:
                available_frames.append(set(day_images))

        # Find the common frame index available across all days
        if available_frames:
            common_frames = set.intersection(*available_frames)
            if not common_frames:
                print(f"No common frames available for genotype {genotype}, replicate {replicate}. Skipping.")
                continue

            # Select the lowest common frame index (or any other criteria you prefer)
            selected_frame = min(common_frames)
            print(f"Selected frame {selected_frame} for genotype {genotype}, replicate {replicate}.")

            # Iterate over days again to collect the selected frame images
            for day in days:
                day_dir = base_src_dir / str(genotype) / f"{day}d" / replicate
                img_path = day_dir / f"{selected_frame}.png"
                if img_path.exists():
                    selected_images.append(iio.imread(img_path))
                else:
                    print(f"  Missing selected frame {selected_frame} for day {day}.")

        if selected_images:
            vol = np.stack(selected_images, axis=0)  # Stack images to create a volume (slices, height, width)
            # Save as h5
            with h5py.File(h5_name, "w") as f:
                f.create_dataset(
                    "vol",
                    data=np.expand_dims(vol, axis=-1),  # Add channel dimension
                    compression="gzip",  # Using gzip compression
                    compression_opts=1  # Compression level
                )
            print(f"Time-lapse video for genotype {genotype}, replicate {replicate} saved successfully as {h5_name}.")
        else:
            print(f"No images found for genotype {genotype}, replicate {replicate}. Skipping.")

Destination directory: Rice_time_lapse_videos_one_frame_per_day_20240808\h5s_preds_by_frame

Processing genotype 1, replicate R1
Skipping Rice_time_lapse_videos_one_frame_per_day_20240808\h5s_preds_by_frame\g1_rR1.h5 as it already exists and overwrite is False.

Processing genotype 1, replicate R2
Skipping Rice_time_lapse_videos_one_frame_per_day_20240808\h5s_preds_by_frame\g1_rR2.h5 as it already exists and overwrite is False.

Processing genotype 1, replicate R3
Skipping Rice_time_lapse_videos_one_frame_per_day_20240808\h5s_preds_by_frame\g1_rR3.h5 as it already exists and overwrite is False.

Processing genotype 1, replicate R4
Skipping Rice_time_lapse_videos_one_frame_per_day_20240808\h5s_preds_by_frame\g1_rR4.h5 as it already exists and overwrite is False.

Processing genotype 1, replicate R5
day: 1
day: 2
day: 3
day: 4
day: 5
day: 6
day: 7
day: 8
day: 9
day: 10
day: 11
day: 12
day: 13
day: 14
Selected frame 1 for genotype 1, replicate R5.




Time-lapse video for genotype 1, replicate R5 saved successfully as Rice_time_lapse_videos_one_frame_per_day_20240808\h5s_preds_by_frame\g1_rR5.h5.

Processing genotype 1, replicate R1
Skipping Rice_time_lapse_videos_one_frame_per_day_20240808\h5s_preds_by_frame\g1_rR1.h5 as it already exists and overwrite is False.

Processing genotype 1, replicate R2
Skipping Rice_time_lapse_videos_one_frame_per_day_20240808\h5s_preds_by_frame\g1_rR2.h5 as it already exists and overwrite is False.

Processing genotype 1, replicate R3
Skipping Rice_time_lapse_videos_one_frame_per_day_20240808\h5s_preds_by_frame\g1_rR3.h5 as it already exists and overwrite is False.

Processing genotype 1, replicate R4
Skipping Rice_time_lapse_videos_one_frame_per_day_20240808\h5s_preds_by_frame\g1_rR4.h5 as it already exists and overwrite is False.

Processing genotype 1, replicate R5
Skipping Rice_time_lapse_videos_one_frame_per_day_20240808\h5s_preds_by_frame\g1_rR5.h5 as it already exists and overwrite is False.

P

In [5]:
# base_src_dir = Path(base_src_dir)
# base_dst_dir = Path(base_dst_dir)

# # Create the destination directory if it doesn't exist
# base_dst_dir.mkdir(parents=True, exist_ok=True)
# print(f"Destination directory: {base_dst_dir}")

# # "Arabidopsis\6\1d\R1\1.png‚Äù
# # "<Species>\<genotype_int>\<age_in_days>d\R<replicate_number>\<frame_index>.png"

# # Iterate over treatments and replicates
# for treatment_replicate_dir in base_src_dir.glob('*/*'):  # Adjust if necessary to match your directory structure
#     treatment_replicate = treatment_replicate_dir.name
#     print(f"\nProcessing treatment and replicate: {treatment_replicate}")
#     tr_dst_dir = base_dst_dir / treatment_replicate
#     tr_dst_dir.mkdir(parents=True, exist_ok=True)
    
#     # Iterate over image numbers
#     for img_number in img_numbers:
#         h5_name = tr_dst_dir / f"frame_{img_number}.h5"
#         print(f"  Processing frame {img_number}...")

#         if not overwrite and h5_name.exists():
#             print(f"  Skipping frame {img_number} as it already exists and overwrite is False.")
#             continue

#         images = []
#         # Get a list of all day directories, sort them naturally
#         day_dirs = natural_sort([x for x in base_src_dir.glob('*') if x.is_dir()])
#         print(f"  Days found and sorted: {[Path(day).name for day in day_dirs]}")
#         # Iterate over each sorted day directory
#         for day_dir in day_dirs:
#             img_path = Path(day_dir) / treatment_replicate / f"{img_number}.png"
#             if img_path.exists():
#                 images.append(iio.imread(img_path))
#             else:
#                 print(f"  Missing image: {img_path}")

#         if images:
#             vol = np.stack(images, axis=0)  # Stack images to create a volume (slices, height, width)
#             # Save as h5
#             with h5py.File(h5_name, "w") as f:
#                 f.create_dataset(
#                     "vol",
#                     data=np.expand_dims(vol, axis=-1),  # Add channel dimension
#                     compression="gzip",  # Using gzip compression
#                     compression_opts=1  # Compression level
#                 )
#             print(f"  Frame {img_number} saved successfully.")
#         else:
#             print(f"  No images found for frame {img_number} in {treatment_replicate}. Skipping.")
