## Extract and crop 2D Data from RAW .CZI
The goal is to create 2D images from the 3D data for use in training.

In [2]:
import os
import numpy as np
from czifile import imread as read_czi
from tifffile import imread, imwrite
from skimage.io import imsave
from tqdm import tqdm
import warnings
from collections import defaultdict

# Root folder of the project; every other folder used below lives inside it.
parent_path = r'C:\Users\Alex\Desktop\Mailis_lightsheet'

# Input folder: raw CZI files
data_path = os.path.join(parent_path, 'data')
# Intermediate folder: slices extracted from the CZI files
extracted_path = os.path.join(parent_path, 'extracted_tiff')
# Output folder: final cropped 2D images
cropped_path = os.path.join(parent_path, 'cropped_tiff')

# Create both output folders up front; an already existing folder is not an error.
for output_dir in (extracted_path, cropped_path):
    os.makedirs(output_dir, exist_ok=True)

### Utility to pad image index (for nice ordering)

In [3]:

def zero_pad(i, width=3):
    """Return the integer ``i`` as a decimal string left-padded with zeros.

    Args:
        i: Integer index to format.
        width: Minimum length of the returned string (default 3). Longer
            numbers are returned unpadded and untruncated.

    Returns:
        The zero-padded string, e.g. ``zero_pad(25)`` gives ``"025"``.
    """
    return format(i, f"0{width}d")

# Hide every UserWarning raised from here on, in all later cells too.
# NOTE(review): presumably meant to silence image-saving warnings — confirm
# that no other useful UserWarning is being hidden by this blanket filter.
warnings.simplefilter(action="ignore", category=UserWarning)

### STEP 1: Extract 2D slices from 3D .CZI files

In [4]:
# Sampling interval along the first (Z) axis: only every Z_STEP-th plane is saved.
Z_STEP = 25

# Sort the listing so the files are processed in a reproducible order
# (os.listdir returns directory entries in arbitrary order).
czi_files = sorted(f for f in os.listdir(data_path) if f.lower().endswith('.czi'))
print(f"\n Found {len(czi_files)} CZI files to process.\n")

for file_idx, filename in enumerate(czi_files, 1):
    print(f"[{file_idx}/{len(czi_files)}] Extracting slices from: {filename}")
    in_path = os.path.join(data_path, filename)

    # Load the whole file and drop all singleton axes.
    image = np.squeeze(read_czi(in_path))

    if image.ndim == 4:
        # Select channel 0 if present.
        # NOTE(review): assumes the leading axis of a 4D array is the channel
        # axis — confirm against the acquisition layout.
        image = image[0]

    if image.ndim != 3:
        raise ValueError(f"Unexpected image shape: {image.shape} in {filename}")

    # Loop-invariant values, computed once per file instead of once per slice.
    stem = os.path.splitext(filename)[0]
    # Keep the usual 3-digit index, but widen it for stacks with 1000+ planes
    # so that the generated file names still sort in numeric order.
    pad_width = max(3, len(str(image.shape[0] - 1)))

    for i in tqdm(range(0, image.shape[0], Z_STEP), desc='Saving Z-slices'):
        image2d = image[i, :, :]
        out_name = f"{stem}_z{zero_pad(i, width=pad_width)}.tif"
        out_path = os.path.join(extracted_path, out_name)
        # Written with tifffile's imwrite, the same writer the cropping step
        # uses, so both stages produce their TIFFs the same way.
        imwrite(out_path, image2d.astype(np.uint16))

print("\n Step 1 complete: 2D slices extracted from CZI files.")



 Found 16 CZI files to process.

[1/16] Extracting slices from: M20E1.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 16/16 [00:02<00:00,  7.48it/s]


[2/16] Extracting slices from: M20E2.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 19/19 [00:01<00:00,  9.73it/s]


[3/16] Extracting slices from: M20E3.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 19/19 [00:01<00:00, 12.50it/s]


[4/16] Extracting slices from: M20E4.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 17/17 [00:01<00:00,  9.69it/s]


[5/16] Extracting slices from: M20E6.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 18/18 [00:01<00:00, 12.35it/s]


[6/16] Extracting slices from: M20E6bis.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 18/18 [00:01<00:00, 13.40it/s]


[7/16] Extracting slices from: M21E2.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 19/19 [00:01<00:00, 13.23it/s]


[8/16] Extracting slices from: M21E3.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 18/18 [00:01<00:00, 16.11it/s]


[9/16] Extracting slices from: M21E4.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 20/20 [00:01<00:00, 12.31it/s]


[10/16] Extracting slices from: M21E5.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 18/18 [00:01<00:00, 10.79it/s]


[11/16] Extracting slices from: M21E6.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 17/17 [00:01<00:00, 10.94it/s]


[12/16] Extracting slices from: MockE1.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 26/26 [00:02<00:00,  9.30it/s]


[13/16] Extracting slices from: MockE2.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 15/15 [00:01<00:00,  8.06it/s]


[14/16] Extracting slices from: MockE3.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 22/22 [00:02<00:00, 10.51it/s]


[15/16] Extracting slices from: MockE4.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 16/16 [00:01<00:00,  8.78it/s]


[16/16] Extracting slices from: MockE5.czi


Saving Z-slices: 100%|█████████████████████████████████████████████████████████████████| 15/15 [00:01<00:00,  8.87it/s]


 Step 1 complete: 2D slices extracted from CZI files.





### STEP 2: Crop all 2D TIFF images to the same (Y, X) size 

In [6]:
# Group by prefix (e.g. M20E1_z025 → M20E1)
stacks = defaultdict(list)
for fname in sorted(os.listdir(extracted_path)):
    if fname.endswith('.tif'):
        # Split on the LAST '_z' only: the slice suffix is always at the end,
        # and the original base name may itself contain '_z'.
        prefix = fname.rsplit('_z', 1)[0]
        stacks[prefix].append(os.path.join(extracted_path, fname))

# Find the smallest extent along each axis among all images.
# min_shape stays None when no image was found; otherwise it is always a
# NumPy array, whether one image or many were read.
min_shape = None
for file_list in stacks.values():
    for path in file_list:
        shape = np.array(imread(path).shape)
        if min_shape is None:
            min_shape = shape
        elif shape.size != min_shape.size:
            # Fail with the offending file name instead of an opaque
            # broadcasting error (or a silent broadcast) from np.minimum.
            raise ValueError(
                f"Image {path} has {shape.size} dimensions, expected {min_shape.size}"
            )
        else:
            min_shape = np.minimum(min_shape, shape)

print(f"\n  Cropping all images to size: {min_shape}")

# Crop and save to final output folder.
# The crop keeps the region starting at index 0 on the first two axes.
for prefix, files in stacks.items():
    # Each list is already in sorted order because the directory listing
    # above was sorted before grouping.
    for f in files:
        img = imread(f)
        cropped = img[:min_shape[0], :min_shape[1]]
        out_name = os.path.basename(f)
        out_path = os.path.join(cropped_path, out_name)
        imwrite(out_path, cropped.astype(np.uint16))

print(f"\n Step 2 complete: All cropped images saved to: {cropped_path}")


  Cropping all images to size: [2478 1856]

 Step 2 complete: All cropped images saved to: C:\Users\Alex\Desktop\Mailis_lightsheet\cropped_tiff
