In [14]:
import zarr
import numpy as np
import cv2
import matplotlib.pyplot as plt
from skimage.exposure import rescale_intensity 
from ipywidgets import HBox, Image
from IPython.display import display
import io

def convert_to_8bit(vol):
    """Rescale a volume to the 8-bit range using a percentile window.

    The input window runs from the 0th percentile (the minimum) up to the
    99.9th percentile; that window is handed to ``rescale_intensity`` as
    ``in_range`` with ``out_range=np.uint8``.

    Args:
        vol: Array-like of intensities. It is materialised once with
            ``np.asarray`` so that a lazily loaded array is not read
            separately for every percentile and again for the rescale.

    Returns:
        Whatever ``rescale_intensity`` returns for that window and
        ``out_range=np.uint8``.
    """
    vol = np.asarray(vol)
    # A single call yields both bounds instead of processing the volume twice.
    pmin, pmax = np.percentile(vol, [0.0, 99.9])
    return rescale_intensity(vol, in_range=(pmin, pmax), out_range=np.uint8)

# Experiment-run identifiers; each one names a folder under ExperimentRuns.
paths_r = [
    'TS_5_4',
    'TS_6_4',
    'TS_6_6',
    'TS_69_2',
    'TS_73_6',
    'TS_86_3',
    'TS_99_9',
]

In [15]:


# Preview every slice of every run as a row of PNG image widgets.
set_type = 'train'  # identical for every run, so it is set once outside the loop

for r in paths_r:
    zarr_path = f'data/{set_type}/static/ExperimentRuns/{r}/VoxelSpacing10.000/denoised.zarr'

    try:
        vol = zarr.open(zarr_path, mode='r')
        vol = vol[0]  # NOTE(review): presumably the first (full-resolution) level of the store - confirm
        vol2 = convert_to_8bit(vol)
        n_imgs = vol2.shape[0]

        print(f"This is {r} and there are {n_imgs} images")

        image_widgets = []
        for i in range(n_imgs):
            tmp_img = vol2[i]

            # Resize the single-channel slice to 640x640 first and only then
            # replicate it into three channels: the pixels are the same as
            # resizing the stacked array, but the resize handles one channel
            # instead of three.
            resized = cv2.resize(tmp_img, (640, 640))
            inp_arr = np.stack([resized] * 3, axis=-1)

            # Convert to PNG bytes for display with ipywidgets.Image
            img_bytes = io.BytesIO()
            plt.imsave(img_bytes, inp_arr, format='png')
            img_widget = Image(value=img_bytes.getvalue())
            image_widgets.append(img_widget)

        # Lay the slices out side by side in a single HBox
        hbox = HBox(image_widgets)
        print(f"Displaying slices for {r}:")
        display(hbox)

    except Exception as e:
        # Best effort: report the failing run and carry on with the next one.
        print(f"An error occurred while processing {r}: {e}")

print("Finished displaying images.")

This is TS_5_4 and there are 184 images
Displaying slices for TS_5_4:


HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02\x80\x00\x00\x02\x80\x08\x06\x00\x…

This is TS_6_4 and there are 184 images
Displaying slices for TS_6_4:


HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02\x80\x00\x00\x02\x80\x08\x06\x00\x…

This is TS_6_6 and there are 184 images
Displaying slices for TS_6_6:


HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02\x80\x00\x00\x02\x80\x08\x06\x00\x…

This is TS_69_2 and there are 184 images
Displaying slices for TS_69_2:


HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02\x80\x00\x00\x02\x80\x08\x06\x00\x…

This is TS_73_6 and there are 184 images
Displaying slices for TS_73_6:


HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02\x80\x00\x00\x02\x80\x08\x06\x00\x…

This is TS_86_3 and there are 184 images
Displaying slices for TS_86_3:


HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02\x80\x00\x00\x02\x80\x08\x06\x00\x…

This is TS_99_9 and there are 184 images
Displaying slices for TS_99_9:


HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02\x80\x00\x00\x02\x80\x08\x06\x00\x…

Finished displaying images.


In [16]:

import os


output_dir = 'yolo_data/images'
set_type = 'train'  # identical for every run

# Create the main output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)


def _has_png_files(folder):
    """Return True when `folder` exists and holds at least one .png file."""
    return os.path.isdir(folder) and any(
        name.lower().endswith('.png') for name in os.listdir(folder)
    )


# Decide run by run whether extraction is still needed. Checking only one
# hardcoded run folder would skip every run as soon as that folder exists,
# even when it is empty because loading failed.
# Caveat: a run interrupted part-way through writing still counts as done.
pending_runs = [r for r in paths_r if not _has_png_files(os.path.join(output_dir, r))]

if pending_runs:
    for r in pending_runs:
        zarr_path = f'data/{set_type}/static/ExperimentRuns/{r}/VoxelSpacing10.000/denoised.zarr'
        ts_output_dir = os.path.join(output_dir, r)

        try:
            vol = zarr.open(zarr_path, mode='r')
            vol = vol[0]  # NOTE(review): presumably the first (full-resolution) level of the store - confirm
            vol2 = convert_to_8bit(vol)
            n_imgs = vol2.shape[0]

            # Create the per-run subfolder only after the volume has loaded,
            # so a failed load does not leave an empty folder behind.
            os.makedirs(ts_output_dir, exist_ok=True)

            print(f"Processing {r} and there are {n_imgs} images")

            for i in range(n_imgs):
                tmp_img = vol2[i]
                # Resize the single channel, then replicate it to three
                # channels: same pixels as resizing the stacked array, with
                # a third of the resize work.
                resized = cv2.resize(tmp_img, (640, 640))
                inp_arr = np.stack([resized] * 3, axis=-1)

                # Save the image to the corresponding subfolder, zero-padding
                # the slice index so the files sort in slice order.
                image_filename = os.path.join(ts_output_dir, f"{i:04d}.png")
                # cv2.imwrite signals failure through its return value rather
                # than an exception, so check it explicitly.
                if not cv2.imwrite(image_filename, inp_arr):
                    raise OSError(f"Could not write {image_filename}")

            print(f"Saved {n_imgs} images for {r} in {ts_output_dir}")

        except Exception as e:
            # Best effort: report the failing run and carry on with the next one.
            print(f"An error occurred while processing {r}: {e}")

    print("Finished saving images.")
else:
    print('Images are already extracted')

Processing TS_5_4 and there are 184 images
Saved 184 images for TS_5_4 in yolo_data/images\TS_5_4
Processing TS_6_4 and there are 184 images
Saved 184 images for TS_6_4 in yolo_data/images\TS_6_4
Processing TS_6_6 and there are 184 images
Saved 184 images for TS_6_6 in yolo_data/images\TS_6_6
Processing TS_69_2 and there are 184 images
Saved 184 images for TS_69_2 in yolo_data/images\TS_69_2
Processing TS_73_6 and there are 184 images
Saved 184 images for TS_73_6 in yolo_data/images\TS_73_6
Processing TS_86_3 and there are 184 images
Saved 184 images for TS_86_3 in yolo_data/images\TS_86_3
Processing TS_99_9 and there are 184 images
Saved 184 images for TS_99_9 in yolo_data/images\TS_99_9
Finished saving images.


In [10]:
import os
import json

def find_min_max_xyz(path):
    """
    Find the extreme 'x', 'y' and 'z' values across all JSON files under a path.

    Every ``*.json`` file found by walking `path` recursively is expected to
    look like ``{"points": [{"location": {"x": ..., "y": ..., "z": ...}}, ...]}``.
    Files or points that do not match that shape are skipped. A file that
    cannot be parsed or processed is reported on stdout and does not stop
    the scan.

    Args:
        path: The path to the directory containing subfolders with JSON files.

    Returns:
        A tuple ``(min_x, max_x, min_y, max_y, min_z, max_z)`` in which the
        pair belonging to an axis that never occurred is ``None``, or
        ``None`` if no 'x', 'y' or 'z' value was found at all.
    """
    axes = ('x', 'y', 'z')
    lows = dict.fromkeys(axes, float('inf'))
    highs = dict.fromkeys(axes, float('-inf'))
    seen = set()  # axes for which at least one value was recorded

    for root, _, files in os.walk(path):
        for file in files:
            if not file.endswith(".json"):
                continue
            filepath = os.path.join(root, file)
            try:
                # JSON text is UTF-8; do not rely on the platform default
                # encoding, which differs between operating systems.
                with open(filepath, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                if 'points' in data and isinstance(data['points'], list):
                    for point in data['points']:
                        if 'location' in point and isinstance(point['location'], dict):
                            location = point['location']
                            for axis in axes:
                                if axis in location:
                                    value = location[axis]
                                    lows[axis] = min(lows[axis], value)
                                    highs[axis] = max(highs[axis], value)
                                    seen.add(axis)
            except json.JSONDecodeError:
                print(f"Error decoding JSON in file: {filepath}")
            except Exception as e:
                print(f"Error processing file {filepath}: {e}")

    if not seen:
        return None

    # Flatten to (min, max) pairs in x, y, z order, with None for missing axes.
    bounds = []
    for axis in axes:
        if axis in seen:
            bounds.extend((lows[axis], highs[axis]))
        else:
            bounds.extend((None, None))
    return tuple(bounds)

if __name__ == "__main__":
    # Build the path with os.path.join instead of a backslash literal: the
    # literal only resolves on Windows, whereas the joined path uses the
    # separator of whatever platform the notebook runs on.
    path_to_data = os.path.join("data", "train", "overlay", "ExperimentRuns")

    result = find_min_max_xyz(path_to_data)

    if result:
        # Each entry is None when the corresponding axis never occurred.
        min_x, max_x, min_y, max_y, min_z, max_z = result
        if min_x is not None:
            print(f"Minimum 'x' value found: {min_x}")
        if max_x is not None:
            print(f"Maximum 'x' value found: {max_x}")
            print(50*'-')  # separator between the x and y sections
        if min_y is not None:
            print(f"Minimum 'y' value found: {min_y}")
        if max_y is not None:
            print(f"Maximum 'y' value found: {max_y}")
            print(50*'-')  # separator between the y and z sections
        if min_z is not None:
            print(f"Minimum 'z' value found: {min_z}")
        if max_z is not None:
            print(f"Maximum 'z' value found: {max_z}")
    else:
        print(f"No 'x', 'y', or 'z' values found in the JSON files within the path: {path_to_data}")

Minimum 'x' value found: 69.928
Maximum 'x' value found: 6229.592
--------------------------------------------------
Minimum 'y' value found: 86.606
Maximum 'y' value found: 6266.944
--------------------------------------------------
Minimum 'z' value found: 39.085
Maximum 'z' value found: 1549.302


In [17]:
# Class index -> particle name.
i2p = {
    0: 'virus',
    1: 'apo-ferritin',
    2: 'beta-galactosidase',
    3: 'ribosome',
    4: 'thyroglobulin',
}

# Particle name -> class index. Derived from i2p so the two mappings cannot
# drift apart when a class is added or renamed.
p2i = {name: index for index, name in i2p.items()}

# Particle radius per class.
# NOTE(review): presumably in the same physical unit as the voxel spacing
# (angstroms?) - confirm against the dataset description.
particle_radius = {
    'virus': 140,
    'apo-ferritin': 60,
    'beta-galactosidase': 90,
    'ribosome': 150,
    'thyroglobulin': 130,
}

VOXEL_SPACING = 10  # matches the 'VoxelSpacing10.000' folder in the data paths
IMAGE_SIZE = 640    # side length of the square images the slices are resized to

# Box width/height per class as a fraction of the image side: the radius
# divided by the voxel spacing gives a radius in voxels, dividing by the
# image side normalises it, and the factor 2 turns the radius into a diameter.
# NOTE(review): normalising by 640 is only exact if the un-resized slices are
# also 640 voxels wide; otherwise the original slice width should be used -
# confirm.
width_height_particles = {
    name: ((radius / VOXEL_SPACING) / IMAGE_SIZE) * 2
    for name, radius in particle_radius.items()
}

In [21]:
# Print the normalised box size of each class, in class-index order.
for class_index in range(5):
    particle_name = i2p[class_index]
    print(width_height_particles[particle_name])

0.04375
0.01875
0.028125
0.046875
0.040625


In [7]:
from ultralytics import YOLO
# Load the "yolo11x.pt" checkpoint; in the recorded run the weights file was
# downloaded to the working directory because it was not present locally.
model = YOLO("yolo11x.pt")
# Left as the cell's trailing expression on purpose: it prints the model
# summary and displays the returned tuple, which in the recorded run lines up
# with the summary's layer, parameter, gradient and GFLOPs figures.
model.info()

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt to 'yolo11x.pt'...


100%|██████████| 109M/109M [00:02<00:00, 50.2MB/s] 


YOLO11x summary: 631 layers, 56,966,176 parameters, 0 gradients, 196.0 GFLOPs


(631, 56966176, 0, 195.9587328)

In [12]:
# Smoke test: run the loaded model on a single image file and show the result.
img_path = 'Untitled.png' # alternative test image left by the author: 'TS_69_2_1100.png'
# Calling the model with a path returns an indexable collection of results;
# only the first entry is used because a single image was passed in.
results = model(img_path)
# The recorded run reported "(no detections)" for this image.
# NOTE(review): presumably because the stock checkpoint has not been
# fine-tuned on the particle classes yet - confirm.
results[0].show()


image 1/1 e:\ML Projects\Kaggle\CryoET\Untitled.png: 640x640 (no detections), 355.3ms
Speed: 3.7ms preprocess, 355.3ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)
