In [26]:
import zarr
import numpy as np
import cv2
import matplotlib.pyplot as plt
from skimage.exposure import rescale_intensity 
from ipywidgets import HBox, Image
from IPython.display import display
import io
import os
import json

def convert_to_8bit(vol):
    """Rescale a volume's intensities into the uint8 range.

    The input window runs from the 0th percentile of ``vol`` to its 99.9th
    percentile; that window is passed to ``rescale_intensity`` as ``in_range``
    together with ``out_range=np.uint8``.

    Args:
        vol: Array-like of intensities accepted by ``np.percentile``.

    Returns:
        Whatever ``rescale_intensity`` returns for that window.
    """
    lower_bound = np.percentile(vol, 0.0)
    upper_bound = np.percentile(vol, 99.9)
    intensity_window = (lower_bound, upper_bound)
    return rescale_intensity(vol, in_range=intensity_window, out_range=np.uint8)

# Names of the experiment-run folders that the cells below iterate over.
paths_r = [
    'TS_5_4',
    'TS_6_4',
    'TS_6_6',
    'TS_69_2',
    'TS_73_6',
    'TS_86_3',
    'TS_99_9',
]

In [None]:


# Preview cell: for each run, turn every slice of the 8-bit volume into a
# PNG-backed widget and show all widgets of that run side by side in one HBox.
for r in paths_r:
    set_type = 'train'
    zarr_path = f'data/{set_type}/static/ExperimentRuns/{r}/VoxelSpacing10.000/denoised.zarr'

    try:
        # Index 0 of whatever zarr.open returns is the array that gets converted.
        vol = zarr.open(zarr_path, mode='r')[0]
        vol2 = convert_to_8bit(vol)
        n_imgs = vol2.shape[0]

        print(f"This is {r} and there are {n_imgs} images")

        image_widgets = []
        for slice_index in range(n_imgs):
            gray_slice = vol2[slice_index]

            # Repeat the single channel three times along a new last axis,
            # then resize the result to 640x640.
            rgb_slice = np.stack((gray_slice,) * 3, axis=-1)
            rgb_slice = cv2.resize(rgb_slice, (640, 640))

            # ipywidgets.Image is fed encoded bytes, so encode to PNG in memory.
            png_buffer = io.BytesIO()
            plt.imsave(png_buffer, rgb_slice, format='png')
            image_widgets.append(Image(value=png_buffer.getvalue()))

        # One HBox per run holds all of that run's slice widgets.
        print(f"Displaying slices for {r}:")
        display(HBox(image_widgets))

    except Exception as e:
        # Best effort: report the failing run and move on to the next one.
        print(f"An error occurred while processing {r}: {e}")

print("Finished displaying images.")

In [None]:



# Export every slice of every run as a 640x640 PNG under
# yolo_data/images/<run>/<slice index>.png.
#
# The decision to skip is made per run and is based on the PNG files that are
# actually on disk. Checking a single hard-coded run folder meant that one
# failed or interrupted attempt (which still left the folder behind) disabled
# extraction for every run on all later executions.
output_dir = 'yolo_data/images'
set_type = 'train'

# Create the main output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

runs_extracted = 0
runs_failed = 0
for r in paths_r:
    zarr_path = f'data/{set_type}/static/ExperimentRuns/{r}/VoxelSpacing10.000/denoised.zarr'
    ts_output_dir = os.path.join(output_dir, r)

    try:
        vol = zarr.open(zarr_path, mode='r')
        vol = vol[0]
        n_imgs = vol.shape[0]

        # A run counts as done only when every expected slice file exists.
        already_done = all(
            os.path.exists(os.path.join(ts_output_dir, f"{i}.png"))
            for i in range(n_imgs)
        )
        if already_done:
            continue

        # Create the run folder only once the volume has opened successfully,
        # so a failed open does not leave an empty folder behind.
        os.makedirs(ts_output_dir, exist_ok=True)
        vol2 = convert_to_8bit(vol)

        print(f"Processing {r} and there are {n_imgs} images")

        for i in range(n_imgs):
            tmp_img = vol2[i]
            inp_arr = np.stack([tmp_img] * 3, axis=-1)
            inp_arr = cv2.resize(inp_arr, (640, 640))

            # Save the image to the corresponding subfolder
            image_filename = os.path.join(ts_output_dir, f"{i}.png")
            # cv2.imwrite signals failure through its return value instead of
            # raising, so turn a False result into an error that gets reported.
            if not cv2.imwrite(image_filename, inp_arr):
                raise IOError(f"cv2.imwrite could not write {image_filename}")

        print(f"Saved {n_imgs} images for {r} in {ts_output_dir}")
        runs_extracted += 1

    except Exception as e:
        runs_failed += 1
        print(f"An error occurred while processing {r}: {e}")

if runs_extracted:
    print("Finished saving images.")
elif not runs_failed:
    print('Images are already extracted')

Processing TS_5_4 and there are 184 images
Saved 184 images for TS_5_4 in yolo_data/images\TS_5_4
Processing TS_6_4 and there are 184 images
Saved 184 images for TS_6_4 in yolo_data/images\TS_6_4
Processing TS_6_6 and there are 184 images
Saved 184 images for TS_6_6 in yolo_data/images\TS_6_6
Processing TS_69_2 and there are 184 images
Saved 184 images for TS_69_2 in yolo_data/images\TS_69_2
Processing TS_73_6 and there are 184 images
Saved 184 images for TS_73_6 in yolo_data/images\TS_73_6
Processing TS_86_3 and there are 184 images
Saved 184 images for TS_86_3 in yolo_data/images\TS_86_3
Processing TS_99_9 and there are 184 images
Saved 184 images for TS_99_9 in yolo_data/images\TS_99_9
Finished saving images.


In [10]:


def find_min_max_xyz(path):
    """
    Scan every ``.json`` file below ``path`` and report the extreme coordinates.

    A file contributes only when its top level has a ``'points'`` list; each
    entry of that list contributes only when it has a ``'location'`` dict, and
    then each of the keys ``'x'``, ``'y'`` and ``'z'`` present in that dict is
    folded into the running minimum and maximum for its axis. Files that fail
    to parse or to process are reported with ``print`` and skipped.

    Args:
        path: Directory that is walked recursively with ``os.walk``.

    Returns:
        ``(min_x, max_x, min_y, max_y, min_z, max_z)`` where the pair of any
        axis that never occurred is ``None``; ``None`` instead of a tuple when
        no axis occurred at all.
    """
    axes = ('x', 'y', 'z')
    lowest = {axis: float('inf') for axis in axes}
    highest = {axis: float('-inf') for axis in axes}
    seen_axes = set()

    for root, _, files in os.walk(path):
        for file in files:
            if not file.endswith(".json"):
                continue
            filepath = os.path.join(root, file)
            try:
                with open(filepath, 'r') as f:
                    data = json.load(f)
                    if not ('points' in data and isinstance(data['points'], list)):
                        continue
                    for point in data['points']:
                        if not ('location' in point and isinstance(point['location'], dict)):
                            continue
                        location = point['location']
                        for axis in axes:
                            if axis in location:
                                value = location[axis]
                                lowest[axis] = min(lowest[axis], value)
                                highest[axis] = max(highest[axis], value)
                                seen_axes.add(axis)
            except json.JSONDecodeError:
                print(f"Error decoding JSON in file: {filepath}")
            except Exception as e:
                print(f"Error processing file {filepath}: {e}")

    if not seen_axes:
        return None

    # Flatten to (min, max) pairs in x, y, z order; unseen axes become None pairs.
    extremes = []
    for axis in axes:
        if axis in seen_axes:
            extremes.extend((lowest[axis], highest[axis]))
        else:
            extremes.extend((None, None))
    return tuple(extremes)

if __name__ == "__main__":
    # Built with os.path.join rather than a raw backslash string: on a system
    # whose separator is not a backslash the old literal names a directory that
    # does not exist, and os.walk then silently yields nothing.
    path_to_data = os.path.join("data", "train", "overlay", "ExperimentRuns")

    result = find_min_max_xyz(path_to_data)

    if result:
        min_x, max_x, min_y, max_y, min_z, max_z = result
        # The separator line follows the maximum of x and of y, but not of z.
        for axis, lowest, highest, rule_after in (
            ('x', min_x, max_x, True),
            ('y', min_y, max_y, True),
            ('z', min_z, max_z, False),
        ):
            if lowest is not None:
                print(f"Minimum '{axis}' value found: {lowest}")
            if highest is not None:
                print(f"Maximum '{axis}' value found: {highest}")
                if rule_after:
                    print(50*'-')
    else:
        print(f"No 'x', 'y', or 'z' values found in the JSON files within the path: {path_to_data}")

Minimum 'x' value found: 69.928
Maximum 'x' value found: 6229.592
--------------------------------------------------
Minimum 'y' value found: 86.606
Maximum 'y' value found: 6266.944
--------------------------------------------------
Minimum 'z' value found: 39.085
Maximum 'z' value found: 1549.302


In [50]:
# Class index -> particle name.
i2p = dict(enumerate((
    'virus-like-particle',
    'apo-ferritin',
    'beta-galactosidase',
    'ribosome',
    'thyroglobulin',
)))

# Particle name -> class index (the inverse of i2p, in the same order).
p2i = {name: index for index, name in i2p.items()}

# Radius of each particle type, in the same units as the pick coordinates.
particle_radius = {
    'virus-like-particle': 140,
    'apo-ferritin': 60,
    'beta-galactosidase': 90,
    'ribosome': 150,
    'thyroglobulin': 130,
}

# Box side per particle as a fraction of the image: the radius is divided by
# the voxel spacing (10), then by the X size of the input image (630), and
# doubled so that it represents the diameter in that image.
width_height_particles = {
    name: ((radius / 10) / 630) * 2
    for name, radius in particle_radius.items()
}

In [44]:
# Directory walked below for the per-particle JSON files. Joined with
# os.path.join so it resolves on any platform; the raw backslash literal only
# names a real directory where the path separator is a backslash.
base_dir = os.path.join('data', 'train', 'overlay', 'ExperimentRuns')

def round_to_nearest_ten(z):
    """Return ``z`` moved to the closest multiple of 10.

    A remainder (``z % 10``) below 5 is dropped; a remainder of 5 or more
    pushes the value up to the next multiple.
    """
    offset = z % 10
    return z - offset if offset < 5 else z + (10 - offset)

# Exploratory pass: for each JSON file under base_dir, take the first point
# that has a 'z' location and print the range that its particle radius spans
# around the rounded z value (both divided by 10).
for subdir, dirs, files in os.walk(base_dir):
    for filename in files:
        if not filename.endswith('.json'):
            continue

        filepath = os.path.join(subdir, filename)
        try:
            with open(filepath, 'r') as f:
                data = json.load(f)

            # Nothing to report unless 'points' is present and is a list
            if not ('points' in data and isinstance(data['points'], list)):
                print(f"Warning: 'points' key not found or is not a list in {filename}")
                continue

            for point in data['points']:
                if not ('location' in point and 'z' in point['location']):
                    print(f"Warning: 'location' or 'z' key not found in a 'point' in {filename}")
                    continue

                z_value = point['location']['z']
                rounded_z = round_to_nearest_ten(int(z_value)) / 10
                # The file name without its extension is the particle name.
                particle_name = filename.replace('.json', '')

                if particle_name not in particle_radius:
                    print(f"Warning: Particle radius not found for {particle_name} in {filename}")
                else:
                    radius = particle_radius[particle_name]
                    radius_offset = radius / 10

                    lower_range = rounded_z - radius_offset
                    upper_range = rounded_z + radius_offset

                    print(f"File: {filename}")
                    print(f"  Rounded Z: {rounded_z}")
                    print(f"  Particle Radius ({particle_name}): {radius}")
                    print(f"  Range: [{lower_range}, {upper_range}]")

                # Only the first point with a usable location is reported per file
                break

        except json.JSONDecodeError:
            print(f"Error: Could not decode JSON in {filename}")
        except Exception as e:
            print(f"An error occurred while processing {filename}: {e}")

File: apo-ferritin.json
  Rounded Z: 60.0
  Particle Radius (apo-ferritin): 60
  Range: [54.0, 66.0]
File: beta-galactosidase.json
  Rounded Z: 37.0
  Particle Radius (beta-galactosidase): 90
  Range: [28.0, 46.0]
File: ribosome.json
  Rounded Z: 60.0
  Particle Radius (ribosome): 150
  Range: [45.0, 75.0]
File: thyroglobulin.json
  Rounded Z: 28.0
  Particle Radius (thyroglobulin): 130
  Range: [15.0, 41.0]
File: virus-like-particle.json
  Rounded Z: 64.0
  Particle Radius (virus-like-particle): 140
  Range: [50.0, 78.0]
File: apo-ferritin.json
  Rounded Z: 109.0
  Particle Radius (apo-ferritin): 60
  Range: [103.0, 115.0]
File: beta-galactosidase.json
  Rounded Z: 91.0
  Particle Radius (beta-galactosidase): 90
  Range: [82.0, 100.0]
File: ribosome.json
  Rounded Z: 81.0
  Particle Radius (ribosome): 150
  Range: [66.0, 96.0]
File: thyroglobulin.json
  Rounded Z: 55.0
  Particle Radius (thyroglobulin): 130
  Range: [42.0, 68.0]
File: virus-like-particle.json
  Rounded Z: 80.0
  Parti

In [54]:


# Make sure every run in paths_r has one label file per slice index.
#
# Each file is checked individually instead of guarding the whole cell on one
# hard-coded run folder: that guard skipped all runs as soon as the first
# run's folder existed, even if other runs or files were missing. Files that
# already exist are never reopened, so labels written into them are kept.
base_path = os.path.join('yolo_data', 'labels')  # portable form of yolo_data\labels
n_slices = 184  # label files per run, indices 0 to 183

files_created = 0
for folder_name in paths_r:
    # Construct the path for the subfolder and create it if needed
    subfolder_path = os.path.join(base_path, folder_name)
    os.makedirs(subfolder_path, exist_ok=True)

    for i in range(n_slices):
        file_path = os.path.join(subfolder_path, f"{i}.txt")
        if os.path.exists(file_path):
            continue

        # Opening in 'w' mode and closing immediately leaves an empty file
        with open(file_path, 'w'):
            pass
        files_created += 1

if files_created:
    print("Subfolders and files created successfully.")
else:
    print('Empty .txt files has already been created')

Subfolders and files created successfully.


In [55]:


# Input directory that is walked for JSON files, and the root under which the
# per-run label folders are written. Both are joined with os.path.join so they
# resolve on any platform; the raw backslash literals only name real
# directories where the path separator is a backslash.
base_dir = os.path.join('data', 'train', 'overlay', 'ExperimentRuns')
output_base_dir = os.path.join('yolo_data', 'labels')



def round_to_nearest_ten(z):
    """Snap ``z`` to a multiple of 10: remainders under 5 go down, the rest go up."""
    leftover = z % 10
    if leftover >= 5:
        return z + (10 - leftover)
    return z - leftover

# Turn every pick point into one label line
# "<class id> <x center> <y center> <width> <height>" and write that line into
# the label file of every slice index between (z - radius) and (z + radius),
# clamped to 0..183.
#
# Label files touched during this execution of the cell. The first write to a
# file truncates it and later writes append, so running the cell again
# regenerates those files instead of appending a second copy of every line.
label_files_started = set()

for subdir, dirs, files in os.walk(base_dir):
    for filename in files:
        if filename == 'beta-amylase.json':
            continue  # Skip beta-amylase.json files

        if not filename.endswith('.json'):
            continue

        filepath = os.path.join(subdir, filename)
        try:
            with open(filepath, 'r') as f:
                data = json.load(f)

            # Extract TS folder name: the path component right after 'ExperimentRuns'
            parts = subdir.split(os.sep)
            if 'ExperimentRuns' not in parts or 'Picks' not in parts:
                print(f"Warning: Unexpected subdirectory structure for {filename}")
                continue
            ts_folder_index = parts.index('ExperimentRuns') + 1
            if ts_folder_index >= len(parts):
                print(f"Warning: Could not determine TS folder name for {filename}")
                continue
            ts_folder_name = parts[ts_folder_index]
            output_dir = os.path.join(output_base_dir, ts_folder_name)
            os.makedirs(output_dir, exist_ok=True)

            # The file name without its extension is the particle name
            particle_name = filename.replace('.json', '')

            if particle_name not in p2i:
                print(f"Warning: Particle name '{particle_name}' not found in p2i dictionary for {filename}")
                continue

            if particle_name not in particle_radius:
                print(f"Warning: Particle radius not found for {particle_name} in {filename}")
                continue

            # These depend only on the particle type, so look them up once per file
            radius = particle_radius[particle_name]
            class_id = p2i[particle_name]
            width = width_height_particles[particle_name]
            height = width_height_particles[particle_name]

            # Check if 'points' exists and is a list
            if not ('points' in data and isinstance(data['points'], list)):
                print(f"Warning: 'points' key not found or is not a list in {filename}")
                continue

            for point in data['points']:
                if not ('location' in point and 'x' in point['location'] and 'y' in point['location'] and 'z' in point['location']):
                    print(f"Warning: Missing 'location', 'x', 'y', or 'z' key in a 'point' in {filename}")
                    continue

                x_value = point['location']['x']
                y_value = point['location']['y']
                z_value = point['location']['z']

                rounded_z = round_to_nearest_ten(int(z_value))

                # Slice indices covered by the particle, kept inside 0..183
                lower_range = max(0 , round_to_nearest_ten(int(rounded_z - radius)) // 10)
                upper_range = min(183, round_to_nearest_ten(int(rounded_z + radius)) // 10)

                # Coordinates are divided by 10 and then by 630 to normalise them
                x_center = x_value / 10 / 630
                y_center = y_value / 10 / 630

                label_line = f"{class_id} {x_center} {y_center} {width} {height}\n"

                for i in range(lower_range, upper_range + 1):
                    output_filepath = os.path.join(output_dir, f"{i}.txt")
                    mode = 'a' if output_filepath in label_files_started else 'w'
                    with open(output_filepath, mode) as outfile:
                        outfile.write(label_line)
                    label_files_started.add(output_filepath)

        except json.JSONDecodeError:
            print(f"Error: Could not decode JSON in {filename}")
        except Exception as e:
            print(f"An error occurred while processing {filename}: {e}")

In [51]:
# Print the normalised box size of each class, in class-index order.
for class_index in range(5):
    particle_name = i2p[class_index]
    print(width_height_particles[particle_name])

0.044444444444444446
0.01904761904761905
0.02857142857142857
0.047619047619047616
0.04126984126984127


Subfolders and files created successfully.


In [31]:
def round_to_nearest_ten(z):
  """
  Move a value to the closest multiple of 10.

  Args:
    z: The value to round.

  Returns:
    ``z`` minus its remainder modulo 10 when that remainder is below 5,
    otherwise ``z`` raised to the next multiple of 10.
  """
  distance_above = z % 10
  distance_below = 10 - distance_above
  # Remainders of 5 and more round up, everything else rounds down.
  return z + distance_below if distance_above >= 5 else z - distance_above



In [38]:
# Look up the class index of the virus-like particle. p2i is keyed by the full
# particle name; the short key 'virus' is not in it and raises KeyError when
# the notebook is run from a fresh kernel.
p2i['virus-like-particle']

0

In [36]:
# Built-in round() sends a tie to the even neighbour, so 60.5 gives 60 here.
round(60.5)

60

In [7]:
# Build a YOLO model from the "yolo11x.pt" weights file and show its summary
# (layer / parameter / gradient / GFLOPs counts, as in the recorded output).
from ultralytics import YOLO
model = YOLO("yolo11x.pt")
model.info()

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt to 'yolo11x.pt'...


100%|██████████| 109M/109M [00:02<00:00, 50.2MB/s] 


YOLO11x summary: 631 layers, 56,966,176 parameters, 0 gradients, 196.0 GFLOPs


(631, 56966176, 0, 195.9587328)

In [12]:
# Run the loaded model on a single image file and show the first result.
img_path = 'Untitled.png' # alternative input used earlier: 'TS_69_2_1100.png'
results = model(img_path)
results[0].show()


image 1/1 e:\ML Projects\Kaggle\CryoET\Untitled.png: 640x640 (no detections), 355.3ms
Speed: 3.7ms preprocess, 355.3ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)
