In [56]:
# Load a single NIfTI volume and print the shape of its voxel array as a quick sanity check.
# `img` and `data` stay in the kernel namespace; the next cell reads `img`.
import nibabel as nib
input_path = r"d:\Kananat\Data\training_dataset_3D\training_dataset_plus_openAccess\test\1\010_left_baseline_mirrored_preprocessed.nii.gz"
img = nib.load(input_path)
data = img.get_fdata()  # voxel data as a floating-point numpy array
print(data.shape)

(255, 255, 255)


In [57]:
# Per-axis voxel spacing as recorded in the header of `img` (loaded in the previous cell).
# NOTE(review): units are not checked here -- presumably millimetres; confirm against the header.
voxel_size = img.header.get_zooms()
print(f"Voxel size: {voxel_size}")

Voxel size: (0.110117644, 0.110117644, 0.11011754)


In [62]:
from skimage import morphology
import numpy as np
import networkx as nx
import os

def find_closest_endpoint(skeleton, reference_point):
    """
    Find the endpoint in the skeleton that is closest to the reference point.

    An endpoint is a skeleton voxel that has exactly one other skeleton voxel
    in its 26-connected neighbourhood.

    Parameters:
    -----------
    skeleton : numpy.ndarray
        Binary 3D array containing the skeletonized structure
    reference_point : tuple
        (x, y, z) coordinate of the reference point

    Returns:
    --------
    closest_endpoint : tuple
        (x, y, z) coordinate (plain ints) of the endpoint with the smallest
        Euclidean distance to the reference point. Ties are resolved in favour
        of the voxel that comes first in row-major order. If the skeleton has
        no endpoints (e.g. a closed loop or an isolated voxel), a warning is
        printed and the integer-truncated centroid of the skeleton is returned.

    Raises:
    -------
    ValueError
        If the skeleton contains no voxels at all (a centroid would be NaN).
    """
    # Coordinates of all skeleton voxels, in row-major order, as hashable tuples
    voxels = [tuple(int(c) for c in coord) for coord in np.argwhere(skeleton)]
    if not voxels:
        raise ValueError("Skeleton is empty: no voxels to search for endpoints.")

    # Set membership replaces an explicit graph: only the neighbour count is needed
    occupied = set(voxels)
    offsets = [
        (dx, dy, dz)
        for dx in (-1, 0, 1)
        for dy in (-1, 0, 1)
        for dz in (-1, 0, 1)
        if (dx, dy, dz) != (0, 0, 0)
    ]

    def neighbour_count(voxel):
        """Number of skeleton voxels among the 26 neighbours of `voxel`."""
        x, y, z = voxel
        return sum((x + dx, y + dy, z + dz) in occupied for dx, dy, dz in offsets)

    # Endpoints are voxels with exactly one neighbour
    endpoints = [voxel for voxel in voxels if neighbour_count(voxel) == 1]

    if not endpoints:
        print("Warning: No endpoints found in the skeleton. Using the centroid instead.")
        centroid = np.mean(np.array(voxels), axis=0).astype(int)
        return tuple(int(c) for c in centroid)

    # Squared distance gives the same ordering as the Euclidean distance
    return min(
        endpoints,
        key=lambda p: (p[0] - reference_point[0]) ** 2
        + (p[1] - reference_point[1]) ** 2
        + (p[2] - reference_point[2]) ** 2,
    )

def crop_around_point(image, point, crop_size, return_coords=False):
    """
    Crop a cubic region of specified half-width around a point.

    The window is clipped to the image bounds and is NOT padded, so a crop
    near a border is smaller than (2 * crop_size + 1) along the clipped axes.

    Parameters:
    -----------
    image : numpy.ndarray
        3D array to crop from
    point : tuple
        (x, y, z) integer coordinate of the center point
    crop_size : int
        Half-width (n) of the crop region; the window is
        [x-n:x+n+1, y-n:y+n+1, z-n:z+n+1], i.e. the center voxel plus n voxels
        on each side
    return_coords : bool, optional
        If True, also return the crop boundaries. Defaults to False, in which
        case only the cropped array is returned.

    Returns:
    --------
    cropped : numpy.ndarray
        Cropped region (a slice of `image`, not a copy)
    crop_coords : tuple
        ((x_min, x_max), (y_min, y_max), (z_min, z_max)) half-open bounds of
        the crop in `image` index space. Only returned when `return_coords`
        is True.
    """
    x, y, z = point

    # Clip the window to the array so the slice never wraps around or overruns
    x_min = max(0, x - crop_size)
    x_max = min(image.shape[0], x + crop_size + 1)
    y_min = max(0, y - crop_size)
    y_max = min(image.shape[1], y + crop_size + 1)
    z_min = max(0, z - crop_size)
    z_max = min(image.shape[2], z + crop_size + 1)

    cropped = image[x_min:x_max, y_min:y_max, z_min:z_max]

    if return_coords:
        return cropped, ((x_min, x_max), (y_min, y_max), (z_min, z_max))
    return cropped

def skeletonized_cropping(input_file, crop_size=50, output_folder=None, threshold=None):
    """
    Load a CBCT image, binarize it, apply 3D skeletonization, find the skeleton
    endpoint closest to (shape[0]//2, shape[1], shape[2]//2), crop around that
    point and save the crop as int16 NIfTI.

    Parameters:
    -----------
    input_file : str or path-like
        Path to the .nii.gz input file
    crop_size : int
        Half-width (n) of the crop region; see `crop_around_point`
    output_folder : str or path-like, optional
        Folder to save the result in, under the same file name as the input.
        The folder is created if it does not exist. If None, the result is
        written next to the input file with a '_cropped' suffix.
    threshold : float, optional
        Threshold value for binarization. If None, Otsu's method will be used

    Returns:
    --------
    output_file : str
        Path of the saved cropped image
    """
    # Load the NIFTI image
    img = nib.load(input_file)
    data = img.get_fdata()

    # Keep the header so the saved crop inherits the original metadata
    header = img.header

    # Determine threshold if not provided
    if threshold is None:
        from skimage.filters import threshold_otsu
        threshold = threshold_otsu(data)
        print(f"Using Otsu's threshold: {threshold}")

    # Convert to binary image and skeletonize it
    binary = data > threshold
    skeleton = morphology.skeletonize(binary)

    # Reference point: middle of axes 0 and 2, far end of axis 1
    reference_point = (data.shape[0]//2, data.shape[1], data.shape[2]//2)
    endpoint = find_closest_endpoint(skeleton, reference_point)

    # Crop the region around the endpoint
    cropped_image = crop_around_point(data, endpoint, crop_size)

    # The crop starts at max(0, c - crop_size) on each axis (same clamping as
    # crop_around_point). Shift the affine translation by that voxel offset so
    # the saved crop keeps its original world-space position.
    crop_origin = np.array([max(0, int(c) - crop_size) for c in endpoint[:3]], dtype=float)
    affine = img.affine.copy()
    affine[:3, 3] += affine[:3, :3] @ crop_origin

    # Work out where to save the cropped region
    input_path = os.fspath(input_file)
    file_name = os.path.basename(input_path)
    if output_folder is None:
        # Documented default: '<name>_cropped<ext>' next to the input file
        if file_name.endswith(".nii.gz"):
            stem, ext = file_name[:-len(".nii.gz")], ".nii.gz"
        else:
            stem, ext = os.path.splitext(file_name)
        output_file = os.path.join(os.path.dirname(input_path), f"{stem}_cropped{ext}")
    else:
        os.makedirs(output_folder, exist_ok=True)
        output_file = os.path.join(output_folder, file_name)

    cropped_img = nib.Nifti1Image(cropped_image.astype(np.int16), affine, header)
    nib.save(cropped_img, output_file)

    return output_file

In [63]:
from pathlib import Path

input_folder = Path(r"D:\Kananat\Data\processing_More_data\to_add")
output_folder = Path(r"D:\Kananat\Data\processing_More_data\to_add_cropped")

# Make sure the target folder exists so saving does not fail on a missing directory.
output_folder.mkdir(parents=True, exist_ok=True)

# Crop every volume in the input folder; sorted() makes the processing order
# (and therefore the printed numbering) reproducible across runs.
for i, file in enumerate(sorted(input_folder.glob("*.nii.gz")), start=1):
    print(f"{i} : {file}")
    # NOTE(review): crop_size=112 gives crops of up to 225 voxels per axis and
    # threshold=-1999 presumably separates background from tissue -- confirm.
    skeletonized_cropping(file, crop_size=112, output_folder=output_folder, threshold=-1999)

1 : D:\Kananat\Data\processing_More_data\to_add\50-11620_20250213_L.nii.gz
2 : D:\Kananat\Data\processing_More_data\to_add\50-11620_20250213_R.nii.gz
3 : D:\Kananat\Data\processing_More_data\to_add\56-30067_20240131_R.nii.gz
4 : D:\Kananat\Data\processing_More_data\to_add\58-41636_20160129_L.nii.gz
5 : D:\Kananat\Data\processing_More_data\to_add\58-41636_20160129_R.nii.gz
6 : D:\Kananat\Data\processing_More_data\to_add\59-2326_20160220_L.nii.gz
7 : D:\Kananat\Data\processing_More_data\to_add\59-2326_20160220_R.nii.gz
8 : D:\Kananat\Data\processing_More_data\to_add\59-6269_20160326_L.nii.gz
9 : D:\Kananat\Data\processing_More_data\to_add\59-6269_20160326_R.nii.gz
10 : D:\Kananat\Data\processing_More_data\to_add\60-34373_20241008_L.nii.gz
11 : D:\Kananat\Data\processing_More_data\to_add\60-34373_20241008_R.nii.gz
12 : D:\Kananat\Data\processing_More_data\to_add\63-0010001_20240527_L.nii.gz
13 : D:\Kananat\Data\processing_More_data\to_add\63-0010001_20240527_R.nii.gz
14 : D:\Kananat\Data\

In [None]:
import os
from pathlib import Path
from typing import Optional, List

def find_patient_split(dataset_path: str, patient_id: str) -> Optional[str]:
    """
    Report which dataset split holds at least one image of a patient.

    The dataset is expected to be laid out as <root>/<split>/<class>/*.nii.gz.
    Splits are searched in the order train, val, test and the first split with
    a matching file wins. A file matches when the part of its name before the
    first underscore (with the .nii.gz extension removed) equals `patient_id`.

    Args:
        dataset_path: Path to the dataset root directory
        patient_id: The patient ID to search for

    Returns:
        'train', 'val', 'test', or None if patient not found
    """
    root = Path(dataset_path)

    for split_name in ('train', 'val', 'test'):
        split_root = root / split_name
        if not split_root.exists():
            # Missing splits are simply skipped
            continue

        # Only sub-directories count as class folders; stray files are ignored
        class_folders = (entry for entry in split_root.iterdir() if entry.is_dir())
        for class_folder in class_folders:
            for nifti_path in class_folder.glob('*.nii.gz'):
                # .stem drops '.gz'; the replace drops the remaining '.nii'
                base_name = nifti_path.stem.replace('.nii', '')
                if base_name.split('_')[0] == patient_id:
                    return split_name

    return None

In [6]:
dataset_path = r"D:\Kananat\Data\training_dataset_3D\training_dataset_OA"
patient_id = "58-41636 R"

# Look the patient up once and report the outcome in a single print
split = find_patient_split(dataset_path, patient_id)
message = (
    f"Patient {patient_id} found in: {split}"
    if split
    else f"Patient {patient_id} not found in dataset"
)
print(message)
    

Patient 58-41636 R not found in dataset


In [46]:
import csv

dataset_path = r"D:\Kananat\Data\training_dataset_3D\training_dataset_OA"
input_csv = r"d:\Kananat\Data\processing_More_data\Sorted_More_data_raw\Labels.csv"

# For every row of the label CSV, report whether that patient/side already
# exists in the dataset. The CSV is only read here; nothing is written back.
# newline='' lets the csv module handle line endings itself.
with open(input_csv, 'r', newline='') as infile:
    reader = csv.DictReader(infile)

    for i, row in enumerate(reader, start=1):
        print(f"\nProcessing row {i}: {row['ID']}")

        # IDs in the output below look like '<patient> <side> <date>';
        # the lookup key is '<patient> <side>'.
        parts = row['ID'].split(' ')
        if len(parts) < 2:
            print(f"Skipping row {i}: ID has no side component")
            continue
        search_id = f"{parts[0]} {parts[1]}"

        split = find_patient_split(dataset_path, search_id)
        if split:
            print(f"Patient {search_id} found in: {split}")


Processing row 1: 58-41636 R 20160129

Processing row 2: 58-41636 L 20160129

Processing row 3: 59-2326 R 20160220

Processing row 4: 59-2326 L 20160220

Processing row 5: 59-6269 R 20160326

Processing row 6: 59-6269 L 20160326
Patient 59-6269 L found in: train

Processing row 7: 67-700034 R 20240127

Processing row 8: 67-700034 L 20240127

Processing row 9: 56-30067 R 20240131

Processing row 10: 63-9054 R 20240308

Processing row 11: 63-9054 L 20240308

Processing row 12: 66-22465 R 20240527

Processing row 13: 67-9653 L 20240701

Processing row 14: 60-34373 R 20241008

Processing row 15: 60-34373 L 20241008

Processing row 16: 66-8695 R 20241114

Processing row 17: 67-10136 R 20241208

Processing row 18: 65-4618 R 20241218

Processing row 19: 65-4618 L 20241218

Processing row 20: 50-11620 R 20250213

Processing row 21: 50-11620 L 20250213

Processing row 22: 67-24015 R 20250213

Processing row 23: 67-24015 L 20250213

Processing row 24: 68-700050 R 20250226

Processing row 25: 68

In [19]:
from pathlib import Path

input_folder = Path(r"D:\Kananat\Data\Last0\Sorted_More_data_nii")

# Rename '<id>_<yyyy>_<mm>_<dd>_<side>.nii.gz' to '<id[:2]>-<id[2:]> <side> <yyyymmdd>.nii.gz'.
# The listing is materialised with sorted() before any rename, so files that
# were just renamed inside the same folder cannot be picked up again by a
# still-running glob.
for i, file in enumerate(sorted(input_folder.glob("*.nii.gz")), start=1):
    old_name = str(file.name).replace(".nii.gz", "").split("_")  # Get filename without extension

    # Files that do not have the five underscore-separated fields (for example
    # ones already renamed by an earlier run) are left untouched.
    if len(old_name) < 5:
        print(f"{i} : {old_name}  -->  skipped (unexpected name format)")
        continue

    new_name = f"{old_name[0][:2]}-{old_name[0][2:]} {old_name[4]} {old_name[1]}{old_name[2]}{old_name[3]}.nii.gz"
    target = input_folder / new_name

    # Never overwrite (POSIX) or crash on (Windows) an existing target file
    if target.exists():
        print(f"{i} : {old_name}  -->  skipped ({new_name} already exists)")
        continue

    print(f"{i} : {old_name}  -->  {new_name}")
    file.rename(target)

1 : ['5011620', '2025', '02', '13', 'L']  -->  50-11620 L 20250213.nii.gz
2 : ['5011620', '2025', '02', '13', 'R']  -->  50-11620 R 20250213.nii.gz
3 : ['5218852', '2016', '05', '26', 'L']  -->  52-18852 L 20160526.nii.gz
4 : ['5218852', '2016', '05', '26', 'R']  -->  52-18852 R 20160526.nii.gz
5 : ['5237193', '2016', '04', '05', 'L']  -->  52-37193 L 20160405.nii.gz
6 : ['5237193', '2016', '04', '05', 'R']  -->  52-37193 R 20160405.nii.gz
7 : ['5425927', '2016', '04', '24', 'L']  -->  54-25927 L 20160424.nii.gz
8 : ['5425927', '2016', '04', '24', 'R']  -->  54-25927 R 20160424.nii.gz
9 : ['5450', '2016', '02', '26', 'L']  -->  54-50 L 20160226.nii.gz
10 : ['5450', '2016', '02', '26', 'R']  -->  54-50 R 20160226.nii.gz
11 : ['5630067', '2024', '01', '31', 'R']  -->  56-30067 R 20240131.nii.gz
12 : ['5724983', '2016', '07', '02', 'L']  -->  57-24983 L 20160702.nii.gz
13 : ['5724983', '2016', '07', '02', 'R']  -->  57-24983 R 20160702.nii.gz
14 : ['5734770', '2016', '01', '17', 'L']  -->