#### builld the dataset

In [2]:
import os
import pydicom

def detect_scan_direction(folder):
    slices = []
    for f in sorted(os.listdir(folder)):
        path = os.path.join(folder, f)
        ds = pydicom.dcmread(path)
        if "ImagePositionPatient" not in ds:
            continue
        x = ds.ImagePositionPatient[0]  # X coordinate
        slices.append((f, x))
    
    if len(slices) < 2:
        print("Not enough slices to determine direction.")
        return

    first_x = slices[0][1]
    last_x = slices[-1][1]

    if first_x < last_x:
        print("Scan direction: Right → Left")
    else:
        print("Scan direction: Left → Right")

    for name, x in slices:
        print(f"{name}: X = {x:.2f}")

# Example usage
# detect_scan_direction(r"Sagittal")
# detect_scan_direction(r"D:\Dataset\DatasetV0.17 Dicom format - Correct version\PATIENT672717\ST000000-MR, VERTEBRA, LOMBER\SE000000-Sag T2 frFSE"
# detect_scan_direction(r"D:\AISSLab\Code\3D CNN\datasets\DatasetV0.17 Dicom format - Correct version\PATIENT62951\ST000000-MR, LOMBER\SE000000-Sag T2 frFSE")
# detect_scan_direction(r"D:\AISSLab\Code\3D CNN\datasets\DatasetV0.17 Dicom format - Correct version\PATIENT110343\ST000000-MRG, LOMBER VERTEBRA, KONTRASTSIZ\SE000000-T2W_TSE")
# opisite 


In [3]:
import os

root_folder = r"D:\Dataset\DatasetV0.17 Dicom format - Correct version"

for patient_name in os.listdir(root_folder):
    patient_folder = os.path.join(root_folder, patient_name)
    
    if not os.path.isdir(patient_folder):
        continue

    folder_file_counts = {}

    for item in os.listdir(patient_folder):
        item_path = os.path.join(patient_folder, item)
        
        if os.path.isdir(item_path):
            for scan_module in os.listdir(item_path):
                scan_path = os.path.join(item_path, scan_module)
                
                if os.path.isdir(scan_path):
                    file_count = sum(len(files) for _, _, files in os.walk(scan_path))
                    folder_file_counts[scan_path] = file_count

    if folder_file_counts:
        # Identify the longest and shortest folders
        longest_folder = max(folder_file_counts, key=folder_file_counts.get)
        shortest_folder = min(folder_file_counts, key=folder_file_counts.get)

        # Define new paths
        axial_path = os.path.join(os.path.dirname(longest_folder), "Axial")
        sagittal_path = os.path.join(os.path.dirname(shortest_folder), "Sagittal")

        # Rename the folders
        try:
            os.rename(longest_folder, axial_path)
            print(f"Renamed '{longest_folder}' to '{axial_path}'")
        except FileExistsError:
            print(f"Cannot rename '{longest_folder}' to '{axial_path}': Destination already exists.")
        except Exception as e:
            print(f"Error renaming '{longest_folder}' to '{axial_path}': {e}")

        try:
            os.rename(shortest_folder, sagittal_path)
            print(f"Renamed '{shortest_folder}' to '{sagittal_path}'")
        except FileExistsError:
            print(f"Cannot rename '{shortest_folder}' to '{sagittal_path}': Destination already exists.")
        except Exception as e:
            print(f"Error renaming '{shortest_folder}' to '{sagittal_path}': {e}")


Renamed 'D:\Dataset\DatasetV0.17 Dicom format - Correct version\PATIENT1000513\ST000000-LOMBER\Axial' to 'D:\Dataset\DatasetV0.17 Dicom format - Correct version\PATIENT1000513\ST000000-LOMBER\Axial'
Renamed 'D:\Dataset\DatasetV0.17 Dicom format - Correct version\PATIENT1000513\ST000000-LOMBER\Sagittal' to 'D:\Dataset\DatasetV0.17 Dicom format - Correct version\PATIENT1000513\ST000000-LOMBER\Sagittal'
Renamed 'D:\Dataset\DatasetV0.17 Dicom format - Correct version\PATIENT1001680\ST000000-MR, VERTEBRA, LOMBER\Axial' to 'D:\Dataset\DatasetV0.17 Dicom format - Correct version\PATIENT1001680\ST000000-MR, VERTEBRA, LOMBER\Axial'
Renamed 'D:\Dataset\DatasetV0.17 Dicom format - Correct version\PATIENT1001680\ST000000-MR, VERTEBRA, LOMBER\Sagittal' to 'D:\Dataset\DatasetV0.17 Dicom format - Correct version\PATIENT1001680\ST000000-MR, VERTEBRA, LOMBER\Sagittal'
Renamed 'D:\Dataset\DatasetV0.17 Dicom format - Correct version\PATIENT1005006\ST000000-MR LOMBER\Axial' to 'D:\Dataset\DatasetV0.17 Dic

In [4]:
# check the lenght of each folder  
import os

root_folder = r"D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom"
sagittal_counts = []

for patient_name in os.listdir(root_folder):
    patient_folder = os.path.join(root_folder, patient_name)
    
    if not os.path.isdir(patient_folder):
        continue

    for item in os.listdir(patient_folder):
        item_path = os.path.join(patient_folder, item)
        
        if os.path.isdir(item_path):
            sagittal_path = os.path.join(item_path, "Sagittal")
            if os.path.isdir(sagittal_path):
                file_count = sum(len(files) for _, _, files in os.walk(sagittal_path))
                sagittal_counts.append((sagittal_path, file_count))

# Sort and get top 5
top5_sagittal = sorted(sagittal_counts, key=lambda x: x[1], reverse=True)[:10]

# Display results
print("Top 5 Sagittal directories with the largest number of files:")
for path, count in top5_sagittal:
    print(f"{path} - {count} files")


Top 5 Sagittal directories with the largest number of files:
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT110343\ST000000-MRG, LOMBER VERTEBRA, KONTRASTSIZ\Sagittal - 18 files
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT114037\ST000000-MRG, LOMBER VERTEBRA, KONTRASTSIZ\Sagittal - 18 files
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT147603\ST000000-MRG, LOMBER VERTEBRA, KONTRASTSIZ\Sagittal - 18 files
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT156676\ST000000-MR, LOMBER\Sagittal - 18 files
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT168218\ST000000-MRG, LOMBER VERTEBRA, KONTRASTSIZ\Sagittal - 18 files
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT177006\ST000000-MR, VERTEBRA, LOMBER\Sagittal - 18 files
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT187823\ST000000-MR, VERTEBRA, LOMBER\Sagittal - 18 files
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT190704\ST000000-MR, VERTEBRA, LOMBER\Sagit

## padding 


In [5]:
import os
import pydicom
import numpy as np
from pydicom.uid import generate_uid

def pad_dicom_series(folder_path, target_count=18):
    # Load and sort DICOM files by InstanceNumber
    dicom_files = sorted(
        [f for f in os.listdir(folder_path) if f.lower().endswith('.dcm')],
        key=lambda x: int(pydicom.dcmread(os.path.join(folder_path, x)).InstanceNumber)
    )

    current_count = len(dicom_files)
    if current_count >= target_count:
        print(f"{folder_path} already has {current_count} slices.")
        return

    print(f"Padding {folder_path}: {current_count} → {target_count}")

    last_file = os.path.join(folder_path, dicom_files[-1])
    last_ds = pydicom.dcmread(last_file)
    last_instance_number = int(last_ds.InstanceNumber)

    # Determine slice spacing
    if current_count >= 2:
        second_last_file = os.path.join(folder_path, dicom_files[-2])
        second_last_ds = pydicom.dcmread(second_last_file)
        spacing = np.array(last_ds.ImagePositionPatient) - np.array(second_last_ds.ImagePositionPatient)
    else:
        spacing = np.array([0, 0, 1])  # default spacing if only 1 slice

    current_position = np.array(last_ds.ImagePositionPatient)
    rows, cols = last_ds.Rows, last_ds.Columns
    dtype = last_ds.pixel_array.dtype
    black_image = np.zeros((rows, cols), dtype=dtype).tobytes()

    instance_number = last_instance_number  # starting point

    for i in range(target_count - current_count):
        instance_number += 1  # increment correctly

        new_ds = last_ds.copy()
        new_ds.SOPInstanceUID = generate_uid()
        new_ds.InstanceNumber = instance_number

        # Update position
        new_position = current_position + spacing * (i + 1)
        new_ds.ImagePositionPatient = [str(p) for p in new_position]
        if 'SliceLocation' in new_ds:
            new_ds.SliceLocation = float(new_ds.SliceLocation) + spacing[-1] * (i + 1)

        new_ds.PixelData = black_image

        # Save
        filename = os.path.join(folder_path, f"pad_{i+1:03d}.dcm")
        new_ds.save_as(filename)

    print("✅ Padding complete and InstanceNumbers correctly incremented.")

# Example usage
# folder = r"Sagittal"

# pad_dicom_series(folder)


In [6]:
root_folder = r"D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom"
for patient_folder in os.listdir(root_folder):
    patient_folder = os.path.join(root_folder ,patient_folder)
    for subfolders in os.listdir(patient_folder):
        if  os.path.isdir( os.path.join(patient_folder ,subfolders)):
            for sagittal in os.listdir(  os.path.join(patient_folder ,subfolders)):
                 if sagittal == "Sagittal":
                     print(os.path.join(patient_folder ,subfolders,sagittal))

                     pad_dicom_series(os.path.join(patient_folder ,subfolders,sagittal))
            
   


D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT110343\ST000000-MRG, LOMBER VERTEBRA, KONTRASTSIZ\Sagittal
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT110343\ST000000-MRG, LOMBER VERTEBRA, KONTRASTSIZ\Sagittal already has 18 slices.
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT114037\ST000000-MRG, LOMBER VERTEBRA, KONTRASTSIZ\Sagittal
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT114037\ST000000-MRG, LOMBER VERTEBRA, KONTRASTSIZ\Sagittal already has 18 slices.
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT147603\ST000000-MRG, LOMBER VERTEBRA, KONTRASTSIZ\Sagittal
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT147603\ST000000-MRG, LOMBER VERTEBRA, KONTRASTSIZ\Sagittal already has 18 slices.
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT156676\ST000000-MR, LOMBER\Sagittal
D:\AISSLab\Code\3D CNN\datasets\test_system\Dicom\PATIENT156676\ST000000-MR, LOMBER\Sagittal already has 18 slices.
D:\AISSLab\Code\3D CNN\dat

copy the padding image to  other folder 

In [7]:
import os
import shutil

root_folder = r"D:\AISSLab\Code\3D CNN\datasets\DatasetV0.17 Dicom"
destination_base = r"preprocessing_Dataset"

for patient_folder in os.listdir(root_folder):
    patient_folder_path = os.path.join(root_folder, patient_folder)
    if not os.path.isdir(patient_folder_path):
        continue

    for subfolder in os.listdir(patient_folder_path):
        subfolder_path = os.path.join(patient_folder_path, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        for item in os.listdir(subfolder_path):
            if item == "Sagittal":
                sagittal_path = os.path.join(subfolder_path, item)
                print(f"Found Sagittal folder: {sagittal_path}")

                # Create destination directory
                dest_dir = os.path.join(destination_base, patient_folder)
                os.makedirs(dest_dir, exist_ok=True)

                # Copy all files from Sagittal folder
                for file in os.listdir(sagittal_path):
                    src_file = os.path.join(sagittal_path, file)
                    if os.path.isfile(src_file):
                        shutil.copy2(src_file, os.path.join(dest_dir, file))

                print(f"✅ Copied to {dest_dir}")


Found Sagittal folder: D:\AISSLab\Code\3D CNN\datasets\DatasetV0.17 Dicom\PATIENT1000513\ST000000-LOMBER\Sagittal
✅ Copied to preprocessing_Dataset\PATIENT1000513
Found Sagittal folder: D:\AISSLab\Code\3D CNN\datasets\DatasetV0.17 Dicom\PATIENT1001680\ST000000-MR, VERTEBRA, LOMBER\Sagittal
✅ Copied to preprocessing_Dataset\PATIENT1001680
Found Sagittal folder: D:\AISSLab\Code\3D CNN\datasets\DatasetV0.17 Dicom\PATIENT1005006\ST000000-MR LOMBER\Sagittal
✅ Copied to preprocessing_Dataset\PATIENT1005006
Found Sagittal folder: D:\AISSLab\Code\3D CNN\datasets\DatasetV0.17 Dicom\PATIENT1005561\ST000000-MR, VERTEBRA, LOMBER\Sagittal
✅ Copied to preprocessing_Dataset\PATIENT1005561
Found Sagittal folder: D:\AISSLab\Code\3D CNN\datasets\DatasetV0.17 Dicom\PATIENT1006221\ST000000-MR, VERTEBRA, LOMBER\Sagittal
✅ Copied to preprocessing_Dataset\PATIENT1006221
Found Sagittal folder: D:\AISSLab\Code\3D CNN\datasets\DatasetV0.17 Dicom\PATIENT1008947\ST000000-MR, VERTEBRA, LOMBER\Sagittal
✅ Copied to 

labeling data

In [8]:
import pandas as pd
import xml.etree.ElementTree as ET


In [9]:
# root_dir = r"D:\AISSLab\Code\3D CNN\datasets\DatasetV0.21 Final"
root_dir = r"D:\Submitted Matrial (conference&journal)\Sagittal Data Artical\V0.47 Dataset analysis\DatasetV0.47 Final\DatasetV0.47"
df= pd.DataFrame(columns = ["patient_ID","filename","level","name", "xmin","ymin","xmax","ymax" ,"width","height"])
for patient_ in os.listdir(root_dir):
    patient_folder = os.path.join(root_dir, patient_)
    for sag in os.listdir(patient_folder):
        if sag =="Sagittal":
            sagittal_folder = os.path.join(patient_folder, sag)
            for XML in os.listdir(sagittal_folder):
                if XML.endswith("xml"):
                    xml_path = os.path.join(sagittal_folder, XML)
                    # print(patient_)
                    # print(XML)
                                        
                    # Load the XML file
                    tree = ET.parse(xml_path)  # Replace with the path to your XML file
                    root = tree.getroot()

                    # Extract global information
                    filename = root.find('filename').text
                    width = int(root.find('size/width').text)
                    height = int(root.find('size/height').text)

                    # Extract all objects
                    data = []
                    for obj in root.findall('object'):
                        level = obj.find('level').text
                        name = obj.find('name').text
                        bbox = obj.find('bndbox')
                        xmin = int(bbox.find('xmin').text)
                        ymin = int(bbox.find('ymin').text)
                        xmax = int(bbox.find('xmax').text)
                        ymax = int(bbox.find('ymax').text)
                        new_row = {
                            "patient_ID":patient_,
                            'filename': XML.replace(".xml" , ""),
                            'level': level,
                            'name': name,
                            'xmin': xmin,
                            'ymin': ymin,
                            'xmax': xmax,
                            'ymax': ymax,
                            'width': width,
                            'height': height
                        }
                        df.loc[len(df)] = new_row

                                


In [10]:
df.to_csv("label.csv" , index=False)

In [13]:
df

Unnamed: 0,patient_ID,filename,level,name,xmin,ymin,xmax,ymax,width,height
0,PATIENT1000513,IM000002,L4-L5,RFS0,234,320,274,373,512,512
1,PATIENT1000513,IM000003,L2-L3,RFS0,247,203,279,259,512,512
2,PATIENT1000513,IM000003,L3-L4,RFS2,242,263,274,318,512,512
3,PATIENT1000513,IM000003,L4-L5,RFS3,236,322,274,373,512,512
4,PATIENT1000513,IM000003,L5-S1,RFS0,239,377,285,423,512,512
...,...,...,...,...,...,...,...,...,...,...
2974,PATIENT995871,IM000009,L1-L2,RFS0,147,99,169,132,320,320
2975,PATIENT995871,IM000009,L4-L5,RFS0,138,197,166,229,320,320
2976,PATIENT997674,IM000003,L3-L4,LFS0,279,224,321,289,576,576
2977,PATIENT997674,IM000003,L2-L3,LFS0,294,157,330,216,576,576


In [None]:
# df= pd.DataFrame(columns = ["patient_ID","level","name", "xmin","ymin","xmax","ymax" ,"width","height"])

In [110]:
# # Define a new row as a dictionary
# new_row = {
#     'filename': 'IM000004.png',
#     'level': 'L3-L4',
#     'name': 'LFS3',
#     'xmin': 250,
#     'ymin': 300,
#     'xmax': 310,
#     'ymax': 360,
#     'width': 576,
#     'height': 576
# }

# # Add it to the DataFrame
# df.loc[len(df)] = new_row

# # Show the updated DataFrame
# print(df)


        filename  level  name  xmin  ymin  xmax  ymax  width  height
0   IM000011.png  L4-L5  RFS1   265   297   321   352    576     576
1   IM000004.png  L3-L4  LFS3   250   300   310   360    576     576
2   IM000004.png  L3-L4  LFS3   250   300   310   360    576     576
3   IM000004.png  L3-L4  LFS3   250   300   310   360    576     576
4   IM000004.png  L3-L4  LFS3   250   300   310   360    576     576
5   IM000004.png  L3-L4  LFS3   250   300   310   360    576     576
6   IM000004.png  L3-L4  LFS3   250   300   310   360    576     576
7   IM000004.png  L3-L4  LFS3   250   300   310   360    576     576
8   IM000004.png  L3-L4  LFS3   250   300   310   360    576     576
9   IM000004.png  L3-L4  LFS3   250   300   310   360    576     576
10  IM000004.png  L3-L4  LFS3   250   300   310   360    576     576
11  IM000004.png  L3-L4  LFS3   250   300   310   360    576     576
12  IM000004.png  L3-L4  LFS3   250   300   310   360    576     576
13  IM000004.png  L3-L4  LFS3   25

### labeling

In [264]:

# old without consider the repetation of boxes

# import pandas as pd
# import json
# import os
# from collections import defaultdict

# # Load dataframe
# df = pd.read_csv("label.csv", sep=",")

# # Normalize center
# def compute_center(row):
#     x_center = (row["xmin"] + row["xmax"]) / 2 / row["width"]
#     y_center = (row["ymin"] + row["ymax"]) / 2 / row["height"]
#     return x_center, y_center

# # Track z count
# z_tracker = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

# output_dir = r"D:\AISSLab\Code\3D CNN\datasets\label"
# os.makedirs(output_dir, exist_ok=True)

# patient_jsons = {}
# i = 0
# for patient_id, group in df.groupby("patient_ID"):
#     result = {
#         "L1-L2": {"left": None, "right": None},
#         "L2-L3": {"left": None, "right": None},
#         "L3-L4": {"left": None, "right": None},
#         "L4-L5": {"left": None, "right": None},
#         "L5-S1": {"left": None, "right": None},
#     }

#     repetation = [] 
#     for _, row in group.iterrows():


        
#         level = row["level"]
#         side = "left" if row["name"].startswith("LFS") else "right"
#         x, y = compute_center(row)
#         index_slice = row["filename"]

        

#         z= index_slice.replace("IM0000" , "")
#         if z[0] =="0" :
#             try:
#                 z=int(z.replace("0",""))
#             except:
#                 pass
#         else:
#             try:
#                  z=int(z)
#             except:
#                 pass


#         z = int(z) / 18
        
#         result[level][side] = [round(x, 4), round(y, 4), round(z, 4)]

#     patient_jsons[patient_id] = result

#     # Save per patient
#     file_path = os.path.join(output_dir, f"{patient_id}.json")
#     with open(file_path, "w") as f:
#         json.dump(result, f, indent=2)




In [15]:

# with consideration repetation 
import pandas as pd
import json
import os
from collections import defaultdict
from collections import Counter
# Load dataframe
df = pd.read_csv("label.csv", sep=",")

# Normalize center
def compute_center(row):
    x_center = (row["xmin"] + row["xmax"]) / 2 / row["width"]
    y_center = (row["ymin"] + row["ymax"]) / 2 / row["height"]
    return x_center, y_center

# Track z count
z_tracker = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

output_dir = r"labelv1"
os.makedirs(output_dir, exist_ok=True)

patient_jsons = {}
i = 0
for patient_id, group in df.groupby("patient_ID"):
    result = {
        "L1-L2": {"left": None, "right": None},
        "L2-L3": {"left": None, "right": None},
        "L3-L4": {"left": None, "right": None},
        "L4-L5": {"left": None, "right": None},
        "L5-S1": {"left": None, "right": None},
    }

    repetation = [] 
    for _, row in group.iterrows():


        try:
        
            level = row["level"]
            side = "left" if row["name"].startswith("LFS") else "right"
            x, y = compute_center(row)
            index_slice = row["filename"]
            
            z= index_slice.replace("IM0000" , "")
            if z[0] =="0" :
                try:
                    z=int(z.replace("0",""))
                except:
                    pass
            else:
                try:
                    z=int(z)
                except:
                    pass

        
            z = int(z) / 18

            repetation.append([level, side ,index_slice, x, y , z])
            

            key_pairs = [(item[0], item[1]) for item in repetation]
            # Count repetitions
            counts = Counter(key_pairs)

            # Print repeated pairs
            for pair, count in counts.items():
                if count > 1:
                    target = pair
                    indices = [i for i, item in enumerate(repetation) if item[0] == target[0] and item[1] == target[1]]
                    # print(repetation)
                    repetation_times_1 = repetation[indices[0]][2]
                    repetation_times_2 = repetation[indices[1]][2]
                    count_1 = sum(1 for row in repetation if repetation_times_1 in row)
                    count_2 = sum(1 for row in repetation if repetation_times_2 in row)
                    if count_1 > count_2 : 
                        "delete list of two  "
                        del repetation[indices[1]]
                    else:
                        "delete repetation one "
                        del repetation[indices[0]]


            for level, side, _, x, y, z in repetation:
                if level in result and side in result[level]:
                    result[level][side] = (round(x,4), round(y,4),round( z, 4)  )

        except:
            pass

    # Save per patient
    file_path = os.path.join(output_dir, f"{patient_id}.json")
    with open(file_path, "w") as f:
        json.dump(result, f, indent=2)


