In [3]:
import pandas as pd
import numpy as np
from pydicom import dcmread

In [4]:
coords_dataset = pd.read_csv('../../data/lumbar-coordinate-pretraining-dataset/coords_rsna_improved.csv')

In [5]:
coords_dataset

Unnamed: 0.1,Unnamed: 0,series_id,relative_x,relative_y,condition,side,instance_number,study_id,level
0,0,10996,0.521148,0.325282,Left Neural Foraminal Narrowing,L,13,3996069892,L1/L2
1,1,10996,0.516856,0.319701,Right Neural Foraminal Narrowing,R,6,3996069892,L1/L2
2,2,10996,0.493773,0.400966,Left Neural Foraminal Narrowing,L,12,3996069892,L2/L3
3,3,10996,0.504032,0.385531,Right Neural Foraminal Narrowing,R,6,3996069892,L2/L3
4,4,10996,0.487331,0.475040,Left Neural Foraminal Narrowing,L,12,3996069892,L3/L4
...,...,...,...,...,...,...,...,...,...
58730,58730,4294540297,0.511282,0.571751,Spinal Canal Stenosis,R,10,916362094,L3/L4
58731,58731,4294540297,0.308594,0.667969,Spinal Canal Stenosis,L,-1,916362094,L4/L5
58732,58732,4294540297,0.516697,0.671029,Spinal Canal Stenosis,R,9,916362094,L4/L5
58733,58733,4294540297,0.328125,0.796875,Spinal Canal Stenosis,L,-1,916362094,L5/S1


In [6]:
center_points_dataset = coords_dataset[
    (coords_dataset["condition"] == "Spinal Canal Stenosis") & (coords_dataset["instance_number"] != -1)]
center_points_dataset

Unnamed: 0.1,Unnamed: 0,series_id,relative_x,relative_y,condition,side,instance_number,study_id,level
21,21,370109,0.564182,0.282331,Spinal Canal Stenosis,R,9,1217477368,L1/L2
23,23,370109,0.546010,0.413573,Spinal Canal Stenosis,R,9,1217477368,L2/L3
25,25,370109,0.528847,0.536739,Spinal Canal Stenosis,R,9,1217477368,L3/L4
27,27,370109,0.541972,0.633656,Spinal Canal Stenosis,R,9,1217477368,L4/L5
29,29,370109,0.557115,0.732592,Spinal Canal Stenosis,R,9,1217477368,L5/S1
...,...,...,...,...,...,...,...,...,...
58726,58726,4294540297,0.567238,0.355144,Spinal Canal Stenosis,R,11,916362094,L1/L2
58728,58728,4294540297,0.534747,0.467058,Spinal Canal Stenosis,R,10,916362094,L2/L3
58730,58730,4294540297,0.511282,0.571751,Spinal Canal Stenosis,R,10,916362094,L3/L4
58732,58732,4294540297,0.516697,0.671029,Spinal Canal Stenosis,R,9,916362094,L4/L5


In [7]:
def convert_coords_to_patient(relative_x, relative_y, dicom_slice):
    x = dicom_slice.pixel_array.shape[0] * relative_x
    y = dicom_slice.pixel_array.shape[1] * relative_y
    
    # transform_matrix_factor = np.matrix(
    #     [[0, 1, 0, 0],
    #      [1, 0, 0, 0],
    #      [0, 0, 1, 0],
    #      [0, 0, 0, 1]]
    # )
        
    dX, dY = dicom_slice.PixelSpacing
    
    X = np.array(list(dicom_slice.ImageOrientationPatient[:3]) + [0]) * dY
    Y = np.array(list(dicom_slice.ImageOrientationPatient[3:]) + [0]) * dX

    S = np.array(list(dicom_slice.ImagePositionPatient) + [1])

    transform_matrix = np.array([Y, X, np.zeros(len(X)), S]).T
    # transform_matrix = transform_matrix @ transform_matrix_factor

    return (transform_matrix @ np.array([y, x, 0, 1]).T)

In [8]:
train_images_basepath = "../../data/rsna-2024-lumbar-spine-degenerative-classification/train_images"

patient_coords_dict = {
    "study_id": [],
    "level": [],
    "x": [],
    "y": [],
    "z": []
}

for index, group in center_points_dataset.groupby("study_id"):
    for row_index, row in group.iterrows():
        dicom_slice_path = f"{train_images_basepath}/{row['study_id']}/{row['series_id']}/{row['instance_number']}.dcm"
        dicom_slice = dcmread(dicom_slice_path)
        coords = convert_coords_to_patient(row['relative_x'], row['relative_y'], dicom_slice)
        
        patient_coords_dict["study_id"].append(row['study_id'])
        patient_coords_dict["level"].append(row['level'])
        patient_coords_dict["x"].append(coords[0])
        patient_coords_dict["y"].append(coords[1])
        patient_coords_dict["z"].append(coords[2])
    
patient_coords = pd.DataFrame.from_dict(patient_coords_dict)
patient_coords

Unnamed: 0,study_id,level,x,y,z
0,4003253,L1/L2,2.281485,74.681099,-389.679551
1,4003253,L2/L3,2.503593,73.621785,-421.436447
2,4003253,L3/L4,2.750310,74.774676,-457.109285
3,4003253,L4/L5,2.924442,80.522574,-483.128617
4,4003253,L5/S1,3.098990,89.018362,-509.676647
...,...,...,...,...,...
9865,4290709089,L1/L2,10.407222,74.543790,-347.122554
9866,4290709089,L2/L3,9.814853,64.679538,-380.458776
9867,4290709089,L3/L4,9.187656,61.836557,-413.573176
9868,4290709089,L4/L5,8.589401,67.148507,-442.856477


In [9]:
patient_coords.to_csv('../../data/lumbar-coordinate-pretraining-dataset/coords_3d.csv', index=False)

In [21]:
patient_bounding_boxes_dict = {
    "study_id": [],
    "level": [],
    "x_min": [],
    "y_min": [],
    "z_min": [],
    "x_max": [],
    "y_max": [],
    "z_max": [],
}

for index, group in patient_coords.groupby("study_id"):
    ordered_group = group.sort_values(by="level", ascending=True)
    if len(ordered_group) != 5:
        continue
    for level_index in range(5):
        patient_bounding_boxes_dict["study_id"].append(group['study_id'].iloc[0])
        patient_bounding_boxes_dict["level"].append(group['level'].iloc[level_index])

        if level_index == 0:
            dist = abs(group['z'].iloc[level_index + 1] - group['z'].iloc[level_index])
        elif level_index == 4:
            dist = abs(group['z'].iloc[level_index] - group['z'].iloc[level_index - 1])
        else:
            dist = abs(group['z'].iloc[level_index] - group['z'].iloc[level_index - 1])
        
        x_min = group['x'].iloc[level_index] - dist / 3 * 6
        y_min = group['y'].iloc[level_index] - dist / 3 * 6
        z_min = group['z'].iloc[level_index] - dist / 3 * (3 if level_index == 4 else 1)
        
        if level_index == 4:
            dist = abs(group['z'].iloc[level_index] - group['z'].iloc[level_index - 1])
        else:
            dist = abs(group['z'].iloc[level_index + 1] - group['z'].iloc[level_index])
        
        
        x_max = group['x'].iloc[level_index] + dist / 3 * 6
        y_max = group['y'].iloc[level_index] + dist / 3 * 6
        z_max = group['z'].iloc[level_index] + dist / 3 
         
        
        patient_bounding_boxes_dict["x_min"].append(x_min)
        patient_bounding_boxes_dict["y_min"].append(y_min)
        patient_bounding_boxes_dict["z_min"].append(z_min)
        patient_bounding_boxes_dict["x_max"].append(x_max)
        patient_bounding_boxes_dict["y_max"].append(y_max)
        patient_bounding_boxes_dict["z_max"].append(z_max)

patient_bounding_boxes = pd.DataFrame.from_dict(patient_bounding_boxes_dict)
patient_bounding_boxes

Unnamed: 0,study_id,level,x_min,y_min,z_min,x_max,y_max,z_max
0,4003253,L1/L2,-61.232307,11.167308,-400.265183,65.795276,138.194891,-379.093919
1,4003253,L2/L3,-61.010199,10.107994,-432.022079,73.849269,144.967462,-409.545501
2,4003253,L3/L4,-68.595366,3.429000,-469.000231,54.788974,126.813340,-448.436174
3,4003253,L4/L5,-49.114223,28.483910,-491.801728,56.020502,133.618634,-474.279274
4,4003253,L5/S1,-49.997070,35.922302,-536.224677,56.195050,142.114422,-500.827304
...,...,...,...,...,...,...,...,...
9865,4290709089,L1/L2,-56.265220,7.871348,-358.234628,77.079664,141.216232,-336.010481
9866,4290709089,L2/L3,-56.857589,-1.992904,-391.570849,76.043655,130.908340,-369.420642
9867,4290709089,L3/L4,-57.041146,-4.392245,-424.611310,67.754256,120.403158,-403.812076
9868,4290709089,L4/L5,-49.977199,8.581907,-452.617577,61.993295,120.552402,-433.955828


In [22]:
patient_bounding_boxes.to_csv('../../data/lumbar-coordinate-pretraining-dataset/bounding_boxes_3d.csv', index=False)