In [1]:
# Root folder of the competition data; the input paths used in this notebook are built from it.
ROOT_DIR = "/workspaces/RSNA_LSDC/inputs/rsna-2024-lumbar-spine-degenerative-classification"
# Folder the DICOM files are read from, laid out as <study_id>/<series_id>/<file> (see prepare_sample).
TRAIN_DIR = f'{ROOT_DIR}/train_images'

In [20]:
import pandas as pd
import numpy as np
import os
import cv2
import dicomsdl as dicoml
import tqdm.notebook as tqdm
from joblib import Parallel, delayed

In [3]:
# Load the per-series metadata table (columns: study_id, series_id, series_description)
# and preview its first 10 rows.
tsd = pd.read_csv(f'{ROOT_DIR}/train_series_descriptions.csv')
tsd.head(10)

Unnamed: 0,study_id,series_id,series_description
0,4003253,702807833,Sagittal T2/STIR
1,4003253,1054713880,Sagittal T1
2,4003253,2448190387,Axial T2
3,4646740,3201256954,Axial T2
4,4646740,3486248476,Sagittal T1
5,4646740,3666319702,Sagittal T2/STIR
6,7143189,132939515,Sagittal T2/STIR
7,7143189,1951927562,Axial T2
8,7143189,3219733239,Sagittal T1
9,8785691,481125819,Sagittal T2/STIR


In [4]:
# Loading DICOM images
# Load an image with dicomsdl
def load_image(filepath: str):
    """Read one DICOM file and return its pixels as an 8-bit array.

    The pixel data is inverted when the file's PhotometricInterpretation
    equals "MONOCHROME1", then divided by its maximum value (skipped when
    the maximum is 0), multiplied by 255 and cast to ``np.uint8``. Only the
    maximum is used for scaling; the minimum is not subtracted.

    Args:
        filepath: Path of the DICOM file to open.

    Returns:
        The scaled pixel array with dtype ``np.uint8``.
    """
    dcm = dicoml.open(filepath)
    pixels = dcm.pixelData()

    # NOTE(review): this comparison assumes dcm[...] yields the tag's string
    # value -- confirm against the dicomsdl documentation.
    needs_inversion = dcm["PhotometricInterpretation"] == "MONOCHROME1"
    if needs_inversion:
        pixels = np.max(pixels) - pixels

    # Scale by the peak value; an all-zero image is left untouched to avoid
    # dividing by zero.
    peak = np.max(pixels)
    if peak != 0:
        pixels = pixels / peak

    scaled = pixels * 255
    return scaled.astype(np.uint8)

In [26]:
def prepare_sample(row, png_datapath):
    """Convert every file of one series into an 8-bit PNG.

    Reads all files found in ``{TRAIN_DIR}/{study_id}/{series_id}`` with
    ``load_image`` and writes one PNG per file into
    ``{png_datapath}/{study_id}/{series_id}``, creating that output
    directory if it does not exist yet.

    Args:
        row: Mapping-like object (e.g. one row of the series table) that
            provides ``'study_id'`` and ``'series_id'``.
        png_datapath: Root directory of the PNG dataset being created.

    Raises:
        OSError: If OpenCV reports that a PNG could not be written.
    """
    directory = f"{TRAIN_DIR}/{row['study_id']}/{row['series_id']}"
    png_dir = f"{png_datapath}/{row['study_id']}/{row['series_id']}"
    os.makedirs(png_dir, exist_ok=True)

    for file in os.listdir(directory):
        img = load_image(os.path.join(directory, file))
        # Build the output name from the stem instead of str.replace():
        # replace() missed upper-case extensions (".DCM"), rewrote every
        # occurrence of the extension text, and mangled extension-less names
        # (replacing "" inserts ".png" between all characters).
        stem = os.path.splitext(file)[0]
        png_path = os.path.join(png_dir, f"{stem}.png")
        # imwrite signals failure through its return value; surface it rather
        # than silently producing an incomplete dataset.
        if not cv2.imwrite(png_path, img):
            raise OSError(f"Failed to write PNG: {png_path}")

In [None]:
# Convert every series listed in `tsd` to PNGs, either through joblib workers
# or sequentially with a progress bar.
process_parallel = True
# Output root of the PNG dataset; defined once so both modes write to the same place.
png_dataset_path = "/workspaces/RSNA_LSDC/inputs/dataset"

if process_parallel:
    # Parallel dataset creation: one prepare_sample task per series row.
    Parallel(n_jobs=24)(
        delayed(prepare_sample)(row, png_dataset_path)
        for _, row in tsd.iterrows()
    )
else:
    # Sequential fallback; the context manager closes the progress bar even
    # if a conversion raises.
    with tqdm.tqdm(total=len(tsd.index)) as pbar:
        for index, row in tsd.iterrows():
            prepare_sample(row, png_dataset_path)
            pbar.update(1)