In [1]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import pydicom
import imageio
import os
from glob import glob
from tqdm import tqdm
import json
def create_dir(path):
    """Create directory ``path``, including missing parents, if it is absent.

    ``exist_ok=True`` replaces the former "check, then create" sequence, which
    could raise ``FileExistsError`` when the directory appeared between the
    check and the ``os.makedirs`` call (e.g. several converters running at once).

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

CT

In [2]:
# Dataset folder name of each class, appended to `data_dir` when globbing.
class_list = [
    '0100.Ischemic Stroke',
    '0200.Hemorrhagic Stroke',
    '0300.Stroke Control Group',
]

# Output folder name of each class; index-aligned with `class_list`.
save_class_list = [
    'Ischemic',
    'Hemorrhagic',
    'Normal',
]

# Glob prefix of the raw-data folders ('1.원천데이터' is Korean for "source data").
# NOTE: glob() is called without recursive=True below, so '**' behaves like
# '*' and matches exactly one directory level.
data_dir = '../../data/stroke_2023/**/1.원천데이터/'

# Root folder that receives the converted CT PNG files.
save_dir = '../../data/stroke_CT/'

In [3]:
# Convert every NCCT DICOM slice of every class into an 8-bit grayscale PNG:
#   <save_dir>/<save class>/<token0>_<token1>/<last token>.png
# where the tokens are the '_'-separated parts of the DICOM file name.
for j, class_name in enumerate(class_list):
    data_list = glob(data_dir + class_name + '/*NCCT/*NCCT*.dcm')
    for dicom_file in tqdm(data_list):
        file_name = os.path.basename(dicom_file)
        name_tokens = file_name.split('_')
        folder_name = name_tokens[0] + '_' + name_tokens[1]
        folder_path = save_dir + save_class_list[j] + '/' + folder_name + '/'
        create_dir(folder_path)

        dcm = pydicom.dcmread(dicom_file)
        img = dcm.pixel_array.astype(np.float32)

        # Modality rescale (stored values -> HU); identity when the tags are absent.
        slope = dcm.RescaleSlope if 'RescaleSlope' in dcm else 1
        intercept = dcm.RescaleIntercept if 'RescaleIntercept' in dcm else 0
        img = img * slope + intercept

        # Display window (level / width) from the DICOM header, if any.
        window_center = getattr(dcm, 'WindowCenter', None)
        window_width = getattr(dcm, 'WindowWidth', None)

        # When several windows are stored, use the first one.
        if isinstance(window_center, pydicom.multival.MultiValue):
            window_center = window_center[0] if len(window_center) else None
        if isinstance(window_width, pydicom.multival.MultiValue):
            window_width = window_width[0] if len(window_width) else None

        # A tag that is present but empty, or a non-positive width, cannot
        # define a window; fall back to min-max scaling in that case instead
        # of crashing or dividing by zero.
        has_window = window_center not in (None, '') and window_width not in (None, '')
        if has_window:
            window_center = float(window_center)
            window_width = float(window_width)
            has_window = window_width > 0

        if has_window:
            min_val = window_center - window_width / 2
            max_val = window_center + window_width / 2
            img = np.clip(img, min_val, max_val)  # keep values inside the window
        else:
            # No usable window: scale the slice's own value range.
            min_val = img.min()
            max_val = img.max()

        # Map [min_val, max_val] onto 0..255. A constant slice has zero span
        # and would yield NaN, so it is written as an all-black image.
        span = max_val - min_val
        if span > 0:
            img = (img - min_val) / span * 255
        else:
            img = np.zeros_like(img)

        out_name = os.path.splitext(file_name)[0].split('_')[-1] + '.png'
        Image.fromarray(img.astype(np.uint8)).save(folder_path + out_name)

100%|██████████| 73876/73876 [25:54<00:00, 47.52it/s]  
100%|██████████| 56076/56076 [19:51<00:00, 47.08it/s]
100%|██████████| 29307/29307 [11:15<00:00, 43.42it/s]


MRI

In [None]:

# MRI image export settings. These rebind `save_class_list`, `data_dir` and
# `save_dir` from the CT section, so the CT cells must not be re-run after this.

# Series keywords: each is used both in the file-name glob and as the output folder.
save_class_list = [
    'ADC',
    'DWI',
]

# Glob prefix of the raw-data folders ('1.원천데이터' is Korean for "source data").
data_dir = '../../data/stroke_2023/**/1.원천데이터/'

# Root folder that receives the converted MRI PNG files.
save_dir = '../../data/Ischemic_stroke_mri/image/'

In [None]:

# Convert every ADC / DWI DICOM slice of the ischemic-stroke MR diffusion
# studies into an 8-bit grayscale PNG:
#   <save_dir>/<series>/<token0>_<token1>/<last token>.png
# where the tokens are the '_'-separated parts of the DICOM file name.
for series in save_class_list:
    data_list = glob(data_dir + '0100.Ischemic Stroke' + '/*MR Diffusion/*' + series + '*.dcm')
    for dicom_file in tqdm(data_list):
        file_name = os.path.basename(dicom_file)
        name_tokens = file_name.split('_')
        folder_name = name_tokens[0] + '_' + name_tokens[1]
        folder_path = save_dir + series + '/' + folder_name + '/'
        create_dir(folder_path)

        dcm = pydicom.dcmread(dicom_file)
        img = dcm.pixel_array.astype(np.float32)

        # Apply the header's linear rescale when present (identity otherwise).
        # Unlike CT, the result is not in Hounsfield units for MR data.
        slope = dcm.RescaleSlope if 'RescaleSlope' in dcm else 1
        intercept = dcm.RescaleIntercept if 'RescaleIntercept' in dcm else 0
        img = img * slope + intercept

        # Display window (level / width) from the DICOM header, if any.
        window_center = getattr(dcm, 'WindowCenter', None)
        window_width = getattr(dcm, 'WindowWidth', None)

        # When several windows are stored, use the first one.
        if isinstance(window_center, pydicom.multival.MultiValue):
            window_center = window_center[0] if len(window_center) else None
        if isinstance(window_width, pydicom.multival.MultiValue):
            window_width = window_width[0] if len(window_width) else None

        # A tag that is present but empty, or a non-positive width, cannot
        # define a window; fall back to min-max scaling in that case instead
        # of crashing or dividing by zero.
        has_window = window_center not in (None, '') and window_width not in (None, '')
        if has_window:
            window_center = float(window_center)
            window_width = float(window_width)
            has_window = window_width > 0

        if has_window:
            min_val = window_center - window_width / 2
            max_val = window_center + window_width / 2
            img = np.clip(img, min_val, max_val)  # keep values inside the window
        else:
            # No usable window: scale the slice's own value range.
            min_val = img.min()
            max_val = img.max()

        # Map [min_val, max_val] onto 0..255. A constant slice has zero span
        # and would yield NaN, so it is written as an all-black image.
        span = max_val - min_val
        if span > 0:
            img = (img - min_val) / span * 255
        else:
            img = np.zeros_like(img)

        out_name = os.path.splitext(file_name)[0].split('_')[-1] + '.png'
        Image.fromarray(img.astype(np.uint8)).save(folder_path + out_name)

In [None]:
# MRI label export settings. These rebind the names used by the image export
# cells above, now pointing at the annotation folders.

# Series keywords: each is used both in the file-name glob and as the output folder.
save_class_list = [
    'ADC',
    'DWI',
]

# Glob prefix of the annotation folders ('2.라벨링데이터' is Korean for "labeling data").
data_dir = '../../data/stroke_2023/**/2.라벨링데이터/'

# Root folder that receives the exported label PNG files.
save_dir = '../../data/Ischemic_stroke_mri/label/'

In [22]:
# Export the "Ischemic Core" array of every ADC / DWI annotation JSON as an
# 8-bit PNG:
#   <save_dir>/<series>/<token0>_<token1>/<last token>.png
# where the tokens are the '_'-separated parts of the JSON file name.
for series in save_class_list:
    data_list = glob(data_dir + '0100.Ischemic Stroke' + '/*MR Diffusion/*' + series + '*.json')
    for json_path in tqdm(data_list):
        # Open in binary mode so json.load detects UTF-8/16/32 (and a BOM)
        # itself instead of relying on the platform's default text encoding.
        with open(json_path, 'rb') as f:
            json_data = json.load(f)

        # Scale the array by 255 before the uint8 cast.
        # NOTE(review): assumes the array holds 0/1 values - TODO confirm;
        # larger values would wrap around in the cast below.
        label = np.array(json_data["Ischemic Core"]) * 255

        file_name = os.path.basename(json_path)
        name_tokens = file_name.split('_')
        folder_name = name_tokens[0] + '_' + name_tokens[1]
        folder_path = save_dir + series + '/' + folder_name + '/'
        create_dir(folder_path)

        out_name = os.path.splitext(file_name)[0].split('_')[-1] + '.png'
        Image.fromarray(label.astype(np.uint8)).save(folder_path + out_name)

100%|██████████| 81618/81618 [22:58<00:00, 59.22it/s] 
100%|██████████| 81624/81624 [22:04<00:00, 61.62it/s] 
