In [57]:
import pydicom
from pydicom.dataset import Dataset, FileDataset
import numpy as np
import datetime
from glob import glob
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd
def create_directory(path):
    """Create the directory ``path``, including any missing parent directories.

    The call is idempotent: an already existing directory is left untouched.
    ``exist_ok=True`` is used instead of a separate ``os.path.exists`` check so
    that there is no window between the check and the creation in which another
    process (or a parallel worker) could create the directory and make
    ``os.makedirs`` fail.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

CT

In [None]:
# Convert every non-contrast CT (NCCT) DICOM slice to a [0, 1]-normalized
# array and store it as <save_path>/<class>/<folder>/<slice>.npy.

# Source class folder names and the matching short names used in the output tree.
class_list=['0100.Ischemic Stroke','0200.Hemorrhagic Stroke','0300.Stroke Control Group']
save_class_list=['Ischemic','Hemorrhagic','Normal']
# NOTE: glob() is called without recursive=True, so '**' behaves like '*'
# and matches exactly one directory level.
file_path='../../data/stroke_2023/**/1.원천데이터/'
save_path='../../data/raw_12bit/CT/'

# Clipping window in Hounsfield units; it spans 4096 integer levels (12 bit).
HU_MIN = -1024
HU_MAX = 3071

for class_name, save_class_name in zip(class_list, save_class_list):
    file_list = glob(f'{file_path}{class_name}/*NCCT/*.dcm')
    for dicom_file in tqdm(file_list):
        # The file name is split on '_': the first two parts form the output
        # folder name, the last part (without extension) names the slice.
        # Assumes every file name contains at least one underscore.
        folder_split = os.path.basename(dicom_file).split('_')
        folder_name = folder_split[0] + '_' + folder_split[1]
        folder_path = os.path.join(save_path, save_class_name, folder_name)
        create_directory(folder_path)

        dcm = pydicom.dcmread(dicom_file)
        # Fall back to the identity rescale when the tags are absent; float()
        # turns pydicom's numeric value types into plain Python numbers.
        slope = float(getattr(dcm, 'RescaleSlope', 1))
        intercept = float(getattr(dcm, 'RescaleIntercept', 0))

        img = dcm.pixel_array.astype(np.float32)
        hu = img * slope + intercept  # stored values -> Hounsfield units
        hu_clipped = np.clip(hu, HU_MIN, HU_MAX)
        # Map [HU_MIN, HU_MAX] linearly onto [0, 1].
        hu_normalized = (hu_clipped - HU_MIN) / (HU_MAX - HU_MIN)

        slice_name = os.path.splitext(folder_split[-1])[0] + '.npy'
        np.save(os.path.join(folder_path, slice_name), hu_normalized)


 10%|█         | 7613/73876 [04:03<36:16, 30.44it/s]  

MRI

In [None]:
# Configuration for the MRI (DWI) conversion.
# Source class folder names; entries pair up by index with save_class_list.
class_list = [
    '0100.Ischemic Stroke',
    '0300.Stroke Control Group',
]
# Short class names used for the output folders.
save_class_list = [
    'Ischemic',
    'Normal',
]
# Glob prefix of the source data (example MRI path).
file_path = '../../data/stroke_2023/**/1.원천데이터/'
# Root folder that receives the converted MRI arrays.
save_path = '../../data/raw_12bit/MRI/'


In [None]:
# Z-score normalize every DWI DICOM slice, store it as
# <save_path>/<class>/<folder>/<slice>.npy and log the per-slice mean/std
# that were used, so the statistics are kept alongside the arrays.

# One dict per processed slice; turned into a DataFrame after the loop.
records = []
record_columns = ['data_folder', 'data_name', 'mean', 'std']

for class_name, save_class_name in zip(class_list, save_class_list):
    file_list = glob(f'{file_path}{class_name}/*Diffusion/*DWI*.dcm')
    for dicom_file in tqdm(file_list):
        # The file name is split on '_': the first two parts form the output
        # folder name, the last part (without extension) names the slice.
        # Assumes every file name contains at least one underscore.
        folder_split = os.path.basename(dicom_file).split('_')
        folder_name = folder_split[0] + '_' + folder_split[1]
        folder_path = os.path.join(save_path, save_class_name, folder_name)
        create_directory(folder_path)

        # Read the DICOM and z-score normalize it with its own statistics.
        dcm = pydicom.dcmread(dicom_file)
        img = dcm.pixel_array.astype(np.float32)
        mean = np.mean(img)
        std = np.std(img)
        # The small epsilon keeps constant images (std == 0) from dividing by zero.
        img_z = (img - mean) / (std + 1e-8)

        # Output file name.
        base_filename = os.path.splitext(folder_split[-1])[0] + '.npy'
        np.save(os.path.join(folder_path, base_filename), img_z)

        # Log the statistics of this slice.
        records.append({
            'data_folder': folder_name,
            'data_name': base_filename,
            'mean': round(mean, 4),
            'std': round(std, 4)
        })

# Write the log as CSV. The columns are given explicitly so the header is
# present even when no file matched, and the target folder is created first
# because it only exists if at least one slice was saved.
df = pd.DataFrame(records, columns=record_columns)
create_directory(save_path)
csv_save_path = os.path.join(save_path, 'mri_mean_std.csv')
df.to_csv(csv_save_path, index=False)


  2%|▏         | 1643/81624 [00:18<14:39, 90.90it/s] 


KeyboardInterrupt: 

In [17]:
# Write a 2-D HU image to a minimal CT DICOM file.
# NOTE(review): `img1` is not defined in any visible cell; it must be a 2-D
# array of HU values created beforehand. Confirm where it comes from.
img_int16 = img1.astype(np.int16)

# Output path.
# NOTE(review): this rebinds `save_path`, which the conversion cells use as
# their output root. Re-run their configuration cell before re-running them.
save_path = "output_hu.dcm"

# File meta information.
file_meta = pydicom.Dataset()
# Use the registered CT Image Storage SOP class rather than a random UID, so
# readers can identify what kind of object the file contains.
file_meta.MediaStorageSOPClassUID = pydicom.uid.UID("1.2.840.10008.5.1.4.1.1.2")
file_meta.MediaStorageSOPInstanceUID = pydicom.uid.generate_uid()
file_meta.ImplementationClassUID = pydicom.uid.PYDICOM_IMPLEMENTATION_UID
# State the encoding of the data set explicitly; implicit VR little endian
# matches the FileDataset defaults used below.
file_meta.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian

# Minimal data set with the standard 128-byte preamble.
ds = FileDataset(
    save_path, {}, file_meta=file_meta, preamble=b"\0" * 128
)

# Patient and basic identification.
ds.PatientName = "Anonymous"
ds.PatientID = "000000"
ds.StudyInstanceUID = pydicom.uid.generate_uid()
ds.SeriesInstanceUID = pydicom.uid.generate_uid()
# The SOP UIDs in the data set must equal the ones in the file meta.
ds.SOPInstanceUID = file_meta.MediaStorageSOPInstanceUID
ds.SOPClassUID = file_meta.MediaStorageSOPClassUID

# Study date and time.
dt = datetime.datetime.now()
ds.StudyDate = dt.strftime("%Y%m%d")
ds.StudyTime = dt.strftime("%H%M%S")

# Image description: single-channel signed 16-bit pixels.
ds.Modality = "CT"
ds.Rows, ds.Columns = img_int16.shape
ds.BitsStored = 16
ds.BitsAllocated = 16
ds.HighBit = 15
ds.PixelRepresentation = 1  # signed int
ds.SamplesPerPixel = 1
ds.PhotometricInterpretation = "MONOCHROME2"

# The pixels already hold HU values, so the rescale is the identity.
ds.RescaleIntercept = 0
ds.RescaleSlope = 1

# Raw pixel bytes.
ds.PixelData = img_int16.tobytes()

# Write the file.
ds.save_as(save_path)
print(f"HU 기반 DICOM 저장 완료: {save_path}")

HU 기반 DICOM 저장 완료: output_hu.dcm
