In [1]:
import os
import pydicom
import numpy as np
import cv2
from tqdm import tqdm
from pydicom.pixel_data_handlers.util import apply_windowing
import pandas as pd


def is_dicom_file(filename):

    #判断某文件是否是dicom格式的文件
    file_stream = open(filename, 'rb')
    file_stream.seek(128)
    data = file_stream.read(4)
    file_stream.close()
    if data == b'DICM':
        return True
    return False
  
def linear_reflect(dcm_array,dcm_data):
  slope = dcm_data.RescaleSlope
  intercept = dcm_data.RescaleIntercept
  
  # set wc and ww by self wc(-450~-600) ww(1500,2000)
  wc = dcm_data.WindowCenter
  ww = dcm_data.WindowWidth

  lowest = wc-ww/2
  highest = wc+ww/2

  
  dcm_array = dcm_array*slope+intercept
  
  normalized_array = (dcm_array-dcm_array.min())/(dcm_array.max()-dcm_array.min())
  
  lowest = (lowest-dcm_array.min())/(dcm_array.max()-dcm_array.min())
  highest = (highest-dcm_array.min())/(dcm_array.max()-dcm_array.min())
  
  # Directly set values outside the window to 0 (or desired background value)
  normalized_array[normalized_array < lowest] = 0  # Set to black
  normalized_array[normalized_array > highest] = 1  # Set to white (or desired max value)
  
  normalized_array = normalized_array*255
  
  return normalized_array
  
def reflection(dcm_array,dcm_data,key):
  if key==1:
    windowed_array = apply_windowing(dcm_array,dcm_data)
    windowed_array = windowed_array-np.min(windowed_array)
    windowed_array = windowed_array/np.max(windowed_array)*255
    return windowed_array
  elif key==2:
    return linear_reflect(dcm_array,dcm_data)

def save_as_png(png_dic,dcm_root,array):
  dcm_name = os.path.basename(dcm_root)
  name,ext = os.path.splitext(dcm_name)
  if not os.path.exists(png_dic):
    os.makedirs(png_dic)
  cv2.imwrite(png_dic+'/'+name+".png",array)
  return png_dic+'/'+name+".png"
  
def translate_single_dicom(dcm_root,png_root):
  if not is_dicom_file(dcm_root):
    return '-1'
  
  # Read the DICOM file
  dcm_data = pydicom.read_file(dcm_root)
  dcm_array = dcm_data.pixel_array
  array = reflection(dcm_array,dcm_data,2)
  return save_as_png(png_root,dcm_root,array)

In [2]:
if __name__ == "__main__":
  csv_root = "../datafile.csv"

  df = pd.read_csv("../datafile.csv")
  df['Png']='-1'
  for index, row in tqdm(df.iterrows()):
    dcm_root = row['target']
    png_dir = dcm_root.replace("Data", "Png")
    png_dir = os.path.dirname(png_dir)
    png_root = translate_single_dicom(dcm_root, png_dir)
    row['Png'] = png_root
    df.iloc[index] = row
  # Save the modified DataFrame to CSV (assuming 'csv_root' is defined)
  df.to_csv(csv_root, index=False)


0it [00:00, ?it/s]

135it [00:01, 80.96it/s]


In [2]:
print(dcm_data)

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 194
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: CT Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 1.3.6.1.4.1.14519.5.2.1.6279.6001.189873581367743509083304574705
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.2.40.0.13.1.1
(0002, 0013) Implementation Version Name         SH: 'dcm4che-1.4.27'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['ORIGINAL', 'PRIMARY', 'AXIAL']
(0008, 0016) SOP Class UID                       UI: CT Image Storage
(0008, 0018) SOP Instance UID                    UI: 1.3.6.1.4.1.14519.5.2.1.6279.6001.189873581367743509083304574705
(0008, 0020) Study Date         

*****
Above is the best way to reveal the image

In [54]:
# Extract the pixel array
pixel_array = dcm_data.pixel_array

# Get slope and intercept values (if available)
# These are typically used for HU to intensity conversion
try:
  slope = dcm_data.RescaleSlope
  intercept = dcm_data.RescaleIntercept
except AttributeError:
  print("Warning: RescaleSlope or RescaleIntercept not found in DICOM data. Using default values.")
  slope = 1  # Default assumption
  intercept = 0  # Default assumption


In [59]:
print(slope,intercept)

1 -1024


In [56]:
intensity_array = pixel_array * slope + intercept

In [57]:
import numpy as np
# Minimum and maximum intensity values (consider outliers)
min_intensity = np.min(intensity_array)
max_intensity = np.max(intensity_array)

# Handle potential zero denominator (if all pixels have the same value)
if min_intensity == max_intensity:
  grayscale_array = np.full_like(intensity_array, 128, dtype=np.uint8)  # Set all to mid-gray
else:
  # Linear rescaling to 0-255 range
  grayscale_array = (intensity_array - min_intensity) / (max_intensity - min_intensity) * 255
  grayscale_array = grayscale_array.astype(np.uint8)  # Convert to uint8 for grayscale image


In [58]:
import cv2
cv2.imwrite("../Png/test.png",grayscale_array)

True

In [52]:
import os
import SimpleITK
import pydicom
import numpy as np
import cv2
from tqdm import tqdm
from pydicom.pixel_data_handlers.util import apply_voi_lut

def is_dicom_file(filename):

    #判断某文件是否是dicom格式的文件
    file_stream = open(filename, 'rb')
    file_stream.seek(128)
    data = file_stream.read(4)
    file_stream.close()
    if data == b'DICM':
        return True
    return False

def load_patient(src_dir):
    '''
    读取某文件夹内的所有dicom文件
    :param src_dir: dicom文件夹路径
    :return: dicom list
    '''
    files = os.listdir(src_dir)
    slices = []
    for s in files:
        if is_dicom_file(src_dir + '/' + s):
            instance = pydicom.read_file(src_dir + '/' + s)
            slices.append(instance)

    try:
        slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
    except:
        slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)

    for s in slices:
        # s.pixel_array = apply_voi_lut(s.pixel_array,s)
        s.SliceThickness = slice_thickness
    return slices

def get_pixels_hu_by_simpleitk(dicom_dir):
    '''
        读取某文件夹内的所有dicom文件
    :param src_dir: dicom文件夹路径
    :return: image array
    '''
    reader = SimpleITK.ImageSeriesReader()
    dicom_names = reader.GetGDCMSeriesFileNames(dicom_dir)
    reader.SetFileNames(dicom_names)
    image = reader.Execute()
    img_array = SimpleITK.GetArrayFromImage(image)
    img_array[img_array == -2000] = 0
    return img_array
if __name__ == '__main__':
	#dicom文件目录
    dicom_dir = '../Data\\LIDC-IDRI-0001\\1.3.6.1.4.1.14519.5.2.1.6279.6001.298806137288633453246975630178\\000000'
    # 读取dicom文件的元数据(dicom tags)
    # slices = load_patient(dicom_dir)
    # print('The number of dicom files : ', len(slices))
    # 提取dicom文件中的像素值
    image = get_pixels_hu_by_simpleitk(dicom_dir)
    for i in tqdm(range(image.shape[0])):
    	#输出png文件目录
        img_path = "../Png/img_" + str(i).rjust(4, '0') + ".png"
        org_img =image[i]*20
        # 保存图像数组
        cv2.imwrite(img_path, org_img )

100%|██████████| 133/133 [00:00<00:00, 593.37it/s]


In [None]:

if __name__ == '__main__':
	#dicom文件目录
    dicom_dir = 'dcm/'
    # 读取dicom文件的元数据(dicom tags)
    slices = load_patient(dicom_dir)
    print('The number of dicom files : ', len(slices))
    # 提取dicom文件中的像素值
    image = get_pixels_hu_by_simpleitk(dicom_dir)
    for i in tqdm(range(image.shape[0])):
    	#输出png文件目录
        img_path = "dcm_2_png/img_" + str(i).rjust(4, '0') + "_i.png"
        org_img =image[i]*20
        # 保存图像数组
        cv2.imwrite(img_path, org_img )



In [3]:
import os
import SimpleITK
import pydicom
import numpy as np
import cv2
def read_single_dcm(dcm_root, png_root):
    """
    Read a single DICOM file and convert it to a PNG image, saving it to the specified PNG root directory.

    Args:
        dcm_root (str): Path to the DICOM file.
        png_root (str): Path to the directory where the converted PNG image should be saved.

    Returns:
        None
    """

    if not os.path.exists(png_root):
        os.makedirs(png_root)

    # Read the DICOM file
    dcm_data = pydicom.read_file(dcm_root)

    # Extract pixel data
    pixel_data = dcm_data.pixel_array

    # Rescale pixel data to 0-255 range
    if dcm_data.PhotometricInterpretation == 'Monochrome2':
        pixel_data = (pixel_data - pixel_data.min()) / (pixel_data.max() - pixel_data.min()) * 255
    else:
        pixel_data = pixel_data.astype(np.uint8)

    # Convert to grayscale image if needed
    if dcm_data.SamplesPerPixel == 3:
        pixel_data = cv2.cvtColor(pixel_data, cv2.COLOR_BGR2GRAY)

    # Get filename without extension
    filename, ext = os.path.splitext(os.path.basename(dcm_root))

    # Save the image as PNG
    cv2.imwrite(os.path.join(png_root, filename + '.png'), pixel_data)



In [8]:
dcm_root = '..\\Data\\LIDC-IDRI-0001\\1.3.6.1.4.1.14519.5.2.1.6279.6001.298806137288633453246975630178\\000000\\000069.dcm'
png_root = '../Png'
if is_dicom_file(dcm_root):
    print("this is a standard dcmi file!")
else:
    print("this not a dcmi file!")
# read_single_dcm(dcm_root,png_root)
dcm_data = pydicom.read_file(dcm_root)


this is a standard dcmi file!


In [14]:
# print(dcm_data)
# from pydicom import dcmread
from pydicom.pixel_data_handlers.util import apply_voi_lut

# ds = dcmread(file)
if 'WindowWidth' in dcm_data:
    print('Dataset has windowing')

windowed = apply_voi_lut(dcm_data.pixel_array, dcm_data)

Dataset has windowing


In [23]:
dcm_data.pixel_array.min(),dcm_data.pixel_array.max()

(-1024, 2378)

In [24]:
window_center = dcm_data['WindowCenter'].value
window_width = dcm_data['WindowWidth'].value
print(f"Window Center: {window_center}, Window Width: {window_width}")

Window Center: -600, Window Width: 1600


In [22]:
windowed.min(),windowed.max()


(-18381.643527204502, 31743.0)

In [18]:
print(windowed.shape)

(512, 512)


In [60]:
def linear_reflection(dcm_data):
  """
  Performs linear reflection on a DICOM image and returns the reflected pixel array.

  Args:
      dcm_data (pydicom.Dataset): The DICOM data object.

  Returns:
      np.ndarray: The reflected image data as a NumPy array.
  """
  # Get slope and intercept values (if available)
# These are typically used for HU to intensity conversion
  try:
    slope = dcm_data.RescaleSlope
    intercept = dcm_data.RescaleIntercept
  except AttributeError:
    print("Warning: RescaleSlope or RescaleIntercept not found in DICOM data. Using default values.")
    slope = 1  # Default assumption
    intercept = 0  # Default assumption

  window_center = dcm_data['WindowCenter'].value
  window_width = dcm_data['WindowWidth'].value

  dcm_array = dcm_data.pixel_array
  a = (256/window_width)*slope
  b = 128-(256*window_center)/window_width+a*intercept


  # Apply reflection (assuming grayscale image data)
  reflected_array = dcm_array.copy()  # Create a copy to avoid modifying original data
  reflected_array = np.vectorize(reflected_linear)(a, b, dcm_array)

  return reflected_array

def reflected_linear(a,b,x):
    return a*x+b


In [68]:
def read_single_dcm(dcm_root, png_root):
  """
  Read a single DICOM file, convert it to a PNG image with linear reflection, 
  and save it to the specified PNG root directory.

  Args:
      dcm_root (str): Path to the DICOM file.
      png_root (str): Path to the directory where the converted PNG image should be saved.

  Returns:
      None
  """

  if not os.path.exists(png_root):
    os.makedirs(png_root)

  # Check if the file is a DICOM using your custom function
  if not is_dicom_file(dcm_root):
    print(f"Error: '{dcm_root}' is not a DICOM file. Skipping.")
    return

  # Read the DICOM file
  try:
    dcm_data = pydicom.read_file(dcm_root)
  except FileNotFoundError:
    print(f"Error: DICOM file '{dcm_root}' not found. Skipping.")
    return
  
  try:
    slope = dcm_data.RescaleSlope
    intercept = dcm_data.RescaleIntercept
  except AttributeError:
    print("Warning: RescaleSlope or RescaleIntercept not found in DICOM data. Using default values.")
    slope = 1  # Default assumption
    intercept = 0  # Default assumption
  
  print(slope,intercept)
  
  data_array = dcm_data.pixel_array
  data_array = data_array * slope + intercept
  
  # Apply linear reflection (assuming grayscale image data)
  # reflected_array = linear_reflection(dcm_data)
  reflected_array = apply_voi_lut(data_array,dcm_data)

  reflected_array = reflected_array.astype(np.uint8)  # Convert to uint8 for color data

  # Get filename without extension
  filename, _ = os.path.splitext(os.path.basename(dcm_root))

  # Save the image as PNG
  cv2.imwrite(os.path.join(png_root, filename + '.png'), reflected_array)


In [69]:
dcm_root = '..\\Data\\LIDC-IDRI-0001\\1.3.6.1.4.1.14519.5.2.1.6279.6001.298806137288633453246975630178\\000000\\000069.dcm'
png_root = '../Png'

read_single_dcm(dcm_root,png_root)

1 -1024


In [62]:
# 应用VOI LUT（如果存在）
ds = pydicom.read_file(dcm_root)
if hasattr(ds, 'VOILUTSequence'):
    data = pydicom.apply_voi_lut(ds.pixel_array, ds)
    print("apply voi")
else:
    data = ds.pixel_array

# 归一化像素值到0-255范围
data = data - np.min(data)
data = (data / np.max(data) * 255).astype(np.uint8)


In [63]:
cv2.imwrite('../Png/test.png',data)

True

In [71]:
import SimpleITK as sitk

# Path to the DICOM file
dicom_file_path = dcm_root
import SimpleITK as sitk

# Create an ImageReader object
reader = sitk.ImageFileReader()

# Set the DICOM file path to the reader
reader.SetFileName(dicom_file_path)

# Read the DICOM image (using a compatible method)
image = reader.Execute()  # This method is available in most SimpleITK versions

# Get the pixel array from the image
img_array = sitk.GetArrayFromImage(image)

# Save the image as a desired format (e.g., PNG, JPEG)
sitk.WriteImage(sitk.Cast(image, sitk.sitkFloat32), "output_image.png")  # Replace 'output_image.png' with your desired output path and format


RuntimeError: Exception thrown in SimpleITK ImageFileWriter_Execute: D:\a\1\sitk-build\ITK\Modules\IO\PNG\src\itkPNGImageIO.cxx:542:
PNG supports unsigned char and unsigned short

In [72]:
file = sitk.ReadImage(dcm_root)
pixel_array = sitk.GetArrayFromImage(file)


In [76]:
for i in tqdm(range(pixel_array.shape[0])):
    	#输出png文件目录
        img_path = "./img_" + str(i).rjust(4, '0') + ".png"
        org_img =pixel_array[i]
        # 保存图像数组
        cv2.imwrite(img_path, org_img )

100%|██████████| 1/1 [00:00<00:00, 499.86it/s]
