In [None]:
import os
import numpy as np
import cv2
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

def fit_image(fname):
    """Load a DICOM file and crop it to its largest bright connected region.

    The pixel data is passed through ``apply_voi_lut``, min-max scaled to
    the 0-255 range (inverted when the photometric interpretation is
    ``MONOCHROME1``), trimmed by a 10-pixel border on each side,
    thresholded at 20, and cropped to the bounding box of the largest
    8-connected foreground component.

    Args:
        fname: Path of the DICOM file, passed to ``pydicom.dcmread``.

    Returns:
        The cropped image as a float array with values in 0-255, or
        ``None`` when the file cannot be processed: it has no
        ``PixelData``, the image has a single constant intensity, it is
        too small to survive the border trim, or no pixel exceeds the
        threshold.
    """
    dicom = pydicom.dcmread(fname)
    if 'PixelData' not in dicom:
        print(f"No PixelData found in DICOM file: {fname}")
        return None

    # Work in float64 from the start so the min/max subtraction below cannot
    # wrap around when the pixel data is a signed or narrow integer type.
    X = np.asarray(
        apply_voi_lut(dicom.pixel_array, dicom, prefer_lut=False),
        dtype=np.float64,
    )

    lo = X.min()
    hi = X.max()
    if hi == lo:
        # A flat image would divide by zero here and has no region to find.
        print(f"Constant-intensity image, no region to extract: {fname}")
        return None
    X = (X - lo) / (hi - lo)

    if dicom.PhotometricInterpretation == "MONOCHROME1":
        X = 1 - X

    X = X * 255

    # Remove narrow exterior frames
    X = X[10:-10, 10:-10]
    if X.size == 0:
        print(f"Image too small after removing border: {fname}")
        return None

    # Find breast region using connected components
    _, _, stats, _ = cv2.connectedComponentsWithStats(
        (X > 20).astype(np.uint8), 8, cv2.CV_32S
    )

    # Row 0 of stats is the background label; with no other rows there is
    # no foreground component and argmax would fail on an empty array.
    if stats.shape[0] < 2:
        print(f"No foreground region found in DICOM file: {fname}")
        return None

    # Find largest region (breast); +1 compensates for skipping row 0.
    idx = stats[1:, cv2.CC_STAT_AREA].argmax() + 1
    x1, y1, w, h = stats[idx][:4]
    x2 = x1 + w
    y2 = y1 + h

    return X[y1:y2, x1:x2]

def process_png_to_jpg(png_path, output_dir):
    """Re-encode a PNG as a grayscale JPG named ``img.jpg`` in *output_dir*.

    Args:
        png_path: Path of the PNG file to read.
        output_dir: Directory that receives ``img.jpg``; created if it
            does not exist.

    Returns:
        ``True`` when the JPG was written, ``False`` when the PNG could
        not be read or the JPG could not be written.
    """
    # Read PNG file
    img = cv2.imread(png_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Failed to read PNG file: {png_path}")
        return False

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Save as JPG. imwrite signals failure through its return value, so
    # propagate it instead of unconditionally reporting success.
    output_path = os.path.join(output_dir, 'img.jpg')
    if not cv2.imwrite(output_path, img):
        print(f"Failed to write JPG file: {output_path}")
        return False
    return True

# Where converted images are written, and where the source PNGs live.
output_base_dir = 'Benchmark/RSNA'
train_image_base_path = '/Volumes/Newsmy/rsna-breast-cancer-detection/train_images'

# Walk the training image tree and convert every PNG found. Each image gets
# its own output folder named "<patient_id>_<image_id>_img".
for root, dirs, files in os.walk(train_image_base_path):
    for file in files:
        # Skip anything that is not a PNG (extension match is case-insensitive).
        if not file.lower().endswith('.png'):
            continue

        # The containing directory name serves as the patient id and the
        # file name without its extension as the image id.
        patient_id = os.path.basename(root)
        image_id = os.path.splitext(file)[0]

        png_path = os.path.join(root, file)
        output_folder = os.path.join(
            output_base_dir, f"{patient_id}_{image_id}_img"
        )

        # Convert, then report the outcome for this file.
        if not process_png_to_jpg(png_path, output_folder):
            print(f"Failed to process {png_path}")
            continue
        print(f"Processed {patient_id}/{image_id} to {output_folder}")

print("PNG to JPG conversion complete.")

  predata_df = pd.read_csv(predata_path)


No PixelData found in DICOM file: /Volumes/Newsmy/rsna-breast-cancer-detection/train_images/27328/1289476237.dcm
Skipping /Volumes/Newsmy/rsna-breast-cancer-detection/train_images/27328/1289476237.dcm due to processing issue.
处理完毕。
