In [None]:
import os
import pandas as pd
import numpy as np
import pydicom as pdcm
import cv2

def np_CountUpContinuingOnes(b_arr):
    """Return, for every position, the length of the run of ones it sits in.

    Parameters
    ----------
    b_arr : 1-D array-like
        Flags. An element counts as a "one" when it compares ``> 0``.

    Returns
    -------
    numpy.ndarray
        Integer array with the same length as ``b_arr``. Every position that
        belongs to a run of consecutive ones holds the length of that run;
        every position that holds a zero gets ``-1``. Runs that touch the
        first or last element are counted in full.

    Raises
    ------
    ValueError
        If ``b_arr`` is not one-dimensional.

    Examples
    --------
    >>> np_CountUpContinuingOnes(np.array([0, 1, 1, 0, 1])).tolist()
    [-1, 2, 2, -1, 1]
    """
    ones = np.asarray(b_arr) > 0
    if ones.ndim != 1:
        raise ValueError("b_arr must be one-dimensional")

    n = len(ones)
    positions = np.arange(n)

    # Index of the nearest zero at or before each position. Ones contribute
    # -1, i.e. a virtual zero just before the array, so that a run starting at
    # index 0 is not confused with a real zero stored at index 0.
    left = np.maximum.accumulate(np.where(ones, -1, positions))

    # Index of the nearest zero at or after each position, scanning from the
    # right. Ones contribute n, a virtual zero just past the end.
    right = np.minimum.accumulate(np.where(ones, n, positions)[::-1])[::-1]

    # Number of elements strictly between the two enclosing zeros.
    return right - left - 1

def ExtractBreast(img):
    """Crop a 2-D image to the columns and rows with the best run-length score.

    A working copy has every pixel ``<= 20`` set to 0. Columns are then
    flagged as "varying" when their standard deviation, measured over the
    central band of rows, is non-zero. The flags are scored with
    ``np_CountUpContinuingOnes`` and every column whose score equals the
    maximum is kept. The same procedure is repeated for rows, measured over
    the central band of the kept columns.

    Parameters
    ----------
    img : numpy.ndarray
        Two-dimensional image (the shape is unpacked into exactly two values).

    Returns
    -------
    numpy.ndarray
        The selected rows and columns taken from the original, unthresholded
        pixel values.
    """
    def _central_band(extent):
        # Bounds of the band centred on the middle of the axis and reaching
        # 40% of its extent to either side.
        reach = int(extent * 0.4)
        middle = extent // 2
        return middle - reach, middle + reach

    def _best_run_indices(varies):
        # Indices whose run-length score equals the highest score.
        scores = np_CountUpContinuingOnes(varies)
        return np.where(scores == scores.max())[0]

    pristine = img.copy()

    # Flatten everything at or below 20 to zero -- presumably so that a
    # near-black background shows no variation (review: confirm threshold).
    masked = np.where(img <= 20, 0, img)

    # Column selection, judged on the central band of rows.
    n_rows, _ = masked.shape
    top, bottom = _central_band(n_rows)
    col_ind = _best_run_indices(masked[top:bottom].std(axis=0) != 0)

    # Row selection, judged on the central band of the surviving columns.
    masked = masked[:, col_ind]
    _, n_cols = masked.shape
    first, last = _central_band(n_cols)
    row_ind = _best_run_indices(masked[:, first:last].std(axis=1) != 0)

    return pristine[row_ind][:, col_ind]

# Dataset locations: the DICOM folder to read from and the folder that
# receives the converted images
DCM_PATH = "/Volumes/图图/INBreast/INbreast/AllDICOMs"
OUTPUT_BASE_PATH = "Benchmark/INbreast"

# Load the spreadsheet that lists the files to convert
xls_path = "/Volumes/图图/INBreast/INbreast/INbreast.xls"
df = pd.read_excel(xls_path)

def process_and_save(df):
    """Convert every DICOM listed in ``df`` to a cropped 8-bit JPEG.

    For each row, the ``'File Name'`` value is stringified and cut at the
    first ``'.'`` to form a file stem. ``<DCM_PATH>/<stem>.dcm`` is read,
    cropped with ``ExtractBreast``, min-max scaled to 0..255, cast to
    ``uint8`` and written to ``<OUTPUT_BASE_PATH>/<stem>/img.jpg`` (the
    directory is created when missing).

    Progress and problems are reported with ``print``. A missing DICOM file
    or a failed JPEG write is reported and the loop moves on to the next row.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``'File Name'`` column.
    """
    for _, row in df.iterrows():
        # Keep only the text before the first dot (this also strips a
        # trailing ".0" when the value was read as a float).
        file_name = str(row['File Name']).split('.')[0]

        # NOTE(review): this expects the files to be named exactly
        # "<File Name>.dcm". Confirm against the directory listing; if the
        # DICOMs carry longer names, every row will be reported as not found.
        dcm_path = os.path.join(DCM_PATH, file_name + '.dcm')
        if not os.path.exists(dcm_path):
            print(f"DICOM file for {file_name} not found.")
            continue

        # Read the pixel data, crop it, and rescale to the 8-bit range
        dcm = pdcm.dcmread(dcm_path)
        img = ExtractBreast(dcm.pixel_array)
        img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

        # Save the image into its own sub-directory
        img_output_path = os.path.join(OUTPUT_BASE_PATH, file_name)
        os.makedirs(img_output_path, exist_ok=True)
        jpg_path = os.path.join(img_output_path, 'img.jpg')

        # cv2.imwrite signals failure through its boolean return value rather
        # than an exception, so it has to be checked before reporting success.
        if cv2.imwrite(jpg_path, img):
            print(f"Processed {file_name}")
        else:
            print(f"Failed to write {jpg_path}")

# Run the conversion for every row of the spreadsheet loaded into `df`
process_and_save(df)