# In [None]:
import os
import numpy as np
import pandas as pd
import pydicom
import cv2

def np_CountUpContinuingOnes(b_arr):
    """Return, for every position, the length of the run of ones it sits in.

    Args:
        b_arr: 1-D boolean (or 0/1) array-like; entries ``> 0`` count as ones.

    Returns:
        Integer array of the same length as ``b_arr``. Positions inside a
        run of ones hold that run's length; positions holding a zero
        hold ``-1``.
    """
    b_arr = np.asarray(b_arr)
    n = len(b_arr)
    is_one = b_arr > 0
    positions = np.arange(n)

    # Index of the nearest zero at or before each position. ``-1`` acts as
    # a virtual zero just before the array, so a run starting at index 0 is
    # measured in full rather than being confused with a real zero there.
    prev_zero = np.maximum.accumulate(np.where(is_one, -1, positions))

    # Index of the nearest zero at or after each position, scanning from
    # the right. ``n`` is the matching virtual zero just past the end.
    next_zero = np.minimum.accumulate(np.where(is_one, n, positions)[::-1])[::-1]

    # Exclusive distance between the two bounding zeros is the run length;
    # on a zero itself both bounds coincide and the result is -1.
    return next_zero - prev_zero - 1

def ExtractBreast(img):
    """Crop a 2-D grayscale image to its dominant non-blank region.

    Pixels ``<= 20`` are treated as background (set to 0) for the purpose
    of locating the region. A column counts as non-blank when its values
    are not constant across the central ~80 % of rows; the columns kept are
    those whose run score from ``np_CountUpContinuingOnes`` equals the
    maximum (more than one run if several tie). Rows are then selected the
    same way, looking only at the central ~80 % of the kept columns.

    Args:
        img: 2-D numpy array of pixel intensities.

    Returns:
        A new array containing the original (un-thresholded) pixels of the
        selected rows and columns. ``img`` itself is not modified.

    Raises:
        ValueError: if ``img`` is not 2-dimensional.
    """
    if img.ndim != 2:
        raise ValueError(
            f"ExtractBreast expects a 2-D image, got shape {img.shape}"
        )

    # np.where builds a new array, so the caller's image is left untouched
    # and no defensive full-size copy is needed.
    masked = np.where(img <= 20, 0, img)

    # --- columns: inspect the central band of rows -----------------------
    height = masked.shape[0]
    y_a = height // 2 + int(height * 0.4)
    y_b = height // 2 - int(height * 0.4)
    col_active = masked[y_b:y_a].std(axis=0) != 0
    col_runs = np_CountUpContinuingOnes(col_active)
    col_ind = np.where(col_runs == col_runs.max())[0]
    masked = masked[:, col_ind]

    # --- rows: inspect the central band of the kept columns --------------
    width = masked.shape[1]
    x_a = width // 2 + int(width * 0.4)
    x_b = width // 2 - int(width * 0.4)
    row_active = masked[:, x_b:x_a].std(axis=1) != 0
    row_runs = np_CountUpContinuingOnes(row_active)
    row_ind = np.where(row_runs == row_runs.max())[0]

    # Integer-array indexing always returns fresh data, never a view.
    return img[row_ind][:, col_ind]

# Input spreadsheets (one row per case), the folders that hold each case's
# DICOM files, and the root directory that receives the exported images.
normal_cases_xlsx = '/Volumes/图图/BMCD/Normal_case.xlsx'
suspicious_cases_xlsx = '/Volumes/图图/BMCD/Suspicious_case.xlsx'
normal_cases_dir = '/Volumes/图图/BMCD/Dataset/Normal_cases'
suspicious_cases_dir = '/Volumes/图图/BMCD/Dataset/Suspicious_cases'
output_dir = '/Volumes/Newsmy/FM_dataset/BMCD'


# Load each sheet and tag its rows with the case type they came from, then
# stack both into a single table with a fresh 0..n-1 index.
normal_cases_df = pd.read_excel(normal_cases_xlsx).assign(case_type='Normal')
suspicious_cases_df = pd.read_excel(suspicious_cases_xlsx).assign(
    case_type='Suspicious'
)
combined_df = pd.concat(
    [normal_cases_df, suspicious_cases_df],
    ignore_index=True,
)


# Walk every case folder and collect one metadata record per DICOM file.
all_images = []
dir_by_case_type = {'Normal': normal_cases_dir}
for _, row in combined_df.iterrows():
    case_type = row['case_type']
    folder_name = str(int(row['Folder #']))
    # Anything not tagged 'Normal' is read from the suspicious-cases tree.
    case_dir = dir_by_case_type.get(case_type, suspicious_cases_dir)
    folder_path = os.path.join(case_dir, folder_name)

    for file_name in os.listdir(folder_path):
        # Skip entries prefixed with '._' (presumably macOS sidecar files —
        # confirm) and anything without a .dcm/.DCM extension.
        if file_name.startswith('._') or not file_name.endswith(('.dcm', '.DCM')):
            continue

        image_path = os.path.join(folder_path, file_name)
        record = {
            'folder': folder_name,
            'case_type': case_type,
            'image_path': image_path,
            'laterality': row['Breast (Right/Left)'],
            'breast_density': row['BI-RADS categories for breast density'],
            'classification': row['BI-RADS categories for classification '],
        }
        all_images.append(record)


def process_and_save_image(row, output_base_dir):
    """Read one DICOM file, crop it, rescale it to 0-255 and save it as JPEG.

    The image is written to
    ``<output_base_dir>/<case_type>_<folder>_<file stem>/img.jpg``; the
    folder is created if it does not exist.

    Args:
        row: Mapping-like record providing the ``'case_type'``, ``'folder'``
            and ``'image_path'`` entries.
        output_base_dir: Directory under which the per-image folder is
            created.

    Raises:
        OSError: if OpenCV reports that the JPEG could not be written.
    """
    image_path = row['image_path']

    # splitext strips only the final extension. Splitting on the first '.'
    # would collapse names with extra dots (e.g. '1.2.840.dcm' -> '1'), so
    # different files in one folder could overwrite each other's output.
    stem = os.path.splitext(os.path.basename(image_path))[0]
    output_folder = os.path.join(
        output_base_dir,
        f"{row['case_type']}_{row['folder']}_{stem}"
    )
    os.makedirs(output_folder, exist_ok=True)

    # force=True lets pydicom attempt files that lack a standard header.
    dicom_data = pydicom.dcmread(image_path, force=True)
    image = dicom_data.pixel_array
    image = ExtractBreast(image)
    # Min-max stretch so the darkest pixel maps to 0 and the brightest to 255.
    image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)

    output_image_path = os.path.join(output_folder, 'img.jpg')
    # cv2.imwrite signals failure through its return value rather than an
    # exception; surface it instead of reporting a save that did not happen.
    if not cv2.imwrite(output_image_path, image):
        raise OSError(f"Failed to write image: {output_image_path}")
    print(f"Saved: {output_image_path}")

# Export every collected image. The records go through a DataFrame so each
# call receives one pandas Series per image.
images_df = pd.DataFrame(all_images)
for _, row in images_df.iterrows():
    process_and_save_image(row, output_base_dir=output_dir)
