In [3]:
import pandas as pd
import numpy as np
import os
from PIL import Image

def np_CountUpContinuingOnes(b_arr):
    """Return, for each position, the length of the run of ones containing it.

    Args:
        b_arr: 1-D boolean or numeric sequence; entries ``> 0`` count as ones.

    Returns:
        Integer array with the same length as ``b_arr``. A position inside a
        run of ones holds the full length of that run; a position holding a
        zero holds ``-1``.
    """
    b_arr = np.asarray(b_arr)
    n = len(b_arr)
    positions = np.arange(n)
    is_one = b_arr > 0

    # Index of the nearest zero at or before each position. The sentinel -1
    # means "no zero to the left", so a run starting at index 0 is measured
    # from just outside the array instead of being confused with a zero that
    # really sits at index 0.
    left = np.maximum.accumulate(np.where(is_one, -1, positions))

    # Index of the nearest zero at or after each position, scanning from the
    # right. The sentinel n plays the same role for runs reaching the end.
    right = np.minimum.accumulate(np.where(is_one, n, positions)[::-1])[::-1]

    # Number of elements strictly between the two bounding zeros.
    return right - left - 1

def _first_longest_run(mask):
    """Return the indices of the first top-scoring run of true values in *mask*.

    Positions are scored with ``np_CountUpContinuingOnes``. Every position
    whose score equals the maximum is a candidate; if several separate runs
    tie, only the first one is kept so the result is always contiguous. When
    all positions share the same score (for example an all-false mask), all
    indices are returned.
    """
    run_scores = np_CountUpContinuingOnes(mask)
    best = np.where(run_scores == run_scores.max())[0]
    # A jump larger than one between neighbouring candidate indices marks the
    # beginning of a different, tied run; cut the selection there.
    breaks = np.where(np.diff(best) > 1)[0]
    if breaks.size:
        best = best[: breaks[0] + 1]
    return best


def ExtractBreast(img, *, threshold=40, band_fraction=0.4):
    """Crop a 2-D image to its widest and tallest run of non-flat pixels.

    Pixels at or below ``threshold`` are treated as background (set to zero)
    for the analysis only. Columns are selected first, by looking at which
    columns vary (non-zero standard deviation) inside a central horizontal
    band; rows are then selected the same way inside a central vertical band
    of the already column-cropped image.

    Args:
        img: 2-D array of pixel intensities.
        threshold: Intensities ``<= threshold`` are zeroed before analysis.
        band_fraction: Half-height (or half-width) of the central band, as a
            fraction of the image size. Must be in ``(0, 0.5]``.

    Returns:
        A new array holding the selected rows and columns of the original,
        un-thresholded ``img``.

    Raises:
        ValueError: If ``img`` is not 2-D or ``band_fraction`` is out of range.
    """
    img = np.asarray(img)
    if img.ndim != 2:
        raise ValueError(
            f"ExtractBreast expects a 2-D array, got shape {img.shape}"
        )
    if not 0 < band_fraction <= 0.5:
        raise ValueError("band_fraction must be in the interval (0, 0.5]")

    # np.where builds a new array, so the caller's pixels stay untouched and
    # no defensive copy of the input is needed.
    masked = np.where(img <= threshold, 0, img)

    # Column selection: a column counts as content if it varies within the
    # central horizontal band.
    height = masked.shape[0]
    half_band = int(height * band_fraction)
    y_lo = height // 2 - half_band
    y_hi = height // 2 + half_band
    col_ind = _first_longest_run(masked[y_lo:y_hi].std(axis=0) != 0)
    masked = masked[:, col_ind]

    # Row selection: same idea on the column-cropped image, using the central
    # vertical band.
    width = masked.shape[1]
    half_band = int(width * band_fraction)
    x_lo = width // 2 - half_band
    x_hi = width // 2 + half_band
    row_ind = _first_longest_run(masked[:, x_lo:x_hi].std(axis=1) != 0)

    # Index arrays trigger advanced indexing, so the result is a fresh array.
    return img[row_ind][:, col_ind]

# Input locations: folder of source images and the two label CSV files.
image_dir = "/Volumes/图图/CSAW-M/images/preprocessed"
train_csv_file = "/Volumes/图图/CSAW-M/labels/CSAW-M_train.csv"
test_csv_file = "/Volumes/图图/CSAW-M/labels/CSAW-M_test.csv"
# Root of the new folder tree that receives the processed images and files.
output_base_dir = "/Volumes/图图/CSAW-M/image-classification"

def process_csv(csv_file, image_subfolder, output_subfolder):
    """Crop every image listed in a label CSV and save it with its metadata.

    For each row of the semicolon-delimited CSV, the image named in the
    ``Filename`` column is read from ``image_dir/image_subfolder``, cropped
    with ``ExtractBreast`` and written as ``img.jpg`` into
    ``output_base_dir/output_subfolder/<filename without extension>/``. The
    row's ``Label`` and ``Dicom_image_laterality`` values are stored next to
    it in ``info_dict.npy``.

    Args:
        csv_file: Path to the label CSV.
        image_subfolder: Sub-folder of ``image_dir`` holding the source images.
        output_subfolder: Sub-folder of ``output_base_dir`` to write into.
    """
    df = pd.read_csv(csv_file, delimiter=';')

    for _, row in df.iterrows():
        filename = row['Filename']
        label = row['Label']
        dicom_image_laterality = row['Dicom_image_laterality']

        # Build the source image path.
        image_path = os.path.join(image_dir, image_subfolder, filename)

        # Read the pixels inside a context manager so the file handle is
        # released right away instead of lingering across loop iterations.
        with Image.open(image_path) as image:
            image_array = np.array(image)
        cropped = ExtractBreast(image_array)

        # One output folder per image, named after the file without its
        # extension. splitext only strips the final suffix, so names that
        # contain extra dots do not collapse into the same folder.
        stem = os.path.splitext(filename)[0]
        output_dir = os.path.join(output_base_dir, output_subfolder, stem)
        os.makedirs(output_dir, exist_ok=True)
        output_image_path = os.path.join(output_dir, 'img.jpg')
        Image.fromarray(cropped).save(output_image_path)

        # Store Label and Dicom_image_laterality in info_dict.npy. np.save
        # pickles the dict, so readers need np.load(..., allow_pickle=True).
        info_dict = {
            'Label': label,
            'Dicom_image_laterality': dicom_image_laterality
        }
        np.save(os.path.join(output_dir, 'info_dict.npy'), info_dict)
        print(f'{output_image_path} has been saved')

    print(f"{output_subfolder} 文件夹处理完成并保存文件。")

# Process the train split and then the test split, saving each into its own
# output folder.
process_csv(
    csv_file=train_csv_file,
    image_subfolder='train',
    output_subfolder='Train',
)
process_csv(
    csv_file=test_csv_file,
    image_subfolder='test',
    output_subfolder='Test',
)


/Volumes/图图/CSAW-M/image-classification/Train/train_1/img.jpg has been saved
/Volumes/图图/CSAW-M/image-classification/Train/train_2/img.jpg has been saved
/Volumes/图图/CSAW-M/image-classification/Train/train_3/img.jpg has been saved
/Volumes/图图/CSAW-M/image-classification/Train/train_4/img.jpg has been saved
/Volumes/图图/CSAW-M/image-classification/Train/train_5/img.jpg has been saved
/Volumes/图图/CSAW-M/image-classification/Train/train_6/img.jpg has been saved
/Volumes/图图/CSAW-M/image-classification/Train/train_7/img.jpg has been saved
/Volumes/图图/CSAW-M/image-classification/Train/train_8/img.jpg has been saved
/Volumes/图图/CSAW-M/image-classification/Train/train_9/img.jpg has been saved
/Volumes/图图/CSAW-M/image-classification/Train/train_10/img.jpg has been saved
/Volumes/图图/CSAW-M/image-classification/Train/train_11/img.jpg has been saved
/Volumes/图图/CSAW-M/image-classification/Train/train_12/img.jpg has been saved
/Volumes/图图/CSAW-M/image-classification/Train/train_13/img.jpg has been s