In [9]:
import os
import pandas as pd
import shutil
import numpy as np
from PIL import Image
from rasterio import features
import cv2

In [10]:
# Define the paths to your image folders and csv files
benign_path = 'C:/Users/munte/Desktop/Licenta/Datasets/cbis-ddsm-preprocessed/benign/'
malignant_path = 'C:/Users/munte/Desktop/Licenta/Datasets/cbis-ddsm-preprocessed/malignant/'
benign_csv_path = 'C:/Users/munte/Desktop/Licenta/Datasets/cbis-ddsm-preprocessed/data_df_benign.csv'
malignant_csv_path = 'C:/Users/munte/Desktop/Licenta/Datasets/cbis-ddsm-preprocessed/data_df_malignant.csv'


classes = ['benign', 'malignant']

output_folder_path = "C:/Users/munte/Desktop/Licenta/Datasets/yolo-segmentation/1024/no_pectoral/initial/cbis-yolo-1024-no-pectoral"

In [11]:
# Define a set to keep track of which full mammograms have already been processed
processed_full_mammograms = set()

SIZE = 1024

# Find the points that describe the binary mask contour
def find_polygon_points(mask_path, class_index, new_size=(SIZE, SIZE)):
    # Load the binary mask (ROI) as a grayscale image
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    # Resize the mask
    resized_mask = cv2.resize(mask, new_size)

    # Find contours in the resized binary mask
    contours, _ = cv2.findContours(resized_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Find the largest contour by area
    max_contour = max(contours, key=cv2.contourArea)

    # Simplify the contour to get the polygon points
    epsilon = 0.01 * cv2.arcLength(max_contour, True)
    approx_polygon = cv2.approxPolyDP(max_contour, epsilon, True)

    # Normalize the points and convert them to a list
    normalized_points = []
    height, width = new_size
    for pt in approx_polygon:
        x, y = pt[0]
        normalized_points.append(x / width)
        normalized_points.append(y / height)

    # Combine the class index and polygon points in the desired format
    formatted_line = "{} {}".format(class_index, " ".join(map(str, normalized_points)))

    return formatted_line


def resize_image (image_path, output_path, new_width=SIZE, new_height=SIZE):
    img = Image.open(image_path)
    resized_img = img.resize((new_width, new_height), Image.ANTIALIAS)
    resized_img.save(output_path)


In [12]:
# Save the adnotation file instead of mask for each file 
for csv_path, image_folder, class_name in [(benign_csv_path, benign_path, 'benign'), (malignant_csv_path, malignant_path, 'malignant')]:
    csv_data = pd.read_csv(csv_path)
    for index, row in csv_data.iterrows():
        # Get the image path and mask paths
        patient_id = row['patient_id']
        lr = row['left or right breast']
        view = row['image view']
        mask_filename = f"{class_name} ({index})_mask.jpg"

        image_name = f"{class_name} ({index})_preprocessed.png"
        full_mammogram_name = f"{patient_id} {lr} {view}"
        if full_mammogram_name in processed_full_mammograms:
            continue
        processed_full_mammograms.add(full_mammogram_name)
        image_path = os.path.join(image_folder, image_name)
        mask_paths = []
        masks = csv_data[(csv_data['patient_id'] == patient_id) & (csv_data['left or right breast'] == lr) & (csv_data['image view'] == view)]
        for mask_index, mask_row in masks.iterrows():
            mask_path = os.path.join(image_folder, f"{class_name} ({mask_index})_mask.jpg")
            mask_paths.append(mask_path)
        
        image_filename = os.path.splitext(image_name)[0]
        output_filename = os.path.join(output_folder_path, f"{image_filename}.txt")
        with open(output_filename, 'a') as output_file:
            for mask_path in mask_paths:
                class_id = classes.index(class_name)
                annotation = find_polygon_points(mask_path, class_id)
                output_file.write(annotation + '\n')

        # Copy the image file to the output folder
        shutil.copy(image_path, os.path.join(output_folder_path, f"{image_filename}.jpg"))
        resize_image(image_path, os.path.join(output_folder_path, f"{image_filename}.jpg"))


  resized_img = img.resize((new_width, new_height), Image.ANTIALIAS)
  resized_img = img.resize((new_width, new_height), Image.ANTIALIAS)
