# Preprocess images and output a dataset that has a directory structure that represents a binary classification - fracture or no fracture.

In [28]:
from PIL import Image, ImageEnhance, ImageChops, ImageOps
import numpy as np
import pandas as pd
from os import path, makedirs, walk, listdir
import shutil
import cv2
import glob
from shutil import copyfile


Give File Locations


In [12]:
#replace with own directoires
training_directory = path.join('retinanet_data_format/train')
testing_directory = path.join('retinanet_data_format/test')
valid_directory = path.join('retinanet_data_format/valid')
output_directory =path.join('retinanet_data_format')

Split Images into entropy levels

In [13]:
def calculate_entropy(image_path):
    image = Image.open(image_path)
    grayscale_image = image.convert("L")
    image_array = np.array(grayscale_image)
    histogram, _ = np.histogram(image_array, bins=256, range=(0, 255), density=True)
    entropy = -np.sum(histogram * np.log2(histogram + 1e-10))  # adding epsilon to avoid log(0)
    return entropy

def split_images_into_entropy_groups(directory, entropy_bins, output_dir):
    # Create a new directory for the entropy groups
    entropy_groups_dir = path.join(output_dir, 'entropy_groups')
    makedirs(entropy_groups_dir, exist_ok=True)

    for i in range(len(entropy_bins) - 1):
        # Create directories for the entropy groups
        makedirs(path.join(entropy_groups_dir, f'entropy_group_{i}'), exist_ok=True)

    for filename in listdir(directory):
        file_path = path.join(directory, filename)
        try:
            entropy = calculate_entropy(file_path)
            # Assign the image to an entropy group
            for i in range(len(entropy_bins) - 1):
                if entropy_bins[i] <= entropy < entropy_bins[i + 1]:
                    # Copy the image to the corresponding directory
                    shutil.copy(file_path, path.join(entropy_groups_dir, f'entropy_group_{i}'))
                    break
        except Exception as e:
            print(f"Could not process {file_path}: {e}")

#entropy bins
entropy_bins = [0, 2, 3, 3.5, 4, float('inf')] 

# Run the function for each directory
split_images_into_entropy_groups(training_directory, entropy_bins, training_directory)
split_images_into_entropy_groups(testing_directory, entropy_bins, testing_directory)
split_images_into_entropy_groups(valid_directory, entropy_bins, valid_directory)



Could not process retinanet_data_format/train/entropy_groups: [Errno 21] Is a directory: 'retinanet_data_format/train/entropy_groups'
Could not process retinanet_data_format/train/_annotations.csv: cannot identify image file 'retinanet_data_format/train/_annotations.csv'
Could not process retinanet_data_format/test/entropy_groups: [Errno 21] Is a directory: 'retinanet_data_format/test/entropy_groups'
Could not process retinanet_data_format/test/_annotations.csv: cannot identify image file 'retinanet_data_format/test/_annotations.csv'
Could not process retinanet_data_format/valid/entropy_groups: [Errno 21] Is a directory: 'retinanet_data_format/valid/entropy_groups'
Could not process retinanet_data_format/valid/_annotations.csv: cannot identify image file 'retinanet_data_format/valid/_annotations.csv'


Sub-Split into Brightness Levels

In [14]:
def calculate_brightness(image):
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    return np.mean(hsv[:,:,2])

def categorize_images(path):
    categories = ['very_dark_brightness', 'dark_brightness', 'extra_low_brightness', 
                  'very_low_brightness', 'low_brightness', 'medium_brightness', 'high_brightness']

    for filename in listdir(path):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image = cv2.imread(path.join(path, filename))
            brightness = calculate_brightness(image)
            if brightness < 5:
                category = categories[0]
            elif brightness < 10:
                category = categories[1]
            elif brightness < 21:
                category = categories[2]
            elif brightness < 42:
                category = categories[3]
            elif brightness < 85:
                category = categories[4]
            elif brightness < 170:
                category = categories[5]
            else:
                category = categories[6]

            new_dir = path.join(path, category)
            if not path.exists(new_dir):
                makedirs(new_dir)

            shutil.copy(path.join(path, filename), path.join(new_dir, filename))


# Apply the categorization within the training, testing, and validation directories
for i in range(5):
    entropy_group_dir_train = path.join(training_directory, 'entropy_groups', f'entropy_group_{i}')
    entropy_group_dir_test = path.join(testing_directory, 'entropy_groups', f'entropy_group_{i}')
    entropy_group_dir_valid = path.join(valid_directory, 'entropy_groups', f'entropy_group_{i}')
    if path.exists(entropy_group_dir_train):
        categorize_images(entropy_group_dir_train)
    if path.exists(entropy_group_dir_test):
        categorize_images(entropy_group_dir_test)
    if path.exists(entropy_group_dir_valid):
        categorize_images(entropy_group_dir_valid)

Enhanse images according to brigtness-entropy

In [15]:
# Function to increase brightness and contrast
def adjust_brightness_contrast(input_image_path, output_image_path, brightness=1, contrast=1):
    # Open the image file
    img = Image.open(input_image_path)

    # Enhance brightness
    enhancer = ImageEnhance.Brightness(img)
    img = enhancer.enhance(brightness)

    # Enhance contrast
    enhancer = ImageEnhance.Contrast(img)
    img = enhancer.enhance(contrast)

    # Save the edited image
    img.save(output_image_path)
    img.close()

# Define the brightness and contrast values for each entropy group folder
entropy_group_values = { #checked and cleared
    'entropy_group_0': { 
        "dark_brightness": (1.25, 1.5), #checked and cleared
        "extra_low_brightness": (1, 1.25), #checked and cleared
        "high_brightness": (0.75, 1.5), #does not exist
        "low_brightness": (1, 1.25), #does not exist
        "medium_brightness": (1, 1), #does not exist
        "very_dark_brightness": (2.25, 2.5), #checked and cleared
        "very_low_brightness": (1, 0.75)  
    },

    'entropy_group_1': { 
        "dark_brightness": (1.75, 1.75), #checked and cleared
        "extra_low_brightness": (1.3, 0.75), #checked and cleared
        "high_brightness": (0.75, 1.5), #does not exist
        "low_brightness": (0.75, 1.5), #checked and cleared
        "medium_brightness": (1, 1), #does not exist
        "very_dark_brightness": (2.25, 2.5), #still to dark, cannot justify increase due to fidelity loss
        "very_low_brightness": (1, 0.75) #checked and cleared
    },

    'entropy_group_2': { #checked and cleared
        "dark_brightness": (2, 2), #checked and cleared
        "extra_low_brightness": (1.25, 1.25), #checked and cleared
        "high_brightness": (0.75, 1.5), #checked and cleared
        "low_brightness": (1, 1.25), #checked and cleared
        "medium_brightness": (1, 1), #checked and cleared
        "very_dark_brightness": (2.25, 2.5), #checked and cleared
        "very_low_brightness": (1, 1.25) #checked and cleared
    },

    'entropy_group_3': { #checked and cleared
        "dark_brightness": (2, 2), #checked and cleared
        "extra_low_brightness": (1.5, 1.25), #checked and cleared
        "high_brightness": (0.75, 1.75), #checked and cleared
        "low_brightness": (1, 1.25), #checked and cleared
        "medium_brightness": (1, 1), #checked and cleared
        "very_dark_brightness": (3, 2),  #still to dark, cannot justify increase due to fidelity loss
        "very_low_brightness": (1, 1.25) #checked and cleared
    },

    'entropy_group_4': { 
        "dark_brightness": (2, 2),  #checked and cleared
        "extra_low_brightness": (1.5, 1.25), #checked and cleared
        "high_brightness": (0.75, 2), #checked and cleared
        "low_brightness": (1, 1.25), #checked and cleared
        "medium_brightness": (1, 1.15), #checked and cleared
        "very_dark_brightness": (2.25, 2.5), #does not exist
        "very_low_brightness": (1, 1.25) #checked and cleared
    }
}

# Define a list of the master directories
master_directories = [training_directory, testing_directory, valid_directory]

# Loop through each master directory
for master_directory in master_directories:
    # Loop through each entropy group folder in the dictionary
    for entropy_group_folder, folder_values in entropy_group_values.items():
        # Define the directory path for the current entropy group folder
        dir_path = path.join(master_directory, 'entropy_groups', entropy_group_folder)

        # Loop through each brightness folder in the dictionary
        for folder, (brightness_value, contrast_value) in folder_values.items():
            # Define the input and output paths
            input_dir_path = path.join(dir_path, folder)
            output_dir_path = path.join(input_dir_path, 'edited_images')

            # Remove the directory if it exists
            if path.exists(output_dir_path):
                shutil.rmtree(output_dir_path)

            # Create a new directory for the edited images
            makedirs(output_dir_path, exist_ok=True)

            # Loop through each file in the directory
            for filename in listdir(input_dir_path):
                # Check if the file is an image
                if filename.endswith('.jpg') or filename.endswith('.png'):
                    # Define the input and output paths
                    input_image_path = path.join(input_dir_path, filename)
                    output_image_path = path.join(output_dir_path, filename)

                    # Adjust brightness and contrast of the image
                    adjust_brightness_contrast(input_image_path, output_image_path, brightness=brightness_value, contrast=contrast_value)

move files to master location

In [16]:
def move_augmented_images(master_directories, output_directory):
    # Define the new directories
    new_directories = ['new_train', 'new_test', 'new_valid']

    # Loop through each master directory
    for master_directory, new_directory in zip(master_directories, new_directories):
        # Define the new directory path
        new_dir_path = path.join(output_directory, new_directory)

        # Create the new directory if it doesn't exist
        if not path.exists(new_dir_path):
            makedirs(new_dir_path)

        # Loop through each entropy group folder in the dictionary
        for entropy_group_folder in entropy_group_values.keys():
            # Define the directory path for the current entropy group folder
            dir_path = path.join(master_directory, 'entropy_groups', entropy_group_folder)

            # Loop through each brightness folder in the dictionary
            for folder in entropy_group_values[entropy_group_folder].keys():
                # Define the input and output paths
                input_dir_path = path.join(dir_path, folder, 'edited_images')
                output_dir_path = path.join(new_dir_path, entropy_group_folder, folder)

                # Create the output directory if it doesn't exist
                if not path.exists(output_dir_path):
                    makedirs(output_dir_path)

                # Loop through each file in the directory
                for filename in listdir(input_dir_path):
                    # Check if the file is an image
                    if filename.endswith('.jpg') or filename.endswith('.png'):
                        # Define the input and output paths
                        input_image_path = path.join(input_dir_path, filename)
                        output_image_path = path.join(output_dir_path, filename)

                        # Move the image to the new directory
                        shutil.copy(input_image_path, output_image_path)

        # Copy the _annotations.csv file to the new directory
        annotations_file_path = path.join(master_directory, '_annotations.csv')
        if path.exists(annotations_file_path):
            shutil.copy(annotations_file_path, new_dir_path)

def move_images_to_top(master_directories, output_directory):
    # Define the new directories
    new_directories = ['new_train', 'new_test', 'new_valid']

    # Loop through each master directory
    for master_directory, new_directory in zip(master_directories, new_directories):
        # Define the new directory path
        new_dir_path = path.join(output_directory, new_directory)

        # Loop through each entropy group folder in the dictionary
        for entropy_group_folder in entropy_group_values.keys():
            # Define the directory path for the current entropy group folder
            dir_path = path.join(new_dir_path, entropy_group_folder)

            # Find all images in the entropy group folder and its subfolders
            images = glob.glob(path.join(dir_path, '**', '*.jpg'), recursive=True)
            images += glob.glob(path.join(dir_path, '**', '*.png'), recursive=True)

            # Move each image to the top level of the new directory
            for image_path in images:
                shutil.move(image_path, new_dir_path)

            # Delete the entropy group folder
            shutil.rmtree(dir_path)

move_augmented_images(master_directories, output_directory)
move_images_to_top(master_directories, output_directory)
training_directory = path.join(output_directory, 'new_train')
testing_directory = path.join(output_directory, 'new_test')
valid_directory = path.join(output_directory, 'new_valid')

resize images and update CSV and fill in blanks

In [19]:
def resize_images_and_boxes(directory, size=(256, 256)):
    # Define the column names
    column_names = ['filename', 'x1', 'y1', 'x2', 'y2', 'label']

    # Load the annotations file
    df = pd.read_csv(path.join(directory, '_annotations.csv'), names=column_names)

    # Replace 'humerus' with 'humerus fracture'
    df['label'] = df['label'].replace('humerus', 'humerus fracture')

    # Loop through each unique filename in the DataFrame
    for filename in df['filename'].unique():
        # Load the image
        image = Image.open(path.join(directory, filename))

        # Get the original image size
        original_size = image.size

        # Resize the image
        image = image.resize(size)
        image.save(path.join(directory, filename))

        # Adjust the bounding boxes
        df.loc[df['filename'] == filename, ['x1', 'x2']] = (df.loc[df['filename'] == filename, ['x1', 'x2']] * size[0] / original_size[0]).round()
        df.loc[df['filename'] == filename, ['y1', 'y2']] = (df.loc[df['filename'] == filename, ['y1', 'y2']] * size[1] / original_size[1]).round()

    # Check for images in the directory that are not in the DataFrame
    for image_file in glob.glob(path.join(directory, '*.jpg')):
        filename = path.basename(image_file)
        if filename not in df['filename'].values:
            # Add the missing image to the DataFrame with the label 'None'
            df = df._append({'filename': filename, 'label': 'None'}, ignore_index=True)

    # Save the adjusted annotations
    df.to_csv(path.join(directory, '_annotations.csv'), index=False)

resize_images_and_boxes(training_directory)
resize_images_and_boxes(testing_directory)
resize_images_and_boxes(valid_directory)

create a group of inverted images

In [30]:
def copy_and_invert_images(input_directory, output_directory):
    # Create the output directory if it doesn't exist
    if not path.exists(output_directory):
        makedirs(output_directory)

    # Copy the _annotations.csv file to the new directory
    #shutil.copy(os.path.join(input_directory, '_annotations.csv'), output_directory)

    # Loop through each file in the directory
    for filename in listdir(input_directory):
        # Check if the file is an image
        if filename.endswith('.jpg') or filename.endswith('.png'):
            # Define the input and output paths
            input_image_path = path.join(input_directory, filename)
            output_image_path = path.join(output_directory, filename)

            # Load the image
            image = Image.open(input_image_path)

            # Invert the image
            inverted_image = ImageOps.invert(image)

            # Save the inverted image
            inverted_image.save(output_image_path)

# Use the function to copy and invert the images
copy_and_invert_images(r'retinanet_data_format/new_train', r'retinanet_data_format/new_train_inverted')
copy_and_invert_images(r'retinanet_data_format/new_test', r'retinanet_data_format/new_test_inverted')
copy_and_invert_images(r'retinanet_data_format/new_valid', r'retinanet_data_format/new_valid_inverted')

NameError: name 'listdir' is not defined

create an overlayed image directory

In [25]:
def overlay_images(input_directory1, input_directory2, output_directory):
    # Create the output directory if it doesn't exist
    if not path.exists(output_directory):
        makedirs(output_directory)

    # Copy the _annotations.csv file from the first input directory to the new directory
    shutil.copy(path.join(input_directory1, '_annotations.csv'), output_directory)

    # Loop through each file in the first input directory
    for filename in listdir(input_directory1):
        # Check if the file is an image
        if filename.endswith('.jpg') or filename.endswith('.png'):
            # Define the input paths
            input_image_path1 = path.join(input_directory1, filename)
            input_image_path2 = path.join(input_directory2, filename)

            # Load the images
            image1 = Image.open(input_image_path1).convert('RGB')
            image2 = Image.open(input_image_path2).convert('RGB')

            # Overlay the images
            overlayed_image = ImageChops.darker(image1, image2)

            # Save the overlayed image
            overlayed_image.save(path.join(output_directory, filename))

# Use the function to overlay the images

overlay_images(r'retinanet_data_format/new_train', r'retinanet_data_format/new_train_inverted', r'retinanet_data_format/new_train_overlayed')
overlay_images(r'retinanet_data_format/new_test', r'retinanet_data_format/new_test_inverted', r'retinanet_data_format/new_test_overlayed')
overlay_images(r'retinanet_data_format/new_valid', r'retinanet_data_format/new_valid_inverted', r'retinanet_data_format/new_valid_overlayed')


Remove absolute black (WIP)

## move the files to a binary categorised data structure

In [52]:
def categorise_images(a_dataset_name):
    #creates a directory structure  like: images/
    #                                       class0/*.jpg
    #                                       class1/*.jpg
    #and copies the images into either leaf directory depending on their class label
    #this puts the data into a format that keras can load into a dataset
    
    CLASS_DIRECTORY_NAMES = ('class_fracture','class_no_fracture')
    DATASET_DIRECTORY = 'retinanet_data_format'
    TOP_DIRECTORY_CLASSIFIFICATIONS = path.join(DATASET_DIRECTORY ,'categorised_images')

    fracture_directory = path.join(TOP_DIRECTORY_CLASSIFIFICATIONS, a_dataset_name,CLASS_DIRECTORY_NAMES[0])
    print('fracture_directory', fracture_directory)
    no_fracture_directory = path.join(TOP_DIRECTORY_CLASSIFIFICATIONS,a_dataset_name,CLASS_DIRECTORY_NAMES[-1])
    print('no_fracture_directory', no_fracture_directory)

    if path.exists(fracture_directory) == False:  
        makedirs(fracture_directory) 
    if path.exists(no_fracture_directory) == False:  
        makedirs(no_fracture_directory) 

    #read labels into pandas df
    csv_filepath = path.join('dataset', a_dataset_name, 'labels/labels.csv')
    print('csv_filepath', csv_filepath)
    df = pd.read_csv(path.join(csv_filepath))
    df = df.set_index('filename')
    print('df', df.head())

    #walk through files and copy file to relevant directory
    images_dir = path.join(DATASET_DIRECTORY, 'new_' + a_dataset_name + '_overlayed')
    print('images_dir', images_dir)
    filenames = []
    for (dirpath, dirnames, filenames) in walk(images_dir):
        filenames.extend(filenames)
    print('filenames', filenames[:10])
    print('len(filenames)', len(filenames))
    for filename in filenames:
        #ignore mac os system file
        if filename.endswith('.jpg') == False:
            pass
        elif df.loc[filename]['target'] == 1:
            destination = path.join(fracture_directory, filename)
        else:
            destination = path.join(no_fracture_directory, filename)
        source = path.join(images_dir, filename)
        copyfile(source, destination)

In [53]:
DATASET_NAMES = ('train','valid','test')

for dataset_name in DATASET_NAMES:
    categorise_images(dataset_name)

fracture_directory retinanet_data_format/categorised_images/train/class_fracture
no_fracture_directory retinanet_data_format/categorised_images/train/class_no_fracture
csv_filepath dataset/train/labels/labels.csv
df                                                     target
filename                                                  
image1_753_png.rf.611c51510c1794dbf76a673076783...       1
image1_888_png.rf.614a3cdebe5fb086b60ccef277a97...       1
image1_208_png.rf.61714b3d44ca4a1981225a2910543...       1
image1_766_png.rf.6185692944249a1226ae9ed74c329...       0
image1_68_png.rf.60970c5e5b05495c1d0d4950a95d57...       0
images_dir retinanet_data_format/new_train_overlayed
filenames ['image1_273_png.rf.f022d65b5a20cc363d04891eabb54357.jpg', 'image1_3519_png.rf.d216fecf8d40b460c9937c466a46ad55.jpg', 'image2_535_png.rf.cf4d40b370c59fa5a126e2120d33ea6c.jpg', 'image1_459_png.rf.002fc7bef16dddfc7dd77a8adb9a6a3d.jpg', 'image1_3466_png.rf.90386a2ceaa6ba28df75e7aa1abbd854.jpg', 'image1_201_png

In [54]:
import datetime
print("Last modified: ", datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S") + "\n")

Last modified:  24/03/2024 07:36:13
