In [9]:
from PIL import Image, ImageDraw, ImageEnhance, ImageFont, ImageChops, ImageOps
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import shutil
import cv2
import glob
import random
import ipykernel
import pathlib

Give File Locations


In [None]:
# Replace these with your own directories.
# They are absolute, machine-specific paths: the three dataset splits that the
# later cells read from, plus the folder that receives the processed copies.
training_directory = r'C:\Users\Chris\Documents\Uni\Programm for AI\Final Test\train'
testing_directory = r'C:\Users\Chris\Documents\Uni\Programm for AI\Final Test\test'
valid_directory = r'C:\Users\Chris\Documents\Uni\Programm for AI\Final Test\valid'
output_directory =r'C:\Users\Chris\Documents\Uni\Programm for AI\Final Test'

Split Images into entropy levels

In [6]:
def calculate_entropy(image_path):
    """Return the Shannon entropy (in bits) of an image's grayscale histogram.

    The image is converted to 8-bit grayscale, the 256 intensity levels are
    counted, and the counts are normalised into probabilities ``p``. The
    result is ``-sum(p * log2(p))`` over the levels that actually occur, so
    it lies in ``[0, 8]``.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        The entropy as a numpy float; ``0.0`` for a single-colour image.

    Raises:
        Whatever ``PIL.Image.open`` raises for missing or non-image files.
    """
    # The context manager releases the file handle even if conversion fails.
    with Image.open(image_path) as image:
        image_array = np.array(image.convert("L"))

    # One bin per integer intensity 0..255. Raw counts are normalised by hand:
    # ``density=True`` divides by the bin width as well, which does not yield
    # probabilities that sum to 1.
    counts, _ = np.histogram(image_array, bins=256, range=(0, 256))
    total = counts.sum()
    if total == 0:
        # Empty pixel array: no information content.
        return np.float64(0.0)

    # Empty bins contribute nothing (p * log2(p) -> 0), so drop them rather
    # than adding an epsilon, which pushed single-colour images below zero.
    probabilities = counts[counts > 0] / total
    entropy = -np.sum(probabilities * np.log2(probabilities))
    return entropy + 0.0  # turns -0.0 into 0.0

def split_images_into_entropy_groups(directory, entropy_bins, output_dir):
    """Copy each image in ``directory`` into a folder chosen by its entropy.

    Creates ``<output_dir>/entropy_groups/entropy_group_<i>`` for every pair
    of consecutive edges in ``entropy_bins`` and copies each file whose
    entropy satisfies ``entropy_bins[i] <= entropy < entropy_bins[i + 1]``
    into group ``i``. Source files are left in place.

    Args:
        directory: Folder whose direct children are examined.
        entropy_bins: Ascending bin edges; ``len(entropy_bins) - 1`` groups
            are created.
        output_dir: Folder that receives the ``entropy_groups`` tree. It may
            be the same as ``directory``.

    Files that cannot be processed (for example non-image files) are reported
    on stdout and skipped, as is any image whose entropy matches no bin.
    """
    entropy_groups_dir = os.path.join(output_dir, 'entropy_groups')
    os.makedirs(entropy_groups_dir, exist_ok=True)

    # One destination folder per bin, created up front so empty groups exist too.
    group_dirs = []
    for i in range(len(entropy_bins) - 1):
        group_dir = os.path.join(entropy_groups_dir, f'entropy_group_{i}')
        os.makedirs(group_dir, exist_ok=True)
        group_dirs.append(group_dir)

    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)

        # Sub-folders (including 'entropy_groups' itself when output_dir is
        # directory) are not images; skip them instead of reporting an error.
        if not os.path.isfile(file_path):
            continue

        try:
            entropy = calculate_entropy(file_path)
            for i, (lower, upper) in enumerate(zip(entropy_bins, entropy_bins[1:])):
                if lower <= entropy < upper:
                    shutil.copy(file_path, group_dirs[i])
                    break
            else:
                # Previously such images vanished from the pipeline without a trace.
                print(f"No entropy bin matches {file_path} (entropy={entropy})")
        except Exception as e:
            # Deliberate best effort: one unreadable file must not stop the split.
            print(f"Could not process {file_path}: {e}")

# Entropy bin edges: five groups, the last one open-ended
entropy_bins = [0, 2, 3, 3.5, 4, float('inf')]

# Split every dataset in place: each split gets its own 'entropy_groups' folder
for dataset_directory in (training_directory, testing_directory, valid_directory):
    split_images_into_entropy_groups(dataset_directory, entropy_bins, dataset_directory)



Sub-Split into Brightness Levels

In [17]:
def calculate_brightness(image):
    """Return the mean of the HSV value channel of a BGR image array."""
    hsv_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    value_channel = hsv_pixels[:, :, 2]
    return np.mean(value_channel)

def categorize_images(path):
    """Copy every ``.jpg``/``.png`` in ``path`` into a brightness sub-folder.

    The brightness of each image is obtained from ``calculate_brightness`` and
    mapped to one of seven categories; the image is then copied (not moved)
    into ``<path>/<category>/``. Category folders are created on demand.

    Args:
        path: Folder whose direct image files are categorised.

    Files that OpenCV cannot decode are reported on stdout and skipped.
    """
    categories = ['very_dark_brightness', 'dark_brightness', 'extra_low_brightness',
                  'very_low_brightness', 'low_brightness', 'medium_brightness', 'high_brightness']
    # Exclusive upper brightness bound of every category except the last;
    # anything at or above the final bound is 'high_brightness'.
    upper_bounds = [5, 10, 21, 42, 85, 170]

    for filename in os.listdir(path):
        if not filename.endswith((".jpg", ".png")):
            continue

        source_path = os.path.join(path, filename)
        image = cv2.imread(source_path)
        if image is None:
            # cv2.imread signals an unreadable or corrupt file by returning
            # None instead of raising; skip it so one bad file does not
            # abort the whole run.
            print(f"Could not read {source_path}; skipping")
            continue

        brightness = calculate_brightness(image)

        category = categories[-1]
        for bound, name in zip(upper_bounds, categories):
            if brightness < bound:
                category = name
                break

        new_dir = os.path.join(path, category)
        os.makedirs(new_dir, exist_ok=True)
        shutil.copy(source_path, os.path.join(new_dir, filename))


# Sub-split each of the five entropy groups of every dataset by brightness.
# For each group index the splits are visited in the order train, test, valid.
for group_index in range(5):
    for dataset_directory in (training_directory, testing_directory, valid_directory):
        entropy_group_dir = os.path.join(dataset_directory, 'entropy_groups', f'entropy_group_{group_index}')
        # A group folder may be absent for a split; only categorise what exists
        if os.path.exists(entropy_group_dir):
            categorize_images(entropy_group_dir)

Enhance images according to their brightness and entropy group

In [18]:
def adjust_brightness_contrast(input_image_path, output_image_path, brightness=1, contrast=1):
    """Write a brightness- and contrast-adjusted copy of an image.

    Brightness is applied first, then contrast, each through the matching
    ``ImageEnhance`` enhancer. With both factors left at 1 the enhancers are
    still applied and the result is re-saved.

    Args:
        input_image_path: Image file to read.
        output_image_path: Destination file; passed unchanged to ``save``.
        brightness: Factor handed to ``ImageEnhance.Brightness(...).enhance``.
        contrast: Factor handed to ``ImageEnhance.Contrast(...).enhance``.
    """
    # ``enhance`` returns a new image, so only the object returned by
    # ``Image.open`` holds the file handle; the context manager closes it.
    with Image.open(input_image_path) as source:
        brightened = ImageEnhance.Brightness(source).enhance(brightness)
        adjusted = ImageEnhance.Contrast(brightened).enhance(contrast)
        adjusted.save(output_image_path)

# Enhancement settings for every entropy group folder.
# Outer key: entropy group folder name. Inner key: brightness category folder name.
# Each value is a (brightness, contrast) pair; the enhancement loop unpacks it in
# that order and passes the two numbers to adjust_brightness_contrast as factors.
# "does not exist" marks a category noted as absent for that entropy group
# (presumably: no images landed there when the values were tuned - confirm).
entropy_group_values = { #checked and cleared
    'entropy_group_0': { 
        "dark_brightness": (1.25, 1.5), #checked and cleared
        "extra_low_brightness": (1, 1.25), #checked and cleared
        "high_brightness": (0.75, 1.5), #does not exist
        "low_brightness": (1, 1.25), #does not exist
        "medium_brightness": (1, 1), #does not exist
        "very_dark_brightness": (2.25, 2.5), #checked and cleared
        "very_low_brightness": (1, 0.75)  
    },

    'entropy_group_1': { 
        "dark_brightness": (1.75, 1.75), #checked and cleared
        "extra_low_brightness": (1.3, 0.75), #checked and cleared
        "high_brightness": (0.75, 1.5), #does not exist
        "low_brightness": (0.75, 1.5), #checked and cleared
        "medium_brightness": (1, 1), #does not exist
        "very_dark_brightness": (2.25, 2.5), #still too dark; a further increase cannot be justified due to fidelity loss
        "very_low_brightness": (1, 0.75) #checked and cleared
    },

    'entropy_group_2': { #checked and cleared
        "dark_brightness": (2, 2), #checked and cleared
        "extra_low_brightness": (1.25, 1.25), #checked and cleared
        "high_brightness": (0.75, 1.5), #checked and cleared
        "low_brightness": (1, 1.25), #checked and cleared
        "medium_brightness": (1, 1), #checked and cleared
        "very_dark_brightness": (2.25, 2.5), #checked and cleared
        "very_low_brightness": (1, 1.25) #checked and cleared
    },

    'entropy_group_3': { #checked and cleared
        "dark_brightness": (2, 2), #checked and cleared
        "extra_low_brightness": (1.5, 1.25), #checked and cleared
        "high_brightness": (0.75, 1.75), #checked and cleared
        "low_brightness": (1, 1.25), #checked and cleared
        "medium_brightness": (1, 1), #checked and cleared
        "very_dark_brightness": (3, 2),  #still too dark; a further increase cannot be justified due to fidelity loss
        "very_low_brightness": (1, 1.25) #checked and cleared
    },

    'entropy_group_4': { 
        "dark_brightness": (2, 2),  #checked and cleared
        "extra_low_brightness": (1.5, 1.25), #checked and cleared
        "high_brightness": (0.75, 2), #checked and cleared
        "low_brightness": (1, 1.25), #checked and cleared
        "medium_brightness": (1, 1.15), #checked and cleared
        "very_dark_brightness": (2.25, 2.5), #does not exist
        "very_low_brightness": (1, 1.25) #checked and cleared
    }
}

# The three dataset splits that receive the enhancement pass
master_directories = [training_directory, testing_directory, valid_directory]

# For every split / entropy group / brightness folder, write adjusted copies of
# the images into a freshly created 'edited_images' sub-folder.
for master_directory in master_directories:
    for entropy_group_folder, folder_values in entropy_group_values.items():
        dir_path = os.path.join(master_directory, 'entropy_groups', entropy_group_folder)

        for folder, (brightness_value, contrast_value) in folder_values.items():
            input_dir_path = os.path.join(dir_path, folder)
            output_dir_path = os.path.join(input_dir_path, 'edited_images')

            # Start from an empty output folder so stale results never survive a re-run
            if os.path.exists(output_dir_path):
                shutil.rmtree(output_dir_path)
            os.makedirs(output_dir_path, exist_ok=True)

            # Only direct .jpg/.png children are enhanced; the sub-folder is ignored
            image_names = [name for name in os.listdir(input_dir_path)
                           if name.endswith('.jpg') or name.endswith('.png')]
            for filename in image_names:
                input_image_path = os.path.join(input_dir_path, filename)
                output_image_path = os.path.join(output_dir_path, filename)
                adjust_brightness_contrast(
                    input_image_path,
                    output_image_path,
                    brightness=brightness_value,
                    contrast=contrast_value,
                )

C:\Users\Chris\Documents\Uni\Programm for AI\completed test


move files to master location

In [23]:
def move_augmented_images(master_directories, output_directory):
    """Copy the edited images of each split into a new per-split tree.

    The splits in ``master_directories`` are paired, in order, with the
    folders ``new_training``, ``new_testing`` and ``new_valid`` inside
    ``output_directory``. For every entropy group and brightness folder named
    in ``entropy_group_values``, the ``.jpg``/``.png`` files found in its
    ``edited_images`` folder are copied to
    ``<new split>/<entropy group>/<brightness folder>/``. The split's
    ``_annotations.csv`` is copied alongside when it exists. Despite the
    name, files are copied; the sources stay in place.
    """
    target_names = ['new_training', 'new_testing', 'new_valid']

    for source_root, target_name in zip(master_directories, target_names):
        target_root = os.path.join(output_directory, target_name)
        if not os.path.exists(target_root):
            os.makedirs(target_root)

        for group_name, brightness_folders in entropy_group_values.items():
            group_root = os.path.join(source_root, 'entropy_groups', group_name)

            for brightness_name in brightness_folders.keys():
                edited_dir = os.path.join(group_root, brightness_name, 'edited_images')
                destination_dir = os.path.join(target_root, group_name, brightness_name)

                # The destination is created even when no image ends up in it
                if not os.path.exists(destination_dir):
                    os.makedirs(destination_dir)

                for entry in os.listdir(edited_dir):
                    is_image = entry.endswith('.jpg') or entry.endswith('.png')
                    if not is_image:
                        continue
                    shutil.copy(os.path.join(edited_dir, entry),
                                os.path.join(destination_dir, entry))

        # Carry the annotations over so the new split is self-contained
        annotations_source = os.path.join(source_root, '_annotations.csv')
        if os.path.exists(annotations_source):
            shutil.copy(annotations_source, target_root)

def move_images_to_top(master_directories, output_directory):
    """Flatten each new split: move grouped images up and drop the group folders.

    For every split folder (``new_training``, ``new_testing``, ``new_valid``
    inside ``output_directory``) and every entropy group named in
    ``entropy_group_values``, all ``.jpg``/``.png`` files found anywhere
    below the group folder are moved directly into the split folder, after
    which the group folder is deleted.

    Args:
        master_directories: Only its length matters here; it limits how many
            of the three new split folders are processed.
        output_directory: Folder containing the new split folders.

    An image already present at the top level under the same name is
    replaced, so the step can be re-run. Group folders that do not exist are
    skipped.
    """
    new_directories = ['new_training', 'new_testing', 'new_valid']

    for _master_directory, new_directory in zip(master_directories, new_directories):
        new_dir_path = os.path.join(output_directory, new_directory)

        for entropy_group_folder in entropy_group_values.keys():
            dir_path = os.path.join(new_dir_path, entropy_group_folder)

            # Nothing to flatten (e.g. already flattened by an earlier run);
            # rmtree on a missing folder would raise.
            if not os.path.isdir(dir_path):
                continue

            images = glob.glob(os.path.join(dir_path, '**', '*.jpg'), recursive=True)
            images += glob.glob(os.path.join(dir_path, '**', '*.png'), recursive=True)

            for image_path in images:
                # Name the destination file explicitly: moving into a
                # directory raises shutil.Error when the file already exists
                # there, which broke every re-run of this step.
                destination = os.path.join(new_dir_path, os.path.basename(image_path))
                shutil.move(image_path, destination)

            shutil.rmtree(dir_path)

# Build the flattened 'new_*' splits from the edited images
move_augmented_images(master_directories, output_directory)
move_images_to_top(master_directories, output_directory)

# From here on the split variables point at the processed copies, not the raw data
training_directory, testing_directory, valid_directory = (
    os.path.join(output_directory, split_name)
    for split_name in ('new_training', 'new_testing', 'new_valid')
)

resize images and update CSV and fill in blanks

In [25]:
def _resize_image_in_place(image_path, size):
    """Resize the image at ``image_path`` to ``size`` and return its original size."""
    with Image.open(image_path) as image:
        original_size = image.size
        resized = image.resize(size)
    # Save only after the source handle is released, since the same path is overwritten.
    resized.save(image_path)
    return original_size


def resize_images_and_boxes(directory, size=(256, 256)):
    """Resize a split's images in place and rescale its bounding boxes.

    Reads ``<directory>/_annotations.csv`` as a header-less file with the
    columns ``filename, x1, y1, x2, y2, label``, renames the label
    ``'humerus'`` to ``'humerus fracture'``, resizes every annotated image to
    ``size`` and scales its box coordinates by the same ratio (rounded).
    ``.jpg`` files in the directory that have no annotation row are resized
    too and appended with the label ``'None'`` and empty coordinates. The
    annotations file is then overwritten.

    Args:
        directory: Split folder holding the images and ``_annotations.csv``.
        size: Target ``(width, height)`` in pixels.

    Note:
        The file is written back WITH a header row (``to_csv`` default)
        although it is read without one, so running this twice on the same
        folder would treat the header as data. That output format is kept
        unchanged in case later steps rely on it.
    """
    column_names = ['filename', 'x1', 'y1', 'x2', 'y2', 'label']
    annotations_path = os.path.join(directory, '_annotations.csv')

    df = pd.read_csv(annotations_path, names=column_names)
    df['label'] = df['label'].replace('humerus', 'humerus fracture')

    for filename in df['filename'].unique():
        original_size = _resize_image_in_place(os.path.join(directory, filename), size)

        # Scale x by the width ratio and y by the height ratio
        rows = df['filename'] == filename
        df.loc[rows, ['x1', 'x2']] = (df.loc[rows, ['x1', 'x2']] * size[0] / original_size[0]).round()
        df.loc[rows, ['y1', 'y2']] = (df.loc[rows, ['y1', 'y2']] * size[1] / original_size[1]).round()

    # Images without any annotation: resize them as well (they used to keep
    # their original size) and record them with the label 'None'.
    annotated = set(df['filename'])
    unannotated = []
    for image_file in glob.glob(os.path.join(directory, '*.jpg')):
        filename = os.path.basename(image_file)
        if filename in annotated:
            continue
        _resize_image_in_place(image_file, size)
        unannotated.append(filename)

    if unannotated:
        # DataFrame.append was removed in pandas 2.0; a single concat also
        # avoids growing the frame row by row.
        extra_rows = pd.DataFrame({'filename': unannotated, 'label': 'None'})
        df = pd.concat([df, extra_rows], ignore_index=True)

    df.to_csv(annotations_path, index=False)

# Resize the processed train, test and validation splits in turn
for split_directory in (training_directory, testing_directory, valid_directory):
    resize_images_and_boxes(split_directory)

create a group of inverted images

In [None]:
def copy_and_invert_images(input_directory, output_directory):
    """Write a colour-inverted copy of every image, plus the annotations file.

    Copies ``_annotations.csv`` from ``input_directory`` to
    ``output_directory`` (created if needed) and saves an inverted version of
    each ``.jpg``/``.png`` under the same file name.

    Args:
        input_directory: Folder with the source images and ``_annotations.csv``.
        output_directory: Folder that receives the inverted images.

    Images whose mode is not ``1``, ``L`` or ``RGB`` (for example RGBA or
    palette PNGs) are converted to RGB first, because ``ImageOps.invert``
    rejects other modes; any alpha channel is therefore not preserved.
    """
    os.makedirs(output_directory, exist_ok=True)
    shutil.copy(os.path.join(input_directory, '_annotations.csv'), output_directory)

    for filename in os.listdir(input_directory):
        if not filename.endswith(('.jpg', '.png')):
            continue

        input_image_path = os.path.join(input_directory, filename)
        output_image_path = os.path.join(output_directory, filename)

        # The context manager closes the source file once the pixels are processed.
        with Image.open(input_image_path) as image:
            if image.mode in ('1', 'L', 'RGB'):
                invertible = image
            else:
                invertible = image.convert('RGB')
            inverted_image = ImageOps.invert(invertible)

        inverted_image.save(output_image_path)

# Create an inverted copy of every processed split.
# The folders are resolved against output_directory (where the 'new_*' splits
# were written) rather than the kernel's current working directory, so the
# cell no longer depends on where the notebook was launched from.
for split_name in ('new_training', 'new_testing', 'new_valid'):
    split_directory = os.path.join(output_directory, split_name)
    copy_and_invert_images(split_directory, split_directory + '_inverted')

create an overlayed image directory

In [None]:
def overlay_images(input_directory1, input_directory2, output_directory):
    """Combine same-named images from two folders with a per-pixel 'darker' blend.

    The annotations file of ``input_directory1`` is copied to
    ``output_directory`` (created when absent). Every ``.jpg``/``.png`` in
    ``input_directory1`` is paired with the file of the same name in
    ``input_directory2``; both are converted to RGB, merged with
    ``ImageChops.darker`` and saved under that name in ``output_directory``.
    """
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    annotations_source = os.path.join(input_directory1, '_annotations.csv')
    shutil.copy(annotations_source, output_directory)

    image_names = (
        entry for entry in os.listdir(input_directory1)
        if entry.endswith('.jpg') or entry.endswith('.png')
    )
    for entry in image_names:
        first = Image.open(os.path.join(input_directory1, entry)).convert('RGB')
        second = Image.open(os.path.join(input_directory2, entry)).convert('RGB')

        blended = ImageChops.darker(first, second)
        blended.save(os.path.join(output_directory, entry))

# Overlay every processed split with its inverted counterpart.
# The folders are resolved against output_directory (where the 'new_*' splits
# were written) rather than the kernel's current working directory, so the
# cell no longer depends on where the notebook was launched from.
for split_name in ('new_training', 'new_testing', 'new_valid'):
    split_directory = os.path.join(output_directory, split_name)
    overlay_images(split_directory, split_directory + '_inverted', split_directory + '_overlayed')


Remove absolute black (WIP)

In [None]:
# # Define source and target directories 
# src_dir = 'all_resized_overlay'
# tgt_dir = 'all_resized_overlay_background_removed'

# # Create target directory if it doesn't exist
# if not os.path.exists(tgt_dir):
#     os.makedirs(tgt_dir)

# # Loop over all files in the source directory
# for filename in os.listdir(src_dir):
#     if filename.endswith('.jpg'): 
#         # Open each image file
#         img = Image.open(os.path.join(src_dir, filename)).convert("RGBA")
        
#         # Create a new sequence object by replacing all black pixels with transparent ones
#         datas = []
#         for item in img.getdata():
#             # change all black (also shades of blacks)
#             # pixels to transparent
#             if item[0] in list(range(0, 50)):
#                 datas.append((255, 255, 255, 0))
#             else:
#                 datas.append(item)
                
#         # Create new image and save
#         img.putdata(datas)
#         img_rgb = img.convert("RGB")  # convert back to RGB before saving as JPEG
#         img_rgb.save(os.path.join(tgt_dir, filename))  # Save as JPEG
#         img.save(os.path.join(tgt_dir, os.path.splitext(filename)[0] + '.png'))  # Save as PNG