Data description

In [17]:
from JoinDatasets import *
# Directory handed to join_data_sets as its first argument.
main_dir = "./data"
# Directory handed to join_data_sets as its second argument. The
# "Dataset content" cell reads the Healthy/Powdery/Rust subfolders from it.
# NOTE(review): the data-augmentation cell later rebinds `output_dir` to
# 'augmented-data/', so the notebook has to be run top to bottom.
output_dir = "./merged-data"
# join_data_sets is provided by the star import of JoinDatasets; presumably it
# merges the datasets found under main_dir into output_dir -- confirm there.
join_data_sets(main_dir, output_dir)

Dataset content

In [None]:
import os
import matplotlib.pyplot as plt

# Function to count the number of files in a directory
def count_files(directory):
    """
    Return how many regular files sit directly inside a directory.

    Subdirectories are neither descended into nor counted themselves.

    :param directory: path of the directory to inspect
    :return: number of entries of ``directory`` that are files
    """
    entry_paths = (os.path.join(directory, entry) for entry in os.listdir(directory))
    return sum(1 for path in entry_paths if os.path.isfile(path))

plots_saving_dir = "saved-plots/"
# plt.savefig does not create missing folders, so make sure the target exists.
os.makedirs(plots_saving_dir, exist_ok=True)

# Count images in each folder.
# NOTE: `output_dir` must still be the merged-data folder set in the first cell;
# the data-augmentation cell rebinds it, so run the notebook top to bottom.
healthy_count = count_files(os.path.join(output_dir, "Healthy"))
powdery_count = count_files(os.path.join(output_dir, "Powdery"))
rust_count = count_files(os.path.join(output_dir, "Rust"))

# Create a bar chart
labels = ['Healthy', 'Powdery', 'Rust']
counts = [healthy_count, powdery_count, rust_count]

plt.bar(labels, counts)
plt.xlabel('Categories')
plt.ylabel('Count')
plt.title('Number of Images in Each Category')
plt.savefig(os.path.join(plots_saving_dir, 'Number_of_Images_in_Each_Category.png'), bbox_inches='tight')
plt.show()

# Calculate total count
total_count = sum(counts)

# Calculate percentages; an empty dataset would otherwise divide by zero.
if total_count > 0:
    healthy_percentage = (healthy_count / total_count) * 100
    powdery_percentage = (powdery_count / total_count) * 100
    rust_percentage = (rust_count / total_count) * 100
else:
    healthy_percentage = powdery_percentage = rust_percentage = 0.0

# Create a pie chart
labels = ['Healthy', 'Powdery', 'Rust']
sizes = [healthy_percentage, powdery_percentage, rust_percentage]
colors = ['lightgreen', 'lightblue', 'lightcoral']

if total_count > 0:
    plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)
    plt.axis('equal')
    plt.title('Percentage of Images of Each Category in the Dataset')
    plt.savefig(os.path.join(plots_saving_dir, 'Percentage_of_Images_of_Each_Category_in_the_Dataset.png'), bbox_inches='tight')

    plt.show()
else:
    # A pie chart of all-zero shares is meaningless; say why it is missing.
    print(f"No images found under '{output_dir}'; skipping the pie chart.")


Data relations

Errors and noise

Data difficulty

Data representation

Data normalisation

Data augmentation

In [18]:
import numpy as np
from skimage import exposure
from skimage import io
import os
from scipy import ndimage

def better_contrast(original_image, saving_dir, new_name):
    """
    Rescale the image's intensities and save the result to a '.jpg' file.

    The 0.2 and 99.8 percentiles of all pixel values are passed to
    ``exposure.rescale_intensity`` as its ``in_range``.

    :param original_image: image array to process
    :param saving_dir: path prefix the output file name starts with
    :param new_name: text appended to ``saving_dir`` before the '.jpg' extension
    """
    low, high = np.percentile(original_image, (0.2, 99.8))
    stretched = exposure.rescale_intensity(original_image, in_range=(low, high))

    io.imsave(fname=saving_dir + new_name + '.jpg', arr=stretched)

def gamma_correction(original_image, saving_dir, new_name):
    """
    Apply ``exposure.adjust_gamma`` (gamma=0.4, gain=0.9) and save to a '.jpg' file.

    :param original_image: image array to process
    :param saving_dir: path prefix the output file name starts with
    :param new_name: text appended to ``saving_dir`` before the '.jpg' extension
    """
    target_path = saving_dir + new_name + '.jpg'
    io.imsave(
        fname=target_path,
        arr=exposure.adjust_gamma(original_image, gamma=0.4, gain=0.9),
    )

def log_correction(original_image, saving_dir, new_name):
    """
    Apply ``exposure.adjust_log`` with its default settings and save to a '.jpg' file.

    :param original_image: image array to process
    :param saving_dir: path prefix the output file name starts with
    :param new_name: text appended to ``saving_dir`` before the '.jpg' extension
    """
    target_path = saving_dir + new_name + '.jpg'
    adjusted = exposure.adjust_log(original_image)
    io.imsave(fname=target_path, arr=adjusted)

def sigmoid_correction(original_image, saving_dir, new_name):
    """
    Apply ``exposure.adjust_sigmoid`` with its default settings and save to a '.jpg' file.

    :param original_image: image array to process
    :param saving_dir: path prefix the output file name starts with
    :param new_name: text appended to ``saving_dir`` before the '.jpg' extension
    """
    target_path = saving_dir + new_name + '.jpg'
    adjusted = exposure.adjust_sigmoid(original_image)
    io.imsave(fname=target_path, arr=adjusted)

def horizontal_flip(original_image, saving_dir, new_name):
    """
    Reverse the image along its second axis and save the result to a '.jpg' file.

    :param original_image: image array to process
    :param saving_dir: path prefix the output file name starts with
    :param new_name: text appended to ``saving_dir`` before the '.jpg' extension
    """
    target_path = saving_dir + new_name + '.jpg'
    mirrored = original_image[:, ::-1]  # reversed column order
    io.imsave(fname=target_path, arr=mirrored)

def vertical_flip(original_image, saving_dir, new_name):
    """
    Reverse the image along its first axis and save the result to a '.jpg' file.

    :param original_image: image array to process
    :param saving_dir: path prefix the output file name starts with
    :param new_name: text appended to ``saving_dir`` before the '.jpg' extension
    """
    target_path = saving_dir + new_name + '.jpg'
    upside_down = original_image[::-1, :]  # reversed row order
    io.imsave(fname=target_path, arr=upside_down)

def blured(original_image, saving_dir, new_name):
    """
    Blur the image with an 11x11 uniform (box) filter and save it to a '.jpg' file.

    Only the first two axes are smoothed. Any further axes (for example a
    colour-channel axis) get a window of 1 so their values are not mixed.

    :param original_image: image array to process, 2-D or with extra trailing axes
    :param saving_dir: path prefix the output file name starts with
    :param new_name: text appended to ``saving_dir`` before the '.jpg' extension
    """
    # uniform_filter requires one window size per axis. A fixed (11, 11, 1)
    # tuple is rejected for 2-D grayscale input, so size it from the rank.
    extra_axes = np.ndim(original_image) - 2
    window = (11, 11) + (1,) * extra_axes
    blured_image = ndimage.uniform_filter(original_image, size=window)
    new_filename = saving_dir + new_name + '.jpg'
    io.imsave(fname=new_filename, arr=blured_image)

def save_original(original_image, saving_dir, new_name):
    """
    Save the image, unmodified, to ``saving_dir + new_name + '.jpg'``.

    :param original_image: image array to write
    :param saving_dir: path prefix the output file name starts with
    :param new_name: text appended to ``saving_dir`` before the '.jpg' extension
    """
    target_path = ''.join((saving_dir, new_name, '.jpg'))
    io.imsave(fname=target_path, arr=original_image)


input_folder = 'merged-data'
# NOTE: this rebinds `output_dir`, which the first cell set to the merged-data
# folder; cells that read the merged data must run before this one.
output_dir = 'augmented-data/'

# Every augmentation shares the (image, path_prefix, suffix) signature, so the
# whole pipeline can be driven from one table instead of repeated calls.
AUGMENTATIONS = [
    (save_original, '_original'),
    (better_contrast, '_contrast'),
    (gamma_correction, '_gamma'),
    (log_correction, '_log'),
    (sigmoid_correction, '_sigmoid'),
    (horizontal_flip, '_horizontal'),
    (vertical_flip, '_vertical'),
    (blured, '_blured'),
]
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

for subfolder in os.listdir(input_folder):
    subfolder_path = os.path.join(input_folder, subfolder)
    if not os.path.isdir(subfolder_path):
        continue
    print(f"Processing images in '{subfolder}' folder...")

    # The save helpers only write files, so the per-class output folder has
    # to exist before the first image is saved.
    os.makedirs(os.path.join(output_dir, subfolder), exist_ok=True)

    # Iterate through each image file in the subfolder
    for filename in os.listdir(subfolder_path):
        # Compare case-insensitively so files such as 'IMG_1.JPG' are not skipped.
        if not filename.lower().endswith(IMAGE_EXTENSIONS):
            continue

        image_path = os.path.join(subfolder_path, filename)
        original_image = io.imread(image_path)

        # splitext strips only the final extension; split('.')[0] would cut
        # 'leaf.v2.jpg' down to 'leaf' and make distinct images collide.
        stem = os.path.splitext(filename)[0]
        # Despite its name this is a path prefix (folder + file stem); each
        # helper appends its own suffix and '.jpg'.
        saving_dir = output_dir + subfolder + '/' + stem

        for augment, suffix in AUGMENTATIONS:
            augment(original_image, saving_dir, suffix)


Data splits