In [2]:
from PIL import Image
import os

# Source images are organised as one sub-folder per class; the resized copies
# are written to the same sub-folder structure below the output base folder.
input_base_folder = 'Dataset Proper/Test'
output_base_folder = 'New Dataset_proper_resized/test'

# Size every image is resized to. Defined once because it never changes
# between images.
target_size = (224, 224)

# Only directories are class folders. Stray files next to them (for example
# OS metadata files) would otherwise make os.listdir() fail in the loop below.
class_folders = sorted(
    entry
    for entry in os.listdir(input_base_folder)
    if os.path.isdir(os.path.join(input_base_folder, entry))
)

# Iterate through class folders
for class_folder in class_folders:
    input_folder = os.path.join(input_base_folder, class_folder)
    output_folder = os.path.join(output_base_folder, class_folder)

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Resize images and save them to the output folder
    for filename in sorted(os.listdir(input_folder)):
        image_path = os.path.join(input_folder, filename)

        # Nested folders cannot be opened as images; skip them.
        if not os.path.isfile(image_path):
            continue

        try:
            # The context manager closes the underlying file handle, which a
            # bare Image.open() call leaves open.
            with Image.open(image_path) as image:
                resized_image = image.resize(target_size)
        except OSError as error:
            # Raised for files Pillow cannot identify or fully read.
            print(f"Skipping unreadable image {image_path}: {error}")
            continue

        # Keep the original file name so the output format follows the
        # original extension.
        resized_image.save(os.path.join(output_folder, filename))

print("Resized images done")


Resized images done


In [4]:
import tensorflow as tf
import cv2
import os

# Root folder of the resized images; each sub-directory is treated as a class
dataset_dir = 'New Dataset_proper_resized/test'

# Class names are discovered from disk: keep only entries that are directories
classes = [
    entry
    for entry in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, entry))
]

# Define the NLM filter function
def nlm_filter(image, h=10, h_color=10, template_window_size=7, search_window_size=21):
    """Denoise a colour image with OpenCV's fast non-local means filter.

    Thin wrapper around ``cv2.fastNlMeansDenoisingColored``. The defaults
    reproduce the values that used to be hard-coded, so ``nlm_filter(image)``
    behaves exactly as before.

    Args:
        image: Colour image array as returned by ``cv2.imread``.
        h: Passed as the ``h`` argument (filter strength for luminance).
        h_color: Passed as the ``hColor`` argument (filter strength for the
            colour components).
        template_window_size: Passed as ``templateWindowSize``.
        search_window_size: Passed as ``searchWindowSize``.

    Returns:
        The denoised image returned by OpenCV.
    """
    # Arguments are passed positionally, in the order of the OpenCV signature:
    # (src, dst, h, hColor, templateWindowSize, searchWindowSize).
    return cv2.fastNlMeansDenoisingColored(
        image, None, h, h_color, template_window_size, search_window_size
    )

# Iterate through each class
for class_name in classes:
    class_dir = os.path.join(dataset_dir, class_name)
    output_dir = os.path.join('New NLM Filter/test', class_name, f'{class_name}_filtered_nlm')
    os.makedirs(output_dir, exist_ok=True)

    # List all BMP files in the class directory (sorted for a stable order)
    image_files = sorted(
        os.path.join(class_dir, filename)
        for filename in os.listdir(class_dir)
        if filename.lower().endswith('.bmp')
    )

    # Apply NLM filtering to each image and save the result as PNG
    for image_file in image_files:
        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(image_file)
        if image is None:
            print(f"Skipping unreadable image: {image_file}")
            continue

        # Keep OpenCV's native BGR channel order. cv2.imwrite expects BGR, so
        # converting to RGB before saving wrote files with the red and blue
        # channels swapped.
        filtered_image = nlm_filter(image)

        # Get the file name without extension
        filename = os.path.splitext(os.path.basename(image_file))[0]

        # Save the filtered image as PNG format
        output_file = os.path.join(output_dir, f'{filename}_filtered.png')

        # cv2.imwrite returns False when nothing was written; fail loudly
        # instead of silently losing the image.
        if not cv2.imwrite(output_file, filtered_image):
            raise OSError(f"Could not write filtered image: {output_file}")

print("Noise removal and PNG conversion completed.")


Noise removal and PNG conversion completed.


In [8]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from PIL import Image
import os

# Folder the NLM-filtered images are read from, and folder the augmented
# images are written to
base_input_folder = 'New NLM Filter/test'
base_output_folder = 'New NLM Augmentation/test'

# Every sub-directory of the input folder is treated as one class
class_folders = [
    entry
    for entry in os.listdir(base_input_folder)
    if os.path.isdir(os.path.join(base_input_folder, entry))
]

# Random augmentation settings shared by all images
datagen = ImageDataGenerator(
    rescale=1./255,  # multiply pixel values by 1/255
    rotation_range=45,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

# Number of augmented variants written for every source image
num_augmented_images = 6

# File types treated as images; anything else in the folders is ignored
image_extensions = ('.bmp', '.gif', '.jpeg', '.jpg', '.png', '.tif', '.tiff')

# Loop through each class folder
for class_folder in class_folders:
    input_folder = os.path.join(base_input_folder, class_folder)
    output_folder = os.path.join(base_output_folder, class_folder)

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Collect image files anywhere below the class folder. Walking the tree
    # (instead of a flat listdir) also finds images stored in a nested
    # sub-folder and never hands a directory to load_img.
    image_paths = sorted(
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(input_folder)
        for name in names
        if name.lower().endswith(image_extensions)
    )

    for image_path in image_paths:
        # Load the image and add the leading batch axis the generator expects
        image_array = img_to_array(load_img(image_path))
        image_batch = image_array.reshape((1,) + image_array.shape)

        # No save_to_dir here: the images are saved explicitly below, so
        # letting flow() save them as well stored every variant twice.
        augmented_images = datagen.flow(image_batch, batch_size=1)

        # splitext keeps dots inside the name; split(".")[0] cut the name at
        # the first dot.
        stem = os.path.splitext(os.path.basename(image_path))[0]

        # zip() with range() first stops after exactly num_augmented_images
        # batches, without pulling an extra batch from the endless generator.
        for i, augmented_batch in zip(range(num_augmented_images), augmented_images):
            # Assumes datagen rescales by 1/255, so multiplying by 255 maps
            # back to the 8-bit range; round and clip before the uint8 cast so
            # values are not truncated or wrapped.
            pixels = (augmented_batch[0] * 255).round().clip(0, 255).astype('uint8')

            save_path = os.path.join(output_folder, f'{stem}_{i}.png')
            Image.fromarray(pixels).save(save_path)

print("Augmented images done")


Augmented images done


In [9]:
import os
import shutil

# Define the directories for train and test datasets
train_dir = 'New NLM Augmentation/train'  # Replace with your train dataset directory
test_dir = 'New NLM Augmentation/test'    # Replace with your test dataset directory
output_dir = 'NEW_DATASET_SIZE'  # Replace with the directory where you want to store the merged dataset


def merge_class_folders(split_dirs, merged_dir):
    """Copy the class folders of several dataset splits into one folder tree.

    Args:
        split_dirs: Mapping of split name (e.g. ``'train'``) to the directory
            that holds one sub-directory per class. Splits are processed in
            the mapping's iteration order.
        merged_dir: Directory that receives ``<merged_dir>/<class>/<file>``.
            Created if missing.

    Returns:
        The number of files copied.

    A split directory that does not exist is reported and skipped. Classes are
    taken from every split, so a class present in only one split is still
    merged. When a file name was already used by an earlier split for the same
    class, the copy is stored as ``<split>_<file>`` instead of overwriting it.
    """
    os.makedirs(merged_dir, exist_ok=True)

    copied = 0
    used_names = {}  # class name -> file names already written during this call

    for split_name, split_dir in split_dirs.items():
        if not os.path.isdir(split_dir):
            print(f"Warning: source directory not found, skipping: {split_dir}")
            continue

        for class_name in sorted(os.listdir(split_dir)):
            class_dir = os.path.join(split_dir, class_name)
            # Only sub-directories are classes; ignore stray files.
            if not os.path.isdir(class_dir):
                continue

            merged_class_dir = os.path.join(merged_dir, class_name)
            os.makedirs(merged_class_dir, exist_ok=True)
            taken = used_names.setdefault(class_name, set())

            for file_name in sorted(os.listdir(class_dir)):
                src_path = os.path.join(class_dir, file_name)
                # shutil.copy cannot copy directories.
                if not os.path.isfile(src_path):
                    continue

                # Collisions are tracked per call (not by checking the disk),
                # so re-running the merge overwrites the same files instead of
                # piling up extra prefixed copies.
                dst_name = file_name
                while dst_name in taken:
                    dst_name = f'{split_name}_{dst_name}'
                taken.add(dst_name)

                shutil.copy(src_path, os.path.join(merged_class_dir, dst_name))
                copied += 1

    return copied


# Merge train and test datasets by copying files to the output directory
merge_class_folders({'train': train_dir, 'test': test_dir}, output_dir)

# Classes present in the merged dataset
classes = sorted(
    entry
    for entry in os.listdir(output_dir)
    if os.path.isdir(os.path.join(output_dir, entry))
)

print("Merging complete.")


Merging complete.


In [12]:
import os
import shutil
import random

# Set your data directory
data_dir = 'NEW_DATASET_SIZE'  # Replace with the path to your Herlev dataset

# Define the directory names for the splits
train_dir = 'NEW_DATASET/train'
val_dir = 'NEW_DATASET/validation'
test_dir = 'NEW_DATASET/test'

# Define the ratio for splitting: 70% train, 18% validation, 12% test
train_ratio = 0.7
val_ratio = 0.18
test_ratio = 0.12

# Fixed seed so the same files land in the same split on every run
split_seed = 42


def split_dataset(data_dir, train_dir, val_dir, test_dir,
                  train_ratio, val_ratio, test_ratio, seed=42):
    """Copy every class folder of ``data_dir`` into train/validation/test folders.

    Args:
        data_dir: Directory with one sub-directory per class.
        train_dir, val_dir, test_dir: Output roots; a ``<class>`` sub-directory
            is created in each of them.
        train_ratio, val_ratio, test_ratio: Fractions of each class assigned to
            the splits. They must sum to 1. The test split receives whatever
            remains after the train and validation counts are truncated to
            integers.
        seed: Seed of the private random generator used for shuffling.

    Returns:
        Dict mapping class name to ``(num_train, num_val, num_test)``.

    Raises:
        ValueError: If the three ratios do not sum to 1.
        FileNotFoundError: If ``data_dir`` is not a directory.

    File lists are sorted before shuffling with the seeded generator, so the
    assignment is reproducible. Re-running therefore overwrites the same
    copies instead of placing a file into a second split.

    NOTE(review): files are shuffled individually. If the input contains
    several augmented variants of one source image, variants of the same image
    can end up in different splits - confirm whether that is acceptable.
    """
    if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
        raise ValueError("train_ratio, val_ratio and test_ratio must sum to 1")
    if not os.path.isdir(data_dir):
        raise FileNotFoundError(f"Data directory not found: {data_dir}")

    # Create the directories if they don't exist
    for split_root in (train_dir, val_dir, test_dir):
        os.makedirs(split_root, exist_ok=True)

    rng = random.Random(seed)  # private generator: global random state is untouched
    counts = {}

    # Loop through each class in the dataset
    for class_name in sorted(os.listdir(data_dir)):
        class_dir = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_dir):
            continue

        # Regular files only (shutil.copy cannot copy directories), sorted so
        # the shuffle starts from the same order on every run.
        files = sorted(
            name for name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, name))
        )
        rng.shuffle(files)

        # Calculate the number of samples for each split
        num_files = len(files)
        num_train = int(train_ratio * num_files)
        num_val = int(val_ratio * num_files)
        num_test = num_files - num_train - num_val
        counts[class_name] = (num_train, num_val, num_test)

        # Create subdirectories for each split
        class_train_dir = os.path.join(train_dir, class_name)
        class_val_dir = os.path.join(val_dir, class_name)
        class_test_dir = os.path.join(test_dir, class_name)
        for class_split_dir in (class_train_dir, class_val_dir, class_test_dir):
            os.makedirs(class_split_dir, exist_ok=True)

        # Copy files to respective splits
        for i, file in enumerate(files):
            if i < num_train:
                target_dir = class_train_dir
            elif i < num_train + num_val:
                target_dir = class_val_dir
            else:
                target_dir = class_test_dir
            shutil.copy(os.path.join(class_dir, file), os.path.join(target_dir, file))

    return counts


if os.path.isdir(data_dir):
    split_dataset(data_dir, train_dir, val_dir, test_dir,
                  train_ratio, val_ratio, test_ratio, seed=split_seed)
    print("Herlev dataset split into train, validation, and test sets.")
else:
    print(f"Data directory not found, nothing was split: {data_dir}")


Herlev dataset split into train, validation, and test sets.
