In [10]:
import os
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img, array_to_img
import matplotlib.pyplot as plt

In [11]:
# Function to load and resize images from a directory
def load_images_from_directory(directory, target_size):
    """Load every image file directly inside ``directory`` as an array.

    Files are visited in sorted name order so the returned list is stable
    between runs (``os.listdir`` makes no ordering promise). Entries whose
    name does not end in a known image extension, and entries that are not
    regular files, are skipped so that stray files such as ``.DS_Store`` do
    not abort the whole load.

    Args:
        directory: Path of the folder to scan (not recursive).
        target_size: Forwarded unchanged to ``load_img`` as ``target_size``.

    Returns:
        A list with one ``img_to_array`` result per image file found; empty
        when the directory holds no image files.
    """
    # Lower-case suffixes accepted as images; matching is case-insensitive.
    image_extensions = (
        '.png', '.jpg', '.jpeg', '.bmp', '.gif', '.ppm', '.tif', '.tiff', '.webp',
    )

    images = []
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(image_extensions):
            continue
        img_path = os.path.join(directory, filename)
        # A sub-directory may carry an image-like name; only real files are loaded.
        if os.path.isfile(img_path):
            img = load_img(img_path, target_size=target_size)
            images.append(img_to_array(img))
    return images

# Function to augment images and save them
def augment_images(images, augmentor, target_dir, target_count):
    """Write augmented copies of ``images`` until the class reaches ``target_count``.

    The number of files written is ``target_count - len(images)``. For each
    one a source image is drawn at random from ``images``, a single augmented
    batch is taken from ``augmentor.flow`` and the result is saved straight
    away as ``aug_<n>.png`` inside ``target_dir``.

    Args:
        images: List of image arrays to sample from; its length is treated
            as the current number of images in the class.
        augmentor: Object exposing ``flow(x, batch_size=...)`` that yields
            augmented batches (an ``ImageDataGenerator`` in this notebook).
        target_dir: Existing directory that receives the ``aug_<n>.png`` files.
        target_count: Desired total number of images for the class.

    Returns:
        None. Returns immediately when ``len(images) >= target_count``.

    Raises:
        ValueError: If ``images`` is empty while more images are required,
            because there is nothing to augment from.
    """
    current_count = len(images)
    if current_count >= target_count:
        return

    if not images:
        raise ValueError(
            "Cannot augment: no source images were provided but "
            f"{target_count} image(s) are required."
        )

    images_needed = target_count - current_count

    file_index = 0
    for _ in range(images_needed):
        # Pick a new source image for every output. Iterating over a single
        # flow() would never finish on its own, so it would keep producing
        # variants of one image only.
        source = random.choice(images)
        single_batch = source.reshape((1,) + source.shape)
        batch = next(iter(augmentor.flow(single_batch, batch_size=1)))
        augmented_image = array_to_img(batch[0])

        # Never overwrite an existing file (e.g. from an earlier run):
        # advance to the next unused index.
        out_path = os.path.join(target_dir, f'aug_{file_index}.png')
        while os.path.exists(out_path):
            file_index += 1
            out_path = os.path.join(target_dir, f'aug_{file_index}.png')

        # Save immediately instead of accumulating every image in memory.
        augmented_image.save(out_path)
        file_index += 1

# Main function to resize and augment images in the dataset
def resize_and_augment(base_dir, target_size=(400, 400), target_count=4000):
    """Load both classes under ``base_dir`` and augment each up to ``target_count``.

    The ``NORMAL`` and ``PNEUMONIA`` sub-folders of ``base_dir`` are loaded
    with ``load_images_from_directory`` (which receives ``target_size``), and
    each class is then handed to ``augment_images`` with its own folder as
    the output directory.

    Args:
        base_dir: Folder containing the ``NORMAL`` and ``PNEUMONIA`` sub-folders.
        target_size: Size passed through to the image loader.
        target_count: Per-class image total passed through to ``augment_images``.

    Returns:
        None.
    """
    class_dirs = [os.path.join(base_dir, label) for label in ('NORMAL', 'PNEUMONIA')]

    # Both classes are loaded before any augmentation starts.
    class_images = [
        load_images_from_directory(class_dir, target_size) for class_dir in class_dirs
    ]

    # One generator, shared by both classes: small shifts, shear and zoom,
    # plus horizontal flips; uncovered pixels use the 'nearest' fill mode.
    augmentation_settings = dict(
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode='nearest',
    )
    augmentor = ImageDataGenerator(**augmentation_settings)

    for class_dir, loaded in zip(class_dirs, class_images):
        augment_images(loaded, augmentor, class_dir, target_count)


In [13]:
# Function to count the number of images in a directory
def count_images(directory):
    """Return how many regular files sit directly inside ``directory``.

    Sub-directories are not counted and not descended into. Every regular
    file counts, whatever its extension.
    """
    file_total = 0
    for entry in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, entry)):
            file_total += 1
    return file_total

# Training split root; each class lives in its own sub-folder
base_dir = 'chest_xray/train'
normal_dir, pneumonia_dir = (
    os.path.join(base_dir, class_name) for class_name in ('NORMAL', 'PNEUMONIA')
)

# Snapshot the per-class file counts before anything is written
normal_count_before, pneumonia_count_before = map(
    count_images, (normal_dir, pneumonia_dir)
)

print(f"Number of NORMAL images before augmentation: {normal_count_before}")
print(f"Number of PNEUMONIA images before augmentation: {pneumonia_count_before}")

# Run the augmentation with the default target size and per-class target count
resize_and_augment(base_dir)

# Count again so the effect of the augmentation is visible in the output
normal_count_after, pneumonia_count_after = map(
    count_images, (normal_dir, pneumonia_dir)
)

print(f"Number of NORMAL images after augmentation: {normal_count_after}")
print(f"Number of PNEUMONIA images after augmentation: {pneumonia_count_after}")


Number of NORMAL images before augmentation: 1341
Number of PNEUMONIA images before augmentation: 3875
Number of NORMAL images after augmentation: 4000
Number of PNEUMONIA images after augmentation: 4000
