# Balancing Covid-19 training dataset

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image

# Paths
input_dir = '/kaggle/input/chest-xray-pneumoniacovid19tuberculosis/train/COVID19'  # 460 images
output_dir = '/kaggle/working/B_COVID19'  # output path
os.makedirs(output_dir, exist_ok=True)

# With the 460 source images noted above, 8 augmentations per image yields
# 3680 new images; the stated target is 3415, so this overshoots slightly.
num_to_generate = 8  # augmented images produced per source image

# Data augmentation settings
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Collect file names only (no file is opened yet). The extension check is
# case-insensitive so files such as "scan.JPG" are not silently skipped, and
# the list is sorted so the aug_<n> numbering is the same on every run.
image_names = sorted(
    name for name in os.listdir(input_dir)
    if name.lower().endswith(('.png', '.jpg', '.jpeg'))
)
generated_count = 0

for name in image_names:
    # Open one image at a time and close it as soon as its pixels are in
    # memory; Image.open is lazy, so opening every file up front would keep
    # one file handle alive per image for the whole run.
    with Image.open(os.path.join(input_dir, name)) as opened:
        img = opened
        if img.mode in ("P", "LA"):
            # Palette indices are not intensities and 2-channel arrays cannot
            # be written back out, so normalise these modes to RGB first.
            img = img.convert("RGB")

        # Convert the image to a NumPy array
        img_array = np.array(img)

        # Ensure the array has the shape (height, width, channels)
        if img_array.ndim == 2:  # Grayscale image
            img_array = np.expand_dims(img_array, axis=-1)  # Add channel dimension
        elif img_array.shape[-1] == 4:  # 4-channel image (e.g. RGBA)
            img_array = np.array(img.convert("RGB"))  # Convert to RGB

    # Add batch dimension
    img_array = np.expand_dims(img_array, axis=0)

    # Pass the array to the ImageDataGenerator
    aug_iter = datagen.flow(img_array, batch_size=1)

    for _ in range(num_to_generate):
        # [0] drops the batch dimension of the single-image batch.
        augmented_img = next(aug_iter)[0].astype('uint8')
        if augmented_img.shape[-1] == 1:
            # Drop only the channel axis of grayscale output; a blanket
            # np.squeeze would also collapse a size-1 height or width.
            augmented_img = augmented_img[..., 0]

        # Save the augmented image
        output_path = os.path.join(output_dir, f"aug_{generated_count}.jpg")
        Image.fromarray(augmented_img).convert("RGB").save(output_path)  # Ensure the image is saved as RGB
        generated_count += 1

print(f"Generated {generated_count} augmented images in {output_dir}.")


# Balancing Normal training dataset

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image

# Paths
input_dir = '/kaggle/input/chest-xray-pneumoniacovid19tuberculosis/train/NORMAL'  # 1341 images
output_dir = '/kaggle/working/B_NORMAL'  # output path
os.makedirs(output_dir, exist_ok=True)

# With the 1341 source images noted above, 3 augmentations per image yields
# 4023 new images; the stated target is 2534, so this overshoots.
num_to_generate = 3  # augmented images produced per source image

# Data augmentation settings
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Collect file names only (no file is opened yet). The extension check is
# case-insensitive so files such as "scan.JPG" are not silently skipped, and
# the list is sorted so the aug_<n> numbering is the same on every run.
image_names = sorted(
    name for name in os.listdir(input_dir)
    if name.lower().endswith(('.png', '.jpg', '.jpeg'))
)
generated_count = 0

for name in image_names:
    # Open one image at a time and close it as soon as its pixels are in
    # memory; Image.open is lazy, so opening every file up front would keep
    # one file handle alive per image for the whole run.
    with Image.open(os.path.join(input_dir, name)) as opened:
        img = opened
        if img.mode in ("P", "LA"):
            # Palette indices are not intensities and 2-channel arrays cannot
            # be written back out, so normalise these modes to RGB first.
            img = img.convert("RGB")

        # Convert the image to a NumPy array
        img_array = np.array(img)

        # Ensure the array has the shape (height, width, channels)
        if img_array.ndim == 2:  # Grayscale image
            img_array = np.expand_dims(img_array, axis=-1)  # Add channel dimension
        elif img_array.shape[-1] == 4:  # 4-channel image (e.g. RGBA)
            img_array = np.array(img.convert("RGB"))  # Convert to RGB

    # Add batch dimension
    img_array = np.expand_dims(img_array, axis=0)

    # Pass the array to the ImageDataGenerator
    aug_iter = datagen.flow(img_array, batch_size=1)

    for _ in range(num_to_generate):
        # [0] drops the batch dimension of the single-image batch.
        augmented_img = next(aug_iter)[0].astype('uint8')
        if augmented_img.shape[-1] == 1:
            # Drop only the channel axis of grayscale output; a blanket
            # np.squeeze would also collapse a size-1 height or width.
            augmented_img = augmented_img[..., 0]

        # Save the augmented image
        output_path = os.path.join(output_dir, f"aug_{generated_count}.jpg")
        Image.fromarray(augmented_img).convert("RGB").save(output_path)  # Ensure the image is saved as RGB
        generated_count += 1

print(f"Generated {generated_count} augmented images in {output_dir}.")


# Balancing Tuberculosis training dataset

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image

# Paths (the "TURBERCULOSIS" spelling is part of the directory names and must be kept)
input_dir = '/kaggle/input/chest-xray-pneumoniacovid19tuberculosis/train/TURBERCULOSIS'  # 650 images
output_dir = '/kaggle/working/B_TURBERCULOSIS'  # output path
os.makedirs(output_dir, exist_ok=True)

# With the 650 source images noted above, 5 augmentations per image yields
# 3250 new images; the stated target is 3225, so this overshoots slightly.
num_to_generate = 5  # augmented images produced per source image

# Data augmentation settings
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Collect file names only (no file is opened yet). The extension check is
# case-insensitive so files such as "scan.JPG" are not silently skipped, and
# the list is sorted so the aug_<n> numbering is the same on every run.
image_names = sorted(
    name for name in os.listdir(input_dir)
    if name.lower().endswith(('.png', '.jpg', '.jpeg'))
)
generated_count = 0

for name in image_names:
    # Open one image at a time and close it as soon as its pixels are in
    # memory; Image.open is lazy, so opening every file up front would keep
    # one file handle alive per image for the whole run.
    with Image.open(os.path.join(input_dir, name)) as opened:
        img = opened
        if img.mode in ("P", "LA"):
            # Palette indices are not intensities and 2-channel arrays cannot
            # be written back out, so normalise these modes to RGB first.
            img = img.convert("RGB")

        # Convert the image to a NumPy array
        img_array = np.array(img)

        # Ensure the array has the shape (height, width, channels)
        if img_array.ndim == 2:  # Grayscale image
            img_array = np.expand_dims(img_array, axis=-1)  # Add channel dimension
        elif img_array.shape[-1] == 4:  # 4-channel image (e.g. RGBA)
            img_array = np.array(img.convert("RGB"))  # Convert to RGB

    # Add batch dimension
    img_array = np.expand_dims(img_array, axis=0)

    # Pass the array to the ImageDataGenerator
    aug_iter = datagen.flow(img_array, batch_size=1)

    for _ in range(num_to_generate):
        # [0] drops the batch dimension of the single-image batch.
        augmented_img = next(aug_iter)[0].astype('uint8')
        if augmented_img.shape[-1] == 1:
            # Drop only the channel axis of grayscale output; a blanket
            # np.squeeze would also collapse a size-1 height or width.
            augmented_img = augmented_img[..., 0]

        # Save the augmented image
        output_path = os.path.join(output_dir, f"aug_{generated_count}.jpg")
        Image.fromarray(augmented_img).convert("RGB").save(output_path)  # Ensure the image is saved as RGB
        generated_count += 1

print(f"Generated {generated_count} augmented images in {output_dir}.")
