In [1]:
import os
import numpy as np
import shutil
from tqdm import tqdm
from imgaug import augmenters as iaa
from PIL import Image

In [2]:
root = 'C:/Users/nithi/Desktop/FAU/Semester-4/Master_Thesis_Federated_Learning/Dataset/sample'

# Path to the dataset
data_path = root + '/images'
balanced_data_path = root

# Class labels
classes = ['Hernia', 'Pneumonia', 'Fibrosis', 'Nodule', 'Mass', 'Consolidation', 'Effusion', 'Edema', 'Atelectasis', 'No Finding', 'Cardiomegaly', 'Pneumothorax', 'Pleural_Thickening', 'Infiltration', 'Emphysema']


In [3]:
# Create a directory for balanced data if it doesn't exist
os.makedirs(balanced_data_path, exist_ok=True)

In [4]:
# Data augmentation sequence
augmentation_seq = iaa.Sequential([
    iaa.Fliplr(0.5),  # Horizontal flip 50% of images
    iaa.Crop(percent=(0, 0.1)),  # Random crop
    iaa.Affine(rotate=(-20, 20)),  # Rotate images
    iaa.LinearContrast((0.75, 1.5)),  # Change contrast
    iaa.AdditiveGaussianNoise(scale=(0, 0.05*255)),  # Add Gaussian noise
])

In [5]:
# Count the number of images in each class
class_counts = {class_name: len(os.listdir(os.path.join(data_path, class_name))) for class_name in classes}
max_count = max(class_counts.values())

In [6]:
def augment_images(class_name, img_paths, target_count):
    os.makedirs(os.path.join(balanced_data_path, class_name), exist_ok=True)
    img_count = len(img_paths)
    iterations = (target_count - img_count) // img_count + 1
    
    for i in tqdm(range(iterations), desc=f'Augmenting {class_name}'):
        for img_path in img_paths:
            img = Image.open(img_path)
            img_np = np.array(img)
            augmented_images = augmentation_seq(images=[img_np])
            for j, augmented_img in enumerate(augmented_images):
                aug_img = Image.fromarray(augmented_img)
                aug_img.save(os.path.join(balanced_data_path, class_name, f'{os.path.splitext(os.path.basename(img_path))[0]}_aug_{i}_{j}.png'))
    
    # Copy original images to the new folder
    for img_path in img_paths:
        shutil.copy(img_path, os.path.join(balanced_data_path, class_name))

In [7]:
for class_name in classes:
    class_folder = os.path.join(data_path, class_name)
    img_paths = [os.path.join(class_folder, img_name) for img_name in os.listdir(class_folder)]
    augment_images(class_name, img_paths, max_count)

print("Data augmentation completed. Balanced dataset created.")

Augmenting Atelectasis: 100%|██████████| 5/5 [05:37<00:00, 67.41s/it]
Augmenting No Finding: 100%|██████████| 1/1 [07:48<00:00, 468.77s/it]
Augmenting Cardiomegaly: 100%|██████████| 21/21 [07:15<00:00, 20.74s/it]
Augmenting Pneumothorax: 100%|██████████| 11/11 [08:15<00:00, 45.08s/it]
Augmenting Pleural_Thickening: 100%|██████████| 17/17 [06:45<00:00, 23.86s/it]
Augmenting Infiltration: 100%|██████████| 3/3 [06:29<00:00, 129.91s/it]
Augmenting Emphysema: 100%|██████████| 23/23 [06:24<00:00, 16.72s/it]

Data augmentation completed. Balanced dataset created.



