
# ---------------------------
# Data Augmentation
# ---------------------------

# In [None]:

import os  
import cv2  
from keras.preprocessing.image import ImageDataGenerator  # For data augmentation

# Source folder holding the images to augment, and the root folder that
# receives the generated copies.
input_dir = r'C:\Users\elhas\ML Project\DataSet\Malignant'
augmented_dir = r'C:\Users\elhas\ML Project\DataSet\train_augmented'
os.makedirs(augmented_dir, exist_ok=True)  # No error if the folder is already there

# Random transformations applied by the generator, gathered in one mapping
# and unpacked into the constructor below.
augmentation_params = {
    'rotation_range': 30,        # Random rotation of up to 30 degrees in either direction
    'width_shift_range': 0.1,    # Horizontal shift by up to 10% of the width
    'height_shift_range': 0.1,   # Vertical shift by up to 10% of the height
    'shear_range': 0.2,          # Random shear intensity
    'zoom_range': 0.2,           # Random zoom factor between 0.8 and 1.2
    'horizontal_flip': True,     # Randomly mirror images left/right
    'vertical_flip': False,      # Top/bottom flipping is left disabled
    'fill_mode': 'nearest',      # Fill pixels exposed by a transform with the nearest value
}
datagen = ImageDataGenerator(**augmentation_params)

# Function to generate and save augmented images
def augment_images(input_dir, output_dir, augmentations=30):
    """Save randomly augmented copies of every image found under ``input_dir``.

    The tree rooted at ``input_dir`` is walked recursively. The base name of
    each visited directory is used as the label, and the augmented copies of
    the images in it are written to ``output_dir/<label>`` (created on
    demand, even when the directory holds no files). Each readable image is
    converted from BGR to RGB, resized to 128x128 and fed to the module-level
    ``datagen`` generator, which writes the results as JPEG files.

    Files that OpenCV cannot decode are reported and skipped instead of
    aborting the run.

    Args:
        input_dir: Root directory containing the source images.
        output_dir: Root directory that receives one sub-directory per label.
        augmentations: Number of augmented images to request for each source
            image. Zero or a negative value requests none.
    """
    for subdir, _, files in os.walk(input_dir):
        # normpath drops a trailing separator so the root directory still
        # yields its own name as the label instead of an empty string.
        label = os.path.basename(os.path.normpath(subdir))
        label_dir = os.path.join(output_dir, label)
        os.makedirs(label_dir, exist_ok=True)

        for filename in files:
            img_path = os.path.join(subdir, filename)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None rather than raising when the file
                # is missing, unreadable or not an image.
                print("Skipping unreadable image: {}".format(img_path))
                continue

            # OpenCV loads BGR; the Keras generator expects RGB.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_resized = cv2.resize(img, (128, 128))

            # Add a leading batch axis: (128, 128, 3) -> (1, 128, 128, 3).
            img_batch = img_resized.reshape((1,) + img_resized.shape)

            # Put the source file's stem in the prefix so augmentations of
            # different images do not share one naming pool.
            # NOTE(review): Keras appears to complete the name with only the
            # batch index and a random number, so a bare 'aug' prefix lets
            # outputs of different images overwrite each other - confirm
            # against the installed Keras version.
            stem = os.path.splitext(filename)[0]
            flow = datagen.flow(img_batch, batch_size=1,
                                save_to_dir=label_dir,
                                save_prefix='aug_' + stem,
                                save_format='jpeg')

            # Each next() call produces and saves one augmented image;
            # range() yields nothing for augmentations <= 0.
            for _ in range(augmentations):
                next(flow)


# Run the augmentation over the configured directories, then report completion.
augment_images(input_dir=input_dir, output_dir=augmented_dir)
print("Data augmentation complete and images saved.")

