In [84]:
import os
import cv2
import numpy as np
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [85]:

# Step 1: Define the dataset location and augmentation parameters
augmentation_factor = 10  # Number of augmented images per original image
data_directory = "C:/Users/Shusmita/Desktop/Oral Cancer"
# Augmented copies live in a sub-folder of the dataset directory.
output_directory = f"{data_directory}/Augmented_Oral_Cancer"

In [86]:
# Step 2: Function to preprocess images (load, convert to RGB, and resize)
def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image from disk as an RGB array resized to ``target_size``.

    Args:
        image_path: Path of the image file to read.
        target_size: Output size forwarded unchanged to ``cv2.resize``,
            which interprets it as ``(width, height)``.

    Returns:
        The resized image array with its channels in RGB order.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        ValueError: If OpenCV cannot decode the file as an image.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising; without
    # this check the problem only shows up as an opaque cv2.error in cvtColor.
    if img is None:
        raise ValueError(f"Could not decode image: {image_path}")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
    return cv2.resize(img, target_size)

In [87]:

# Step 3: Function to augment images and save them to the output directory
def augment_images(input_directory, output_directory, augmentation_factor):
    """Write randomly augmented copies of every image in ``input_directory``.

    Each readable image is loaded with ``preprocess_image``, passed through a
    Keras ``ImageDataGenerator`` and saved as JPEG in
    ``<output_directory>/<basename of input_directory>``.

    Args:
        input_directory: Folder containing the original images. Sub-folders
            are ignored; files that cannot be decoded are skipped with a
            printed message.
        output_directory: Folder under which the augmented images are stored.
        augmentation_factor: Number of augmented images to write per original
            image. Values <= 0 write nothing.
    """
    image_files = sorted(os.listdir(input_directory))

    # The destination does not depend on the individual file, so create it once.
    # makedirs also creates output_directory itself when it is missing.
    output_folder = os.path.join(output_directory, os.path.basename(input_directory))
    os.makedirs(output_folder, exist_ok=True)

    image_gen = ImageDataGenerator(
        rotation_range=30,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )

    for img_file in image_files:
        img_path = os.path.join(input_directory, img_file)
        if not os.path.isfile(img_path):
            continue  # os.listdir also returns sub-folders

        try:
            img = preprocess_image(img_path)
        except (cv2.error, ValueError) as exc:
            # Stray non-image files should not abort the whole run.
            print(f"Skipping unreadable file {img_path}: {exc}")
            continue

        batch = img.reshape((1,) + img.shape)  # Reshape for augmentation
        # splitext handles extensions of any length (".jpeg", ".tiff", ...).
        stem = os.path.splitext(img_file)[0]

        for aug_index in range(augmentation_factor):
            # Keras names saved files "<prefix>_<index>_<random number>". With a
            # single shared prefix, two augmentations of the same image can draw
            # the same number and silently overwrite each other, so every
            # augmentation gets its own prefix.
            single_shot = image_gen.flow(
                batch,
                batch_size=1,
                save_to_dir=output_folder,
                save_prefix=f"{stem}_aug{aug_index}",
                save_format='jpg',
            )
            next(single_shot)  # generating the batch writes the file to disk


In [88]:
# Step 4: Preprocess and augment both cancer and non-cancer folders
class_labels = ("cancer", "non_cancer")
cancer_folder, non_cancer_folder = (
    os.path.join(data_directory, label) for label in class_labels
)

# Same call for each class: read from <data>/<label>, write under <output>/<label>.
for label, source_folder in zip(class_labels, (cancer_folder, non_cancer_folder)):
    augment_images(source_folder, os.path.join(output_directory, label), augmentation_factor)

In [89]:
# Step 5: Display the number of augmented images
def count_images(directory, extensions=(".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tif", ".tiff", ".webp")):
    """Count the image files below ``directory``, including sub-folders.

    Args:
        directory: Root folder to scan recursively. A folder that does not
            exist yields a count of 0.
        extensions: File-name suffixes (matched case-insensitively) that count
            as images. Pass ``None`` to count every file regardless of suffix.

    Returns:
        The number of matching files.
    """
    if extensions is None:
        return sum(len(files) for _, _, files in os.walk(directory))

    # Filtering by suffix keeps stray files (thumbnail caches, notebook
    # checkpoints, ...) from inflating the reported image count.
    suffixes = tuple(ext.lower() for ext in extensions)
    return sum(
        name.lower().endswith(suffixes)
        for _, _, files in os.walk(directory)
        for name in files
    )

# Tally what was written for each class, then report both totals.
total_augmented_cancer, total_augmented_non_cancer = (
    count_images(os.path.join(output_directory, class_name))
    for class_name in ("cancer", "non_cancer")
)

print("Total augmented cancer images:", total_augmented_cancer)
print("Total augmented non-cancer images:", total_augmented_non_cancer)

Total augmented cancer images: 440
Total augmented non-cancer images: 439
