In [None]:
# pip install --upgrade ipywidgets

In [None]:
# !pip install opencv-python-headless tqdm tensorflow

In [11]:
import os
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
#from tqdm.notebook import tqdm  # Use tqdm.notebook for better visualization in Jupyter
from tqdm import tqdm 

In [13]:
# Define paths
dataset_folder = 'D:/College/7th Sem/Major Project/Final Data/colored_images'  # Change this to your local dataset folder
output_folder = 'D:/College/7th Sem/Major Project/Final Data/Augmented Data'  # Change this to your local output folder
categories = ['Mild', 'Moderate', 'No_DR', 'Proliferate_DR', 'Severe']

In [15]:
# Create ImageDataGenerator for augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [17]:
# Function to save augmented images
def save_augmented_images(category, img_array, image_name, augmentations_needed):
    img_array = np.expand_dims(img_array, axis=0)
    aug_iter = datagen.flow(img_array, batch_size=1)
    for i in range(augmentations_needed):
        aug_image = next(aug_iter)[0].astype('uint8')
        save_path = os.path.join(output_folder, category, f"aug_{i}_{image_name}")
        cv2.imwrite(save_path, aug_image)


In [19]:
# Ensure output folder structure
for category in categories:
    os.makedirs(os.path.join(output_folder, category), exist_ok=True)

# Count images per class
image_counts = {category: len(os.listdir(os.path.join(dataset_folder, category))) for category in categories}

# Target count is the maximum number of images from any class (No_DR)
max_count = max(image_counts.values())


In [None]:
# Perform augmentation
for category in categories:
    category_folder = os.path.join(dataset_folder, category)
    images = os.listdir(category_folder)
    
    for image_name in tqdm(images, desc=f"Augmenting {category}"):
        img_path = os.path.join(category_folder, image_name)
        img = cv2.imread(img_path)
        if img is None:
            continue  # Skip if image can't be read

        # Check how many augmentations we need to perform
        augmentations_needed = max_count - image_counts[category]
        
        if augmentations_needed > 0:
            save_augmented_images(category, img, image_name, augmentations_needed)

Augmenting Mild:   0%|          | 1/2463 [04:37<190:00:26, 277.83s/it]

# Data Augumentation file with proper working code

In [2]:
import os
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm  # Standard tqdm for VS Code

In [4]:
# Define paths
dataset_folder = 'D:/College/7th Sem/Major Project/Final Data/colored_images'  # Change this to your local dataset folder
output_folder = 'D:/College/7th Sem/Major Project/Final Data/Augmented Data'  # Change this to your local output folder
categories = ['Mild', 'Moderate', 'No_DR', 'Proliferate_DR', 'Severe']

In [6]:
# Create ImageDataGenerator for augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [8]:
# Function to save augmented images
def save_augmented_images(category, img_array, image_name, augmentations_needed):
    img_array = np.expand_dims(img_array, axis=0)
    aug_iter = datagen.flow(img_array, batch_size=1)
    for i in range(augmentations_needed):
        aug_image = next(aug_iter)[0].astype('uint8')
        save_path = os.path.join(output_folder, category, f"aug_{i}_{image_name}")
        cv2.imwrite(save_path, aug_image)

In [10]:
# Ensure output folder structure
for category in categories:
    os.makedirs(os.path.join(output_folder, category), exist_ok=True)

# Count images per class
image_counts = {category: len(os.listdir(os.path.join(dataset_folder, category))) for category in categories}

# Target count is the maximum number of images from any class (largest class size)
max_count = max(image_counts.values())

In [12]:
# Perform augmentation to balance the dataset
for category in categories:
    category_folder = os.path.join(dataset_folder, category)
    images = os.listdir(category_folder)
    
    # Calculate how many augmentations are needed to balance the class
    current_count = image_counts[category]
    augmentations_needed = max_count - current_count
    
    # Augment images only if the category has fewer images than the largest category
    if augmentations_needed > 0:
        for image_name in tqdm(images, desc=f"Balancing {category}"):
            img_path = os.path.join(category_folder, image_name)
            img = cv2.imread(img_path)
            if img is None:
                continue  # Skip if the image can't be read

            # Distribute augmentations across images
            augmentations_per_image = (augmentations_needed // current_count) + 1  # Add 1 to ensure coverage
            save_augmented_images(category, img, image_name, augmentations_per_image)

            # Update the count of augmented images
            augmentations_needed -= augmentations_per_image
            
            # Stop augmenting once the category reaches the target size
            if augmentations_needed <= 0:
                break

print("Data augmentation and balancing completed.")

Balancing Mild: 100%|██████████| 2463/2463 [13:51<00:00,  2.96it/s]
Balancing Moderate: 100%|██████████| 5428/5428 [29:23<00:00,  3.08it/s]
Balancing Proliferate_DR: 100%|██████████| 757/757 [57:11<00:00,  4.53s/it]
Balancing Severe: 100%|██████████| 947/947 [1:06:21<00:00,  4.20s/it]

Data augmentation and balancing completed.



