In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory. The per-file listing is disabled to keep
# the output short; re-enable the print to inspect which files are available.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # print(os.path.join(dirname, filename))
        pass  # a comment alone is not a loop body; without this the cell does not parse

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import shutil
import cv2
from albumentations import (
    HorizontalFlip, VerticalFlip, RandomBrightnessContrast, Rotate, ShiftScaleRotate
)
from albumentations import Compose
from tqdm import tqdm
import tensorflow as tf

base_dir = '/kaggle/input/potato-disease-leaf-datasetpld/PLD_3_Classes_256/'
output_base_dir = '/kaggle/working/augmented_data/'

# Delete and recreate the output directory so a rerun never mixes old and new files
if os.path.exists(output_base_dir):
    shutil.rmtree(output_base_dir)
os.makedirs(output_base_dir, exist_ok=True)

# Define the augmentation pipeline; each transform is applied with its own probability p
augmentor = Compose([
    HorizontalFlip(p=0.5),
    VerticalFlip(p=0.5),
    RandomBrightnessContrast(p=0.2),
    Rotate(limit=20, p=0.5),
    ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5)
])

for subset in ['Training']:
    subset_dir = os.path.join(base_dir, subset)
    output_subset_dir = os.path.join(output_base_dir, subset)
    os.makedirs(output_subset_dir, exist_ok=True)

    # Only sub-directories are classes; a stray file here would make os.listdir fail below
    class_dirs = [os.path.join(subset_dir, cls) for cls in os.listdir(subset_dir)]
    class_dirs = [path for path in class_dirs if os.path.isdir(path)]
    class_counts = {cls: len(os.listdir(cls)) for cls in class_dirs}
    print(class_counts)

    # The largest class defines the size every other class is topped up to
    max_class = max(class_counts, key=class_counts.get)
    max_count = class_counts[max_class]

    print(f"{subset}: maximum class - {os.path.basename(max_class)} and count - {max_count}")

    for class_dir in class_dirs:
        class_name = os.path.basename(class_dir)
        output_class_dir = os.path.join(output_subset_dir, class_name)
        os.makedirs(output_class_dir, exist_ok=True)

        # Sorted so the images chosen for augmentation do not depend on listdir order
        images = sorted(os.path.join(class_dir, fname) for fname in os.listdir(class_dir))
        num_images_to_generate = max_count - len(images)

        # Copy original images to output byte-for-byte. Decoding and re-encoding
        # a JPEG would recompress it and lose quality for no benefit.
        for img_path in images:
            shutil.copyfile(img_path, os.path.join(output_class_dir, os.path.basename(img_path)))

        # Apply augmentation. Exactly num_images_to_generate files are written for
        # the whole class (not per source image), cycling through the sources so
        # the new images are spread as evenly as possible.
        generated = 0
        cursor = 0
        unreadable = set()
        while generated < num_images_to_generate and len(unreadable) < len(images):
            img_path = images[cursor % len(images)]
            cursor += 1
            if img_path in unreadable:
                continue

            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None instead of raising
                unreadable.add(img_path)
                print(f"Skipping unreadable image: {img_path}")
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            augmented = augmentor(image=img)
            aug_img = augmented['image']
            # The running counter keeps names unique even when a source is reused
            aug_img_name = f"{os.path.splitext(os.path.basename(img_path))[0]}_aug{generated}.jpg"
            aug_img_path = os.path.join(output_class_dir, aug_img_name)

            # Save the augmented image
            cv2.imwrite(aug_img_path, cv2.cvtColor(aug_img, cv2.COLOR_RGB2BGR), [int(cv2.IMWRITE_JPEG_QUALITY), 80])
            generated += 1

    print(f"{subset} augmentation done")


In [None]:
import os
import shutil
import cv2
from albumentations import (
    HorizontalFlip, VerticalFlip, RandomBrightnessContrast, Rotate, ShiftScaleRotate
)
from albumentations import Compose
from tqdm import tqdm
import tensorflow as tf

base_dir = '/kaggle/input/potato-disease-leaf-datasetpld/PLD_3_Classes_256/'
output_base_dir = '/kaggle/working/augmented_data/'

# Delete and recreate the output directory so a rerun never mixes old and new files
if os.path.exists(output_base_dir):
    shutil.rmtree(output_base_dir)
os.makedirs(output_base_dir, exist_ok=True)

# Define the augmentation pipeline; each transform is applied with its own probability p
augmentor = Compose([
    HorizontalFlip(p=0.5),
    VerticalFlip(p=0.5),
    RandomBrightnessContrast(p=0.2),
    Rotate(limit=20, p=0.5),
    ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5)
])

for subset in ['Training']:
    subset_dir = os.path.join(base_dir, subset)
    output_subset_dir = os.path.join(output_base_dir, subset)
    os.makedirs(output_subset_dir, exist_ok=True)

    # Only sub-directories are classes; a stray file here would make os.listdir fail below
    class_dirs = [os.path.join(subset_dir, cls) for cls in os.listdir(subset_dir)]
    class_dirs = [path for path in class_dirs if os.path.isdir(path)]
    class_counts = {cls: len(os.listdir(cls)) for cls in class_dirs}
    print(class_counts)

    # The largest class defines the size every other class is topped up to
    max_class = max(class_counts, key=class_counts.get)
    max_count = class_counts[max_class]

    print(f"{subset}: maximum class - {os.path.basename(max_class)} and count - {max_count}")

    for class_dir in class_dirs:
        class_name = os.path.basename(class_dir)
        output_class_dir = os.path.join(output_subset_dir, class_name)
        os.makedirs(output_class_dir, exist_ok=True)

        # Sorted so the images chosen for augmentation do not depend on listdir order
        images = sorted(os.path.join(class_dir, fname) for fname in os.listdir(class_dir))
        num_images_to_generate = max_count - len(images)

        print(f"Class '{class_name}': {len(images)} images available, {num_images_to_generate} more needed.")

        # Copy original images to output byte-for-byte (no decode / re-encode round trip)
        for img_path in images:
            shutil.copyfile(img_path, os.path.join(output_class_dir, os.path.basename(img_path)))

        # Generate augmented images until the class reaches max_count. The source
        # list is cycled, so a class may need more new images than it has originals.
        augmented_images = 0
        cursor = 0
        unreadable = set()
        while images:
            # Check the stop conditions before decoding anything, so no image is read in vain
            if augmented_images >= num_images_to_generate:
                break
            if len(unreadable) == len(images):
                break  # nothing left that can be decoded; avoid spinning forever

            img_path = images[cursor % len(images)]
            cursor += 1
            if img_path in unreadable:
                continue

            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None instead of raising
                unreadable.add(img_path)
                print(f"Skipping unreadable image: {img_path}")
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            augmented = augmentor(image=img)
            aug_img = augmented['image']
            # The running counter keeps names unique even when a source is reused
            aug_img_name = f"{os.path.splitext(os.path.basename(img_path))[0]}_aug{augmented_images}.jpg"
            aug_img_path = os.path.join(output_class_dir, aug_img_name)

            # Save the augmented image
            cv2.imwrite(aug_img_path, cv2.cvtColor(aug_img, cv2.COLOR_RGB2BGR), [int(cv2.IMWRITE_JPEG_QUALITY), 80])
            augmented_images += 1

    print(f"{subset} augmentation done")


In [2]:
import os
import shutil
import cv2
from albumentations import (
    HorizontalFlip, VerticalFlip, RandomBrightnessContrast, Rotate, ShiftScaleRotate
)
from albumentations import Compose
import random
from tqdm import tqdm


base_dir = '/kaggle/input/potato-disease-leaf-datasetpld/PLD_3_Classes_256/'
output_base_dir = '/kaggle/working/balanced_data/'

# Seed Python's RNG so the same source images are selected on every run.
# NOTE(review): whether this also fixes the albumentations transforms depends
# on the installed albumentations version - confirm if exact reproducibility matters.
SEED = 42
random.seed(SEED)

# Start from an empty output directory so a rerun never mixes old and new files
if os.path.exists(output_base_dir):
    shutil.rmtree(output_base_dir)
os.makedirs(output_base_dir, exist_ok=True)

# Augmentation pipeline; each transform is applied with its own probability p
augmentor = Compose([
    HorizontalFlip(p=0.5),
    VerticalFlip(p=0.5),
    RandomBrightnessContrast(p=0.2),
    Rotate(limit=20, p=0.5),
    ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5)
])

# Running total of augmented images written across every subset and class
total_augmentations = 0

# NOTE(review): Validation and Testing are balanced with synthetic images too;
# confirm that evaluating on augmented data is intended.
for subset in ['Training','Validation','Testing']:
    subset_dir = os.path.join(base_dir, subset)
    output_subset_dir = os.path.join(output_base_dir, subset)
    os.makedirs(output_subset_dir, exist_ok=True)

    # Only sub-directories are classes; a stray file here would make os.listdir fail below
    class_dirs = [os.path.join(subset_dir, cls) for cls in os.listdir(subset_dir)]
    class_dirs = [path for path in class_dirs if os.path.isdir(path)]
    class_counts = {cls: len(os.listdir(cls)) for cls in class_dirs}
    print(class_counts)

    # The largest class defines the size every other class is topped up to
    max_class = max(class_counts, key=class_counts.get)
    max_count = class_counts[max_class]
    print(f"{subset}: maximum class - {os.path.basename(max_class)} and count - {max_count}")

    for class_dir in class_dirs:
        class_name = os.path.basename(class_dir)
        output_class_dir = os.path.join(output_subset_dir, class_name)
        os.makedirs(output_class_dir, exist_ok=True)

        images = [os.path.join(class_dir, fname) for fname in os.listdir(class_dir)]
        num_images_to_generate = max_count - len(images)  # How many augmentations are needed

        print(f"Class '{class_name}': {len(images)} images available, {num_images_to_generate} more needed.")

        # Copy the originals byte-for-byte. Decoding and re-encoding a JPEG
        # would recompress it and lose quality for no benefit.
        for img_path in images:
            shutil.copyfile(img_path, os.path.join(output_class_dir, os.path.basename(img_path)))

        class_augmentations = 0
        if num_images_to_generate > 0 and images:
            # random.sample raises ValueError when asked for more items than the
            # population holds, so use every image `full_passes` times and sample
            # only the remainder. With fewer needed than available this is exactly
            # random.sample(images, num_images_to_generate).
            full_passes, remainder = divmod(num_images_to_generate, len(images))
            selected_images = images * full_passes + random.sample(images, remainder)

            for i, img_path in enumerate(selected_images):
                img = cv2.imread(img_path)
                if img is None:
                    # cv2.imread signals failure by returning None instead of raising
                    print(f"Skipping unreadable image: {img_path}")
                    continue
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                augmented = augmentor(image=img)
                aug_img = augmented['image']
                # The enumerate index keeps names unique even when a source is reused
                aug_img_name = f"{os.path.splitext(os.path.basename(img_path))[0]}_aug{i}.jpg"
                aug_img_path = os.path.join(output_class_dir, aug_img_name)

                cv2.imwrite(aug_img_path, cv2.cvtColor(aug_img, cv2.COLOR_RGB2BGR),
                            [int(cv2.IMWRITE_JPEG_QUALITY), 80])

                class_augmentations += 1

        total_augmentations += class_augmentations
        print(f"Augmentation for class '{class_name}' completed. {class_augmentations} new images generated.")

    print(f"{subset} augmentation done.")


print(f"Total augmentations across all classes: {total_augmentations}")


{'/kaggle/input/potato-disease-leaf-datasetpld/PLD_3_Classes_256/Training/Late_Blight': 1132, '/kaggle/input/potato-disease-leaf-datasetpld/PLD_3_Classes_256/Training/Early_Blight': 1303, '/kaggle/input/potato-disease-leaf-datasetpld/PLD_3_Classes_256/Training/Healthy': 816}
Training: maximum class - Early_Blight and count - 1303
Class 'Late_Blight': 1132 images available, 171 more needed.
Augmentation for class 'Late_Blight' completed. 171 new images generated.
Class 'Early_Blight': 1303 images available, 0 more needed.
Augmentation for class 'Early_Blight' completed. 0 new images generated.
Class 'Healthy': 816 images available, 487 more needed.
Augmentation for class 'Healthy' completed. 487 new images generated.
Training augmentation done.
{'/kaggle/input/potato-disease-leaf-datasetpld/PLD_3_Classes_256/Validation/Late_Blight': 151, '/kaggle/input/potato-disease-leaf-datasetpld/PLD_3_Classes_256/Validation/Early_Blight': 163, '/kaggle/input/potato-disease-leaf-datasetpld/PLD_3_Clas

In [3]:
!zip -r balanced_data.zip /kaggle/working/balanced_data

  adding: kaggle/working/balanced_data/ (stored 0%)
  adding: kaggle/working/balanced_data/Training/ (stored 0%)
  adding: kaggle/working/balanced_data/Training/Late_Blight/ (stored 0%)
  adding: kaggle/working/balanced_data/Training/Late_Blight/Late_Blight_633.jpg (deflated 5%)
  adding: kaggle/working/balanced_data/Training/Late_Blight/Late_Blight_989.jpg (deflated 5%)
  adding: kaggle/working/balanced_data/Training/Late_Blight/Late_Blight_81_aug166.jpg (deflated 1%)
  adding: kaggle/working/balanced_data/Training/Late_Blight/Late_Blight_414.jpg (deflated 6%)
  adding: kaggle/working/balanced_data/Training/Late_Blight/Late_Blight_473.jpg (deflated 5%)
  adding: kaggle/working/balanced_data/Training/Late_Blight/Late_Blight_210.jpg (deflated 5%)
  adding: kaggle/working/balanced_data/Training/Late_Blight/Late_Blight_314.jpg (deflated 5%)
  adding: kaggle/working/balanced_data/Training/Late_Blight/Late_Blight_384.jpg (deflated 5%)
  adding: kaggle/working/balanced_data/Training/Late_Bli

In [4]:
# List the current working directory to check that the archive was written.
!ls

balanced_data  balanced_data.zip


In [5]:
from IPython.display import FileLink
# Show a link to the archive as this cell's output.
archive_name = 'balanced_data.zip'
FileLink(archive_name)