In [None]:
import pandas as pd
import numpy as np
import os
import uuid
import cv2
import albumentations as A
from datetime import datetime

In [None]:
# Class labels of the dataset, in processing order.
DIRECTORY_LIST = [
    "black_stem_borer",
    "white_stem_borer",
    "bacterial_panicle_blight",
    "bacterial_leaf_blight",
    "brown_spot",
    "leaf_roller",
    "hispa",
    "downy_mildew",
    "blast",
    "normal",
    "bacterial_leaf_streak",
    "tungro",
    "yellow_stem_borer",
]

In [None]:
# Root directory of the source dataset and the metadata CSV stored inside it
input_data_dir = os.path.join('data', 'paddy-doctor-diseases-small')
input_metadata_file_path = os.path.join(input_data_dir, 'metadata.csv')

# One input path per entry of DIRECTORY_LIST, in the same order
input_data_dir_list = [
    os.path.join(input_data_dir, class_name)
    for class_name in DIRECTORY_LIST
]

In [None]:
# Tag the output directory with the current local date and time
# (minute resolution), e.g. 20240131_0930
timestamp = f"{datetime.now():%Y%m%d_%H%M}"
output_data_dir = os.path.join('data', 'paddy-doctor-diseases-augmented_' + timestamp)
# The metadata CSV for the generated dataset lives inside the output directory
output_metadata_file_path = os.path.join(output_data_dir, 'metadata.csv')

In [None]:
# Make sure the output directory (and any missing parents) exists;
# an already existing directory is not an error
os.makedirs(name=output_data_dir, exist_ok=True)

In [None]:
# Augmentation pipeline: transforms are applied in the order listed, each
# with its own probability `p`.
augmentation_pipeline = A.Compose(transforms=[
    # Always bring the image to a fixed 256x256 size first
    A.Resize(height=256, width=256),

    # --- Geometric transforms ---
    # Mirror left/right, 50% chance
    A.HorizontalFlip(p=0.5),
    # Mirror top/bottom, 50% chance
    A.VerticalFlip(p=0.5),
    # Rotate by an angle within +/-30 degrees, 50% chance
    A.Rotate(limit=30, p=0.5),

    # --- Blur and colour transforms ---
    # Gaussian blur with a kernel size between 3 and 7, 30% chance
    A.GaussianBlur(blur_limit=(3, 7), p=0.3),
    # Random brightness/contrast change, 50% chance
    A.RandomBrightnessContrast(p=0.5),
    # Random hue, saturation and value shifts, 50% chance
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),

    # --- Shifts and distortions ---
    # Shift and scale only (rotate_limit=0 disables rotation), 50% chance
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=0, p=0.5),
    # Grid-based warping, 50% chance
    A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.5),
    # Lens-like optical distortion, 10% chance
    A.OpticalDistortion(distort_limit=0.2, shift_limit=0.2, p=0.1),

    # --- Channel, intensity and contrast transforms ---
    # Shuffle the colour channels, 10% chance
    A.ChannelShuffle(p=0.1),
    # Random gamma adjustment within the (80, 120) limit, 30% chance
    A.RandomGamma(gamma_limit=(80, 120), p=0.3),
    # Histogram equalization, 30% chance
    A.Equalize(p=0.3),
    # PCA-based colour augmentation, 30% chance
    A.FancyPCA(alpha=0.1, p=0.3),
    # Contrast Limited Adaptive Histogram Equalization, 30% chance
    A.CLAHE(clip_limit=4.0, tile_grid_size=(8, 8), p=0.3),
])


In [None]:
# How many augmented variants to generate
num_augmentations = 3

# Collected rows for the output metadata table; starts empty
metadata = list()

In [None]:
# Process images in each directory.
#
# For every readable image this saves a copy under a unique, label-prefixed
# name in `output_data_dir`, then writes `num_augmentations` augmented
# variants next to it. One [filename, label] row is appended to `metadata`
# for each file that was actually written.
#
# Raises OSError as soon as an image cannot be written, so that `metadata`
# never references a file that is missing on disk.
#
# NOTE(review): the original image is saved without passing through
# `augmentation_pipeline`, so it is not resized the way the augmented
# variants are - confirm that mixed image sizes in the output are intended.
for directory, label in zip(input_data_dir_list, DIRECTORY_LIST):
    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order (and the metadata row order) deterministic
    for img_filename in sorted(os.listdir(directory)):
        img_path = os.path.join(directory, img_filename)

        # Load image; cv2.imread returns None for anything it cannot decode
        image = cv2.imread(img_path)
        if image is None:
            # Skip, but leave a trace instead of dropping the file silently
            print(f"Skipping unreadable file: {img_path}")
            continue

        # Generate a new filename for the original image with class label prefix
        original_new_filename = f"{label}_{uuid.uuid4()}.jpg"
        original_output_path = os.path.join(output_data_dir, original_new_filename)

        # Save the original image with the new name; cv2.imwrite reports
        # failure through its return value rather than an exception
        if not cv2.imwrite(original_output_path, image):
            raise OSError(f"Failed to write image: {original_output_path}")

        # Append to metadata for the original image
        metadata.append([original_new_filename, label])

        for _ in range(num_augmentations):
            # Apply augmentation (the pipeline returns a dict keyed by target name)
            augmented = augmentation_pipeline(image=image)['image']

            # Generate a new filename for the augmented image with class label prefix
            new_filename = f"{label}_{uuid.uuid4()}.jpg"

            # Define output path for saving the augmented image
            output_path = os.path.join(output_data_dir, new_filename)

            # Save augmented image, failing loudly if the write did not succeed
            if not cv2.imwrite(output_path, augmented):
                raise OSError(f"Failed to write image: {output_path}")

            # Append to metadata for the augmented image
            metadata.append([new_filename, label])


In [None]:
# Turn the collected rows into a two-column table (filename, class) and
# write it to the output metadata CSV without the index column
metadata_df = pd.DataFrame(data=metadata, columns=['filename', 'class'])
metadata_df.to_csv(path_or_buf=output_metadata_file_path, index=False)

16225