In [47]:
import tensorflow as tf
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import random

In [48]:
# Dataset locations are resolved relative to the directory the kernel was started in.
current_path = os.getcwd()
plant_doc_path = os.path.join(current_path, 'datasets/original/PlantDoc')
# Both split directories live directly under the PlantDoc root.
plant_doc_train, plant_doc_test = (
    os.path.join(plant_doc_path, split_name) for split_name in ('train', 'test')
)

def load_images_pd(set_path):
    """Load every image of one dataset split, grouped by class directory.

    Parameters
    ----------
    set_path : str
        Directory that contains one sub-directory per class; each
        sub-directory holds the image files of that class.

    Returns
    -------
    dict
        Maps each class directory name to a list of PIL images, every one
        resized to 224x224. A class directory without files maps to an
        empty list.

    Notes
    -----
    Entries of ``set_path`` that are not directories, and entries of a class
    directory that are not regular files, are skipped instead of raising.
    Files that PIL cannot decode still raise, so corrupt data is not hidden.
    """
    images = {}
    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # of classes and images stable between runs.
    for directory in sorted(os.listdir(set_path)):
        disease_directory = os.path.join(set_path, directory)
        # Stray files next to the class folders (e.g. .DS_Store) are not classes.
        if not os.path.isdir(disease_directory):
            continue

        class_images = []
        for file in sorted(os.listdir(disease_directory)):
            image_path = os.path.join(disease_directory, file)
            # A nested folder cannot be opened as an image.
            if not os.path.isfile(image_path):
                continue
            with Image.open(image_path) as img:
                # resize() returns a new image object, so the result stays
                # usable after the source file is closed by the with-block.
                class_images.append(img.resize((224, 224)))
        images[directory] = class_images

    return images

# Load both splits (train first, then test) into {class name: [images]} dictionaries.
X_train, X_test = (
    load_images_pd(split_dir) for split_dir in (plant_doc_train, plant_doc_test)
)

In [49]:
def _label_image_frame(images_by_label):
    """Flatten a {label: [images]} mapping into a two-column DataFrame.

    Column 0 holds the label and column 1 the image, one row per image.
    """
    rows = []
    for label, label_images in images_by_label.items():
        rows.extend((label, image) for image in label_images)
    return pd.DataFrame(data=rows)


all_images_train = _label_image_frame(X_train)
all_images_test = _label_image_frame(X_test)

In [50]:
# Fixed seed so the train/validation membership is the same on every run;
# without it each execution exports a different dataset.
SPLIT_SEED = 42

# Column 1 holds the images and column 0 the labels. 15% of the training
# images are held out for validation, stratified by label so every class keeps
# the same proportion in both parts.
X_train, X_val, y_train, y_val = train_test_split(
    all_images_train[1],
    all_images_train[0],
    test_size=0.15,
    stratify=all_images_train[0],
    random_state=SPLIT_SEED,
)
# The test split is used as loaded, without any further splitting.
X_test, y_test = all_images_test[1], all_images_test[0]

In [51]:
def get_crop_size(img, min_prop=0.7):
    """Pick a random crop size for ``img``.

    Parameters
    ----------
    img : object with a 3-element ``shape`` (height, width, channels)
    min_prop : float
        Lower bound for the fraction of a side that is kept.

    Returns
    -------
    tuple
        ``(height, width, channels)`` of the crop; height and width are the
        original sides scaled by random fractions and floored to integers,
        channels is passed through unchanged.
    """
    rows, cols, channels = img.shape

    def _draw_at_least(lower):
        # Uniform sample between `lower` and 1.
        return lower + (1. - lower) * np.random.random()

    # Two draws, the second bounded below by the first. The longer side gets
    # the first (smaller) fraction, the other side the second; ties follow the
    # width-first branch.
    first_frac = _draw_at_least(min_prop)
    second_frac = _draw_at_least(first_frac)
    if rows > cols:
        row_frac, col_frac = first_frac, second_frac
    else:
        row_frac, col_frac = second_frac, first_frac

    crop_rows = np.floor(row_frac * rows).astype(int)
    crop_cols = np.floor(col_frac * cols).astype(int)
    return crop_rows, crop_cols, channels

def random_augmentation(img):
    """Apply a fixed sequence of random augmentations to one image.

    The steps, in order: rotation (range 20, reflect fill), contrast,
    brightness, hue, saturation, JPEG quality, vertical flip, horizontal
    flip, and finally a random crop whose size comes from
    ``get_crop_size(..., 0.67)``.

    Parameters
    ----------
    img : array with axes (rows, cols, channels), as fixed by the rotation call
        NOTE(review): callers in this notebook pass an RGB array built from a
        PIL image; other dtypes/layouts are untested here.

    Returns
    -------
    The augmented image as a NumPy array (result of ``.numpy()``); its height
    and width are generally smaller than the input because of the crop.
    """
    # The rotation helper lives in the Keras preprocessing module; the axis
    # arguments declare a channels-last layout.
    result = tf.keras.preprocessing.image.random_rotation(
        img, 20, row_axis=0, col_axis=1, channel_axis=2, fill_mode='reflect'
    )

    # Colour / compression / flip steps, applied strictly in this order.
    tensor_steps = (
        lambda t: tf.image.random_contrast(t, 0.8, 1.2),
        lambda t: tf.image.random_brightness(t, 0.08),
        lambda t: tf.image.random_hue(t, 0.025),
        lambda t: tf.image.random_saturation(t, 0.85, 1.15),
        lambda t: tf.image.random_jpeg_quality(t, 75, 95),
        tf.image.random_flip_up_down,
        tf.image.random_flip_left_right,
    )
    for step in tensor_steps:
        result = step(result)

    # The crop size is drawn from the shape of the already-augmented tensor.
    crop_shape = get_crop_size(result, 0.67)
    cropped = tf.image.random_crop(result, crop_shape)
    return cropped.numpy()


def get_augmented_image(img):
    """Return a randomly augmented 224x224 version of a PIL image.

    The image is converted to RGB, passed through ``random_augmentation`` as
    a NumPy array, turned back into a PIL image and resized to 224x224 (the
    augmentation crops, so the size has to be restored).
    """
    rgb_pixels = np.array(img.convert('RGB'))
    augmented_pixels = random_augmentation(rgb_pixels)
    augmented = Image.fromarray(augmented_pixels)
    return augmented.resize((224, 224))

In [52]:
# Root folder of the exported dataset; each split gets its own sub-folder.
AUGMENTED_ROOT = 'datasets/augmented/PlantDoc'
# Training classes with fewer images than this are topped up with augmented copies.
MIN_TRAIN_IMAGES_PER_CLASS = 150


def _export_split(images, labels, split_name, min_per_class=0):
    """Write one split to disk as JPEG files, one folder per class.

    Files are named ``0.jpg``, ``1.jpg``, ... inside
    ``<AUGMENTED_ROOT>/<split_name>/<label>/``.

    Parameters
    ----------
    images : pandas.Series
        The images of the split (PIL images).
    labels : pandas.Series
        The class label of each image, sharing the index of ``images``.
    split_name : str
        Name of the split sub-folder ('train', 'val' or 'test').
    min_per_class : int, default 0
        When a class has fewer images than this, randomly chosen images of
        that class are augmented and saved until the count is reached. The
        default of 0 disables augmentation.
    """
    for img_label in labels.unique():
        dest = f'{AUGMENTED_ROOT}/{split_name}/{img_label}/'
        os.makedirs(os.path.dirname(dest), exist_ok=True)

        # .loc makes it explicit that the lookup is by index label, not position.
        class_images = images.loc[labels[labels == img_label].index]

        counter = 0
        for img in class_images:
            # Round trip through NumPy kept from the original code: the saved
            # image is rebuilt from the RGB pixel array only.
            # NOTE(review): img.convert('RGB') alone is probably sufficient —
            # confirm the written files are unaffected before simplifying.
            rgb_image = Image.fromarray(np.array(img.convert('RGB')))
            rgb_image.save(dest + str(counter) + '.jpg')
            counter += 1

        # Top up under-represented classes with augmented copies.
        while counter < min_per_class:
            source_label = random.choice(class_images.index)
            augmented_image = get_augmented_image(images.loc[source_label])
            augmented_image.save(dest + str(counter) + '.jpg')
            counter += 1


# Only the training split is augmented; validation and test are exported as-is.
_export_split(X_train, y_train, 'train', min_per_class=MIN_TRAIN_IMAGES_PER_CLASS)
_export_split(X_val, y_val, 'val')
_export_split(X_test, y_test, 'test')

In [53]:
# Number of images per class in the training set as loaded (column 0 holds the labels).
train_labels = all_images_train[0]
train_labels.value_counts()

Corn leaf blight              179
Tomato Septoria leaf spot     140
Squash Powdery mildew leaf    124
Raspberry leaf                112
Potato leaf early blight      108
Corn rust leaf                106
Blueberry leaf                104
Peach leaf                    102
Tomato leaf late blight       101
Tomato leaf bacterial spot    101
Potato leaf late blight        97
Strawberry leaf                88
Tomato mold leaf               85
Apple Scab Leaf                83
Apple leaf                     82
Tomato Early blight leaf       79
Apple rust leaf                78
Tomato leaf yellow virus       70
Corn Gray leaf spot            64
Bell_pepper leaf spot          62
grape leaf                     57
Soyabean leaf                  57
grape leaf black rot           56
Tomato leaf                    55
Bell_pepper leaf               53
Cherry leaf                    47
Tomato leaf mosaic virus       44
Name: 0, dtype: int64