In [None]:
import numpy as np
import pandas as pd
import os
import cv2
import random
import matplotlib.pyplot as plt
from itertools import groupby
from random import shuffle

In [None]:
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
from sklearn.model_selection import train_test_split
from keras import layers
from tensorflow.keras import Sequential
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

In [None]:
# Side length, in pixels, that every image is resized to before it is fed to a model.
IMAGE_SIZE = 240
# Lower-case plant names whose dataset folders are kept; every other folder is ignored.
target_plants = ['grape', 'potato', 'tomato', 'corn']

In [None]:
kaggle_dataset = '/kaggle/input/plant-disease-classification-merged-dataset'
# plant name -> (plant index, {disease name: disease index})
classes = {}
index = 0

# Folder names are expected to look like "<Plant>__<Disease>".
for folder_name in os.listdir(kaggle_dataset):
    parts = folder_name.split("__")
    if len(parts) < 2:
        # Entry does not follow the naming scheme (stray file, README, ...):
        # skip it instead of failing with IndexError on parts[1].
        continue

    main_class = parts[0].lower()
    sub_class = parts[1]
    if main_class not in target_plants:
        continue

    # Plant indices are handed out in the order plants are first encountered.
    if main_class not in classes:
        classes[main_class] = (index, {})
        index += 1

    # The inner dict is mutated in place, so the tuple never needs re-assigning.
    sub_classes = classes[main_class][1]
    if sub_class not in sub_classes:
        sub_classes[sub_class] = len(sub_classes)

In [None]:
# Inspect the mapping: plant name -> (plant index, {disease name: disease index}).
classes

**Creating the dataframe**

In [None]:
# One record per image file: plant index, disease index and the file path.
data = []

for folder_name in os.listdir(kaggle_dataset):
    parts = folder_name.split("__")
    if len(parts) < 2:
        # Not a "<Plant>__<Disease>" folder; skip rather than crash on parts[1].
        continue

    main_class = parts[0].lower()
    sub_class = parts[1]
    if main_class not in target_plants:
        continue

    class_index, sub_classes = classes[main_class]
    sub_class_index = sub_classes[sub_class]
    folder_path = os.path.join(kaggle_dataset, folder_name)

    data.extend(
        {
            'plants': class_index,
            'diseases': sub_class_index,
            'path': os.path.join(folder_path, file_name),
        }
        for file_name in os.listdir(folder_path)
    )

df = pd.DataFrame(data, columns=['plants', 'diseases', 'path'])

In [None]:
# rd= random.randint(0, len(df))
# pl = df.loc[rd]['plants']
# d = df.loc[rd]['diseases']
# p = df.loc[rd]['path']

# print(pl , " ", d , " ", p)

In [None]:
grouped = df.groupby('plants')
# Cap on how many images of each plant go into the plant classifier.
max_images_per_label = 2000
# One list of (plant index, path) pairs per target plant, addressed by plant index.
plant_lists = [[] for _ in target_plants]

for plant_index, group in grouped:
    # Use the group key itself as the label: the enumerate position only equals
    # the plant index while every plant has at least one image.
    plant_index = int(plant_index)
    paths = group['path'].tolist()
    shuffle(paths)  # random subset when the plant has more images than the cap
    plant_lists[plant_index] = [(plant_index, path) for path in paths[:max_images_per_label]]

In [None]:
# Number of sampled images for each of the four plant slots.
for plant_slot in range(4):
    print(len(plant_lists[plant_slot]))

In [None]:
# Spot-check one (plant index, path) entry. The position 440 is arbitrary and
# raises IndexError if the first plant has fewer than 441 sampled images.
plant_lists[0][440]

In [None]:
# Concatenate the per-plant lists into a single object array of (plant index, path) rows.
flattened_samples = []
for plant_samples in plant_lists:
    flattened_samples.extend(plant_samples)
plants_array = np.array(flattened_samples, dtype=object)

In [None]:
# Total number of (plant index, path) samples across all plants.
len(plants_array)

In [None]:
# Records of [resized BGR image, plant index, source path].
dataset = []
# Paths that could not be decoded, kept so data problems are visible.
skipped_paths = []

for row in plants_array:
    label, image_path = row[0], row[1]
    # cv2.imread does not raise on a missing/corrupt file; it returns None.
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        skipped_paths.append(image_path)
        continue
    try:
        image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
    except cv2.error:
        # Still best-effort, but only for OpenCV failures, not every exception.
        skipped_paths.append(image_path)
        continue
    dataset.append([image, label, image_path])

if skipped_paths:
    print(f"Skipped {len(skipped_paths)} unreadable image(s)")

In [None]:
# A bare expression is only displayed when it is the last line of a cell,
# so both values are printed explicitly.
print(len(dataset))
print(len(classes))

In [None]:
# Split the records into an image tensor (N, IMAGE_SIZE, IMAGE_SIZE, 3) and a label vector.
images = [record[0] for record in dataset]
labels = [record[1] for record in dataset]
x = np.array(images).reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 3)
y = np.array(labels)

In [None]:
# Fixed seed so the split (and the reported metrics) can be reproduced;
# stratify keeps the per-plant proportions equal in both partitions.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)
# One-hot encode the plant indices for categorical cross-entropy.
num_plant_classes = len(classes)
y_train = to_categorical(y_train, num_plant_classes)
y_test = to_categorical(y_test, num_plant_classes)

In [None]:
# VGG16 comes from tensorflow.keras (imported in the import cell) so the base
# network and the tensorflow.keras Sequential/Dense layers share one Keras
# implementation; mixing in the standalone `keras` package can break when the
# two packages are different versions.
# Pre-downloaded "notop" weights file (presumably the ImageNet weights — confirm).
vgg16_path = '/kaggle/input/keras-pretrained-models/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
vgg = VGG16(
    weights=vgg16_path,
    include_top=False,  # drop the original classifier head
    input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
)

# Freeze the convolutional base: only the layers stacked on top are trained.
for layer in vgg.layers:
    layer.trainable = False

# # Main model

In [None]:
# Plant classifier: frozen VGG16 base plus a small trainable head.
# Dropout is taken from tensorflow.keras.layers so every layer in the stack
# belongs to the same Keras implementation as Sequential/Dense/Flatten.
# NOTE(review): the Dense layers sit before Flatten, so they act on the last
# axis of the VGG feature map rather than on a flattened vector — confirm
# this is intended.
model = Sequential([
    vgg,
    Dense(256, activation='relu'),
    Dropout(rate=0.5),
    Dense(128, activation='sigmoid'),
    Dropout(0.1),
    Flatten(),
    Dense(len(classes), activation="softmax"),  # one output per plant
])

In [None]:
# Print the layer-by-layer structure and parameter counts of the plant classifier.
model.summary()

In [None]:
# Categorical cross-entropy matches the one-hot labels produced by to_categorical.
model.compile(optimizer="adam",loss="categorical_crossentropy",metrics=["accuracy"])

In [None]:
# Train for 15 epochs. The held-out test split doubles as the validation data,
# so the later evaluate() call reports on data already used for monitoring.
history = model.fit(x_train,y_train,batch_size=32,epochs=15,validation_data=(x_test,y_test))

In [None]:
# Loss and accuracy of the plant classifier on the test split.
loss, accuracy = model.evaluate(x_test, y_test)
for metric_label, metric_value in (("loss:", loss), ("Accuracy:", accuracy)):
    print(metric_label, metric_value)

In [None]:
# Training vs. validation accuracy per epoch for the plant classifier.
ax = plt.subplot(2, 2, 1)
ax.plot(history.history['accuracy'], label='Training Accuracy')
ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Plant Model')
ax.legend()
plt.show()

In [None]:
# Class probabilities -> predicted plant index.
y_pred1 = model.predict(x_test)
y_pred = np.argmax(y_pred1, axis=1)
# y_test is one-hot (2-D) the first time this cell runs and is replaced by
# integer labels; the guard keeps a re-run from failing on a 1-D array.
if y_test.ndim > 1:
    y_test = np.argmax(y_test, axis=1)
print(classification_report(y_test, y_pred))

In [None]:
# Show one random test image with its true and predicted plant index.
sample = random.choice(range(len(x_test)))
image = x_test[sample]
category = y_test[sample]
pred_category = y_pred[sample]

plt.subplot(2,5,2)
# Images were loaded with cv2.imread, i.e. in BGR channel order, while
# matplotlib expects RGB; convert so the colours are displayed correctly.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.xlabel("Actual:{}\nPrediction:{}".format(category,pred_category))

In [None]:
# Print (actual, predicted) plant indices for 50 randomly drawn test samples.
for _draw in range(50):
    sample = random.choice(range(len(x_test)))
    category, pred_category = y_test[sample], y_pred[sample]

    print(category , "   ", pred_category)

In [None]:
# model.save('/kaggle/working/plant_model.h5')

# # Models For Diseases

In [None]:
# Plant indices are assigned in os.listdir order, so they cannot be assumed to
# be tomato=0, potato=1, corn=2, grape=3. Resolve each group's plant name
# through `classes` instead of relying on its position.
plant_name_by_index = {
    plant_index: plant_name
    for plant_name, (plant_index, _disease_map) in classes.items()
}
samples_by_plant = {plant_name: [] for plant_name in target_plants}

for plant_index, group in grouped:
    plant_name = plant_name_by_index[plant_index]
    # (disease index, image path) pairs for this plant
    samples_by_plant[plant_name].extend(
        zip(group['diseases'].tolist(), group['path'].tolist())
    )

tomato = samples_by_plant['tomato']
potato = samples_by_plant['potato']
corn = samples_by_plant['corn']
grape = samples_by_plant['grape']
            

In [None]:
# Peek at one (disease index, image path) pair.
grape[0]

# Important functions

In [None]:
def preprocess(plant , n):
    """Sample at most ``n`` entries per disease and report the shortfall.

    Args:
        plant: iterable of ``(disease index, image path)`` pairs; the path's
            parent folder must be named ``<plant>__<disease>``.
        n: target number of samples per disease.

    Returns:
        A tuple ``(selected, names, shortfall)``:
        ``selected`` is the list of sampled pairs, ``names`` maps each disease
        index to the disease name parsed from the folder, and ``shortfall``
        maps each disease index with fewer than ``n`` samples to the number of
        images missing.

    Side effect: prints the size of every disease group.
    """
    label_of = lambda sample: sample[0]
    ordered = sorted(plant, key=label_of)  # groupby needs equal keys adjacent

    picked = []
    label_names = {}
    shortfall = {}

    for label, members in groupby(ordered, key=label_of):
        members = list(members)
        available = len(members)
        print(available)

        # ".../<plant>__<disease>/<file>" -> "<disease>"
        folder_name = members[0][1].split("/")[-2]
        label_names[label] = folder_name.split("__")[1]

        picked.extend(random.sample(members, min(available, n)))

        if available < n:
            # this many extra images must be produced by augmentation
            shortfall[label] = n - available

    return picked, label_names, shortfall

def create_dataset(plant , aug):
    """Load, resize and (where needed) augment the images of one plant.

    Args:
        plant: iterable of ``(disease index, image path)`` pairs.
        aug: dict mapping a disease index to the number of extra augmented
            images that disease needs.

    Returns:
        List of ``[image, disease index]`` records, where every image has
        shape ``(IMAGE_SIZE, IMAGE_SIZE, 3)``. Unreadable files are skipped.

    The extra images of a disease are spread evenly over all of its readable
    source images rather than being generated from a single image.

    NOTE(review): augmented copies are created before the train/test split
    done in get_all_train_test, so variants of one source image can end up on
    both sides of the split — consider augmenting after splitting.
    """
    target_size = (IMAGE_SIZE, IMAGE_SIZE)
    dataset = []
    readable_paths = {}  # disease index -> paths that were loaded successfully

    for row in plant:
        label, path = row[0], row[1]
        # cv2.imread returns None (it does not raise) for unreadable files.
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        if image is None:
            continue
        try:
            image = cv2.resize(image, target_size)
        except cv2.error:
            continue
        dataset.append([image, label])
        readable_paths.setdefault(label, []).append(path)

    for label, deficit in aug.items():
        paths = readable_paths.get(label, [])
        if not paths or deficit <= 0:
            continue
        # Give every source image `base` copies, and the first `extra` one more.
        base, extra = divmod(deficit, len(paths))
        for position, path in enumerate(paths):
            copies = base + (1 if position < extra else 0)
            if copies == 0:
                break  # every remaining image would also get zero copies
            for augmented_img in augment_image(path, copies):
                dataset.append([cv2.resize(augmented_img, target_size), label])

    return dataset
    
def augment_image(image_path, n):
    """Return ``n`` randomly transformed variants of the image at ``image_path``.

    Args:
        image_path: path of the source image, read in colour with OpenCV.
        n: number of augmented images to produce.

    Returns:
        List of ``n`` arrays produced by ``ImageDataGenerator.random_transform``.

    Raises:
        ValueError: if the image cannot be read.
    """
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread signals failure by returning None; fail with a clear message
        # instead of an obscure error inside random_transform.
        raise ValueError(f"Could not read image for augmentation: {image_path}")

    # Random rotation, shift, shear, zoom and horizontal flip; pixels exposed by
    # the transform are filled with the nearest existing pixel values.
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    return [datagen.random_transform(image) for _ in range(n)]

def get_all_train_test(dataset , d_classes, test_size=0.2, random_state=None):
    """Split ``[image, label]`` records into train/test arrays with one-hot labels.

    Args:
        dataset: list of ``[image, label]`` records; every image must reshape
            to ``(IMAGE_SIZE, IMAGE_SIZE, 3)``.
        d_classes: collection whose length is the number of label classes.
        test_size: fraction of the samples held out for testing (default 0.2).
        random_state: optional seed forwarded to ``train_test_split`` so the
            split can be reproduced; ``None`` keeps the split random.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` — note this order differs from
        the one returned by ``train_test_split``.

    Side effect: prints the shape of the full image tensor.
    """
    x = np.array([record[0] for record in dataset]).reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 3)
    y = np.array([record[1] for record in dataset])
    print(x.shape)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )

    num_classes = len(d_classes)
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    return x_train, y_train, x_test, y_test

def get_model(data_classes):
    """Build an (uncompiled) disease classifier on top of the shared VGG16 base.

    Args:
        data_classes: collection whose length is the number of output classes.

    Returns:
        A ``Sequential`` model ending in a softmax layer with
        ``len(data_classes)`` units.

    The module-level ``vgg`` instance is reused by every model built here.
    Dropout comes from ``tensorflow.keras.layers`` so all layers belong to the
    same Keras implementation as ``Sequential``.
    """
    return Sequential([
        vgg,
        Dense(256, activation='relu'),
        Dropout(rate=0.5),
        Dense(128, activation='sigmoid'),
        Dropout(0.1),
        Flatten(),
        Dense(len(data_classes), activation="softmax"),
    ])

# # Corn Disease Model

In [None]:
# Sample up to 1000 entries per corn disease; corn_aug records how many extra
# images each disease with fewer than 1000 samples needs.
corn_dataset, corn_classes, corn_aug = preprocess(corn, 1000)

In [None]:
# Disease index -> disease name parsed from the folder names.
print(corn_classes)

In [None]:
# Replaces the (disease index, path) samples with [image, disease index] records.
# Not idempotent: re-running this cell feeds image records back into create_dataset.
corn_dataset = create_dataset(corn_dataset, corn_aug)

In [None]:
# Rebinds the notebook-wide x_train/y_train/x_test/y_test names to the corn data.
x_train, y_train, x_test, y_test = get_all_train_test(corn_dataset , corn_classes)

In [None]:
# One softmax output per corn disease, on top of the shared frozen VGG16 base.
corn_model = get_model(corn_classes)

In [None]:
# Print the layer-by-layer structure of the corn disease model.
corn_model.summary()

In [None]:
# Compile and train the corn disease model; the test split is also used for validation.
corn_model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
corn_history = corn_model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=15,
    validation_data=(x_test, y_test),
)

In [None]:
# Training vs. validation accuracy per epoch for the corn disease model.
ax = plt.subplot(2, 2, 1)
ax.plot(corn_history.history['accuracy'], label='Training Accuracy')
ax.plot(corn_history.history['val_accuracy'], label='Validation Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Corn')
ax.legend()
plt.show()

In [None]:
# corn_model.save('/kaggle/working/corn.h5')

In [None]:
# Loss and accuracy of the corn disease model on its test split.
loss, accuracy = corn_model.evaluate(x_test, y_test)
for metric_label, metric_value in (("loss:", loss), ("Accuracy:", accuracy)):
    print(metric_label, metric_value)

In [None]:
# Class probabilities -> predicted corn disease index.
y_pred1 = corn_model.predict(x_test)
y_pred = np.argmax(y_pred1, axis=1)
# Only collapse one-hot labels once, so re-running this cell does not fail
# with an axis error on the already 1-D y_test.
if y_test.ndim > 1:
    y_test = np.argmax(y_test, axis=1)
print(classification_report(y_test, y_pred))

# # Grape Disease Model

In [None]:
# Sample up to 1000 entries per grape disease; grape_aug records how many extra
# images each disease with fewer than 1000 samples needs.
grape_dataset, grape_classes, grape_aug = preprocess(grape, 1000)

In [None]:
# Disease index -> disease name parsed from the folder names.
print(grape_classes)

In [None]:
# Replaces the (disease index, path) samples with [image, disease index] records.
# Not idempotent: re-running this cell feeds image records back into create_dataset.
grape_dataset = create_dataset(grape_dataset, grape_aug)

In [None]:
# Number of grape records after loading and augmentation.
len(grape_dataset)

In [None]:
# Rebinds the notebook-wide x_train/y_train/x_test/y_test names to the grape data.
x_train, y_train, x_test, y_test = get_all_train_test(grape_dataset , grape_classes)

In [None]:
# One softmax output per grape disease, on top of the shared frozen VGG16 base.
grape_model = get_model(grape_classes)
grape_model.summary()

In [None]:
# Compile and train the grape disease model; the test split is also used for validation.
grape_model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
grape_history = grape_model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=15,
    validation_data=(x_test, y_test),
)

In [None]:
# Training vs. validation accuracy per epoch for the grape disease model.
ax = plt.subplot(2, 2, 1)
ax.plot(grape_history.history['accuracy'], label='Training Accuracy')
ax.plot(grape_history.history['val_accuracy'], label='Validation Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Grape')
ax.legend()
plt.show()

In [None]:
# Loss and accuracy of the grape disease model on its test split.
loss, accuracy = grape_model.evaluate(x_test, y_test)
for metric_label, metric_value in (("loss:", loss), ("Accuracy:", accuracy)):
    print(metric_label, metric_value)

In [None]:
# Class probabilities -> predicted grape disease index.
y_pred1 = grape_model.predict(x_test)
y_pred = np.argmax(y_pred1, axis=1)
# Only collapse one-hot labels once, so re-running this cell does not fail
# with an axis error on the already 1-D y_test.
if y_test.ndim > 1:
    y_test = np.argmax(y_test, axis=1)
print(classification_report(y_test, y_pred))

In [None]:
# grape_model.save('/kaggle/working/grape.h5')

# # Tomato Disease Model

In [None]:
# Sample up to 1200 entries per tomato disease; tomato_aug records how many extra
# images each disease with fewer than 1200 samples needs.
tomato_dataset , tomato_classes, tomato_aug = preprocess(tomato, 1200)

In [None]:
# Disease index -> disease name parsed from the folder names.
print(tomato_classes)

In [None]:
# Replaces the (disease index, path) samples with [image, disease index] records.
# Not idempotent: re-running this cell feeds image records back into create_dataset.
tomato_dataset = create_dataset(tomato_dataset, tomato_aug)

In [None]:
# Number of tomato records after loading and augmentation.
len(tomato_dataset)

In [None]:
# Rebinds the notebook-wide x_train/y_train/x_test/y_test names to the tomato data.
x_train, y_train, x_test, y_test = get_all_train_test(tomato_dataset , tomato_classes)

In [None]:
# One softmax output per tomato disease, on top of the shared frozen VGG16 base.
tomato_model = get_model(tomato_classes)
tomato_model.summary()

In [None]:
# Compile and train the tomato disease model; the test split is also used for validation.
tomato_model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
tomato_history = tomato_model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=15,
    validation_data=(x_test, y_test),
)

In [None]:
# Loss and accuracy of the tomato disease model on its test split.
loss, accuracy = tomato_model.evaluate(x_test, y_test)
for metric_label, metric_value in (("loss:", loss), ("Accuracy:", accuracy)):
    print(metric_label, metric_value)

In [None]:
# Training vs. validation accuracy per epoch for the tomato disease model.
ax = plt.subplot(2, 2, 1)
ax.plot(tomato_history.history['accuracy'], label='Training Accuracy')
ax.plot(tomato_history.history['val_accuracy'], label='Validation Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Tomato')
ax.legend()
plt.show()

In [None]:
# Class probabilities -> predicted tomato disease index.
y_pred1 = tomato_model.predict(x_test)
y_pred = np.argmax(y_pred1, axis=1)
# Only collapse one-hot labels once, so re-running this cell does not fail
# with an axis error on the already 1-D y_test.
if y_test.ndim > 1:
    y_test = np.argmax(y_test, axis=1)
print(classification_report(y_test, y_pred))

In [None]:
# tomato_model.save('/kaggle/working/tomato.h5')

# # Potato Disease Model

In [None]:
# Sample up to 1000 entries per potato disease; potato_aug records how many extra
# images each disease with fewer than 1000 samples needs.
potato_dataset, potato_classes, potato_aug = preprocess(potato, 1000)

In [None]:
# Disease index -> disease name parsed from the folder names.
print(potato_classes)

In [None]:
# Replaces the (disease index, path) samples with [image, disease index] records.
# Not idempotent: re-running this cell feeds image records back into create_dataset.
potato_dataset = create_dataset(potato_dataset, potato_aug)

In [None]:
# Number of potato records after loading and augmentation.
len(potato_dataset)

In [None]:
# Rebinds the notebook-wide x_train/y_train/x_test/y_test names to the potato data.
x_train, y_train, x_test, y_test = get_all_train_test(potato_dataset , potato_classes)

In [None]:
# One softmax output per potato disease, on top of the shared frozen VGG16 base.
potato_model = get_model(potato_classes)
potato_model.summary()

In [None]:
# Compile and train the potato disease model; the test split is also used for validation.
potato_model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
potato_history = potato_model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=15,
    validation_data=(x_test, y_test),
)

In [None]:
# Training vs. validation accuracy per epoch for the potato disease model.
ax = plt.subplot(2, 2, 1)
ax.plot(potato_history.history['accuracy'], label='Training Accuracy')
ax.plot(potato_history.history['val_accuracy'], label='Validation Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Potato')
ax.legend()
plt.show()

In [None]:
# Loss and accuracy of the potato disease model on its test split.
loss, accuracy = potato_model.evaluate(x_test, y_test)
for metric_label, metric_value in (("loss:", loss), ("Accuracy:", accuracy)):
    print(metric_label, metric_value)

In [None]:
# Class probabilities -> predicted potato disease index.
y_pred1 = potato_model.predict(x_test)
y_pred = np.argmax(y_pred1, axis=1)
# Only collapse one-hot labels once, so re-running this cell does not fail
# with an axis error on the already 1-D y_test.
if y_test.ndim > 1:
    y_test = np.argmax(y_test, axis=1)
print(classification_report(y_test, y_pred))