### Importing necessary libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import kagglehub
from matplotlib.image import imread
import cv2
import random
import os
from PIL import Image
import tensorflow as tf
from tensorflow. keras.utils import img_to_array, array_to_img
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Flatten, Dense
from sklearn. model_selection import train_test_split
from keras.utils import to_categorical


### Defining the path of the dataset directory

In [None]:
# Fetch the dataset through kagglehub; the returned value is used as the dataset's local root
dataset_path = kagglehub.dataset_download("vipoooool/new-plant-diseases-dataset")

# Training images sit in the "train" folder below the (doubly nested) augmented dataset folder
train_subdir = "New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train"
path = os.path.join(dataset_path, train_subdir)

### Visualizing sample images and their sizes

In [None]:
# Plot up to 12 sample images (2 from each of the first 6 class folders) to sanity-check the dataset

plt.figure(figsize = (12, 12))

folders = os.listdir(path)
selected_images = []

# Select two images from each of the first 6 directories
for folder in folders[:6]:
    folder_path = os.path.join(path, folder)
    if os.path.isdir(folder_path):
        images = os.listdir(folder_path)
        # min(...) guards against folders holding fewer than two files
        sampled_names = random.sample(images, min(2, len(images)))
        selected_images.extend(os.path.join(folder_path, name) for name in sampled_names)

# Plot the selected images on a 4x3 grid
for i, img_path in enumerate(selected_images, 1):
    plt.subplot(4, 3, i)
    # The context manager closes the image file as soon as it has been drawn
    with Image.open(img_path) as rand_img:
        plt.imshow(rand_img)
        width, height = rand_img.size
    plt.xlabel(width, fontsize=10)  # width of image
    plt.ylabel(height, fontsize=10)  # height of image
    folder_name = os.path.basename(os.path.dirname(img_path))
    plt.title(folder_name, fontsize=8)  # Add folder name as label

# Compute the layout once, after every subplot has its labels and title,
# instead of re-running it on each loop iteration
plt.tight_layout()

### Convert the images into NumPy arrays (normalization is applied after the train/test split)

In [None]:
def convert_image_to_array(image_dir):
    """Load a single image file as an RGB array.

    Parameters
    ----------
    image_dir : str
        Path of the image file to load (a file path, despite the parameter name).

    Returns
    -------
    numpy.ndarray
        The output of ``img_to_array`` for the RGB-converted image. An empty array
        (``size == 0``) is returned when the file cannot be read or when loading
        raises, so callers always receive an ndarray and never ``None``.
    """
    try:
        # cv2.imread serves only as a readability probe here: a None result means
        # the file could not be decoded, and it is reported via the empty sentinel.
        if cv2.imread(image_dir) is None:
            return np.array([])
        with Image.open(image_dir) as image:
            return img_to_array(image.convert('RGB'))
    except Exception as e:
        # Best-effort loader: report the problem and fall back to the same sentinel
        # as the unreadable-file branch instead of implicitly returning None.
        print(f"Error : {e}")
        return np.array([])

In [None]:
image_list, label_list = [], []
labels = ['Tomato___Septoria_leaf_spot','Tomato___Tomato_Yellow_Leaf_Curl_Virus', 'Tomato___healthy']
# Integer class ids, one per entry of `labels` (three classes, so not actually binary)
binary_labels = list(range(len(labels)))

# Reading and converting image to numpy array

for class_id, label in zip(binary_labels, labels):
    class_dir = os.path.join(path, label)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample order,
    # and therefore the seeded train/test split, reproducible across runs and machines.
    for file in sorted(os.listdir(class_dir)):
        image_array = convert_image_to_array(os.path.join(class_dir, file))
        # Skip files that failed to load (empty array, or None from the error path):
        # keeping them would leave image_list ragged and break the later np.array() call.
        if image_array is None or image_array.size == 0:
            continue
        image_list.append(image_array)
        label_list.append(class_id)

### Count the images per class and check for class imbalance

In [None]:
# Tally how many loaded images belong to each class id

label_frame = pd.DataFrame(label_list)
label_counts = label_frame.value_counts()
label_counts.head()

As the counts show, the dataset is balanced. Next, let's check the shape of the images.

In [None]:
# Inspect the shape of the first loaded image array
image_list[0].shape

### Splitting the dataset into train, validation and test sets

In [None]:
# Hold out 20% of the samples as the test set; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    image_list,
    label_list,
    test_size=0.2,
    random_state=10,
)

In [None]:
# Normalize the images: pixel values range from 0 to 255, so dividing every pixel by 255
# scales the data into [0, 1]. (Dividing by 225 would leave values above 1.)

MAX_PIXEL_VALUE = 255.0

x_train = np.array(x_train, dtype=np.float16) / MAX_PIXEL_VALUE
x_test = np.array(x_test, dtype=np.float16) / MAX_PIXEL_VALUE
# Enforce the (samples, 256, 256, 3) layout that the model's input layer is declared with
x_train = x_train.reshape(-1, 256, 256, 3)
x_test = x_test.reshape(-1, 256, 256, 3)

### Performing one-hot encoding on target variable

In [None]:
# One-hot encode the integer class ids. num_classes is pinned to the label list so that
# both splits get the same width even if a class happens to be absent from one of them
# (otherwise to_categorical infers the width from the largest id it sees).
num_classes = len(labels)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

### Creating the model architecture, compile the model and then fit it using the training data

In [None]:
# Small CNN: two convolution/pooling stages followed by a dense classification head
model = Sequential([
    Conv2D(32, (3, 3), padding="same", input_shape=(256, 256, 3), activation="relu"),
    MaxPooling2D(pool_size=(3, 3)),
    Conv2D(16, (3, 3), padding="same", activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(8, activation="relu"),
    Dense(3, activation="softmax"),  # one softmax output per class
])
model.summary()

In [None]:
# Configure training: Adam optimizer (0.0001), cross-entropy on one-hot targets, accuracy metric
model.compile(
    optimizer=Adam(0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Carve a validation set (20%) out of the training data, using the same fixed seed

x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=10,
)

In [None]:
# Fit the model; validation metrics are computed on (x_val, y_val) during training

epochs = 50
batch_size = 128
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=epochs,
    batch_size=batch_size,
)

In [None]:
# Persist the trained model to disk for later reuse
model_output_path = "../models/plant_disease_model.h5"
model.save(model_output_path)

### Plot the training and validation accuracy against each epoch

In [None]:
# Accuracy per epoch: training curve in red, validation curve in blue

plt.figure(figsize=(12, 5))
for metric_name, line_color in (('accuracy', 'r'), ('val_accuracy', 'b')):
    plt.plot(history.history[metric_name], color=line_color)
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(['train', 'val'])

plt.show()

In [None]:
print("Calculating model accuracy")

# Index 1 of the evaluate() result is the accuracy metric requested at compile time
scores = model.evaluate(x_test, y_test)
test_accuracy_pct = scores[1] * 100
print(f"Test Accuracy: {test_accuracy_pct}")

### Make predictions on testing data

In [None]:
# Run the trained model on the test images; each row holds the softmax scores for one image
y_pred = model.predict(x_test)

### Visualizing the original and predicted labels for the test images

In [None]:
# Turn one test sample (index 11) back into an image so it can be inspected visually

sample_array = x_test[11]
img = array_to_img(sample_array)
img

In [None]:
# Find the highest-scoring class in the prediction and compare the original vs. predicted label.
# All three lines use the same sample, the one displayed above as x_test[11]; mixing indices
# would compare the true label of one image with the prediction for another.

sample_index = 11
print("Originally : ", labels[np.argmax(y_test[sample_index])])
print("Predicted : ", labels[np.argmax(y_pred[sample_index])])
print(y_pred[sample_index])

In [None]:
# Count the test samples whose predicted class equals the true class
matches = np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_test, axis=1))

# Report the raw count together with the percentage so the label matches the numbers shown
print(f"Number of matches: {matches} of {len(y_test)} ({(matches / len(y_test)) * 100:.2f}%)")