In [1]:
import numpy as np
import pickle
import cv2
from os import listdir
from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer
from keras.models import Sequential
from keras.layers import BatchNormalization, Conv2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dropout, Dense
from keras import backend as K
from tensorflow.keras.utils import img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report


In [None]:
# --- Training hyper-parameters ---
EPOCHS = 40       # upper bound on training epochs
INIT_LR = 1e-3    # initial learning rate handed to the optimizer
BS = 32           # batch size

# --- Input geometry ---
width, height, depth = 256, 256, 3
# Target size every image is resized to before it is turned into an array.
default_image_size = (width, height)

# --- Dataset ---
directory_root = '/kaggle/input/plantdisease'
# Placeholder; overwritten with the number of loaded images after loading.
image_size = 0

def convertImageToArray(image_dir):
    """Read one image file, resize it and convert it to an array.

    Args:
        image_dir: Path of the image file (despite the name, a file path,
            not a directory) passed straight to ``cv2.imread``.

    Returns:
        The result of ``img_to_array`` on the image resized to
        ``default_image_size``; an empty ``np.array([])`` when
        ``cv2.imread`` returns ``None``; or ``None`` when any exception is
        raised while reading or resizing (the error is printed).
    """
    try:
        raw_image = cv2.imread(image_dir)
        # cv2.imread signals failure by returning None rather than raising.
        if raw_image is None:
            return np.array([])
        resized_image = cv2.resize(raw_image, default_image_size)
        return img_to_array(resized_image)
    except Exception as e:
        print(f"Error : {e}")
        return None

# Accumulators filled by the loop below: one image array and one class label
# (the disease folder name) per successfully loaded image.
image_list, label_list = [], []
# Bound before the ``try`` so the name exists even if listing the dataset root
# fails; a later cell iterates over ``root_dir`` again.
root_dir = []
try:
    print("[INFO] Loading images ...")
    # Build filtered copies instead of calling ``list.remove`` while iterating
    # the same list, which silently skips the entry after each removal.
    root_dir = [entry for entry in listdir(directory_root) if entry != ".DS_Store"]

    for plant_folder in root_dir:
        plant_disease_folder_list = [
            entry
            for entry in listdir(f"{directory_root}/{plant_folder}")
            if entry != ".DS_Store"
        ]

        for plant_disease_folder in plant_disease_folder_list:
            print(f"[INFO] Processing {plant_disease_folder} ...")
            disease_dir = f"{directory_root}/{plant_folder}/{plant_disease_folder}"
            plant_disease_image_list = [
                entry for entry in listdir(disease_dir) if entry != ".DS_Store"
            ]

            # Only the first 200 directory entries of each class are considered.
            for image in plant_disease_image_list[:200]:
                image_directory = f"{disease_dir}/{image}"
                if not image_directory.endswith((".jpg", ".JPG")):
                    continue

                image_array = convertImageToArray(image_directory)
                # convertImageToArray reports failure as None or as an empty
                # array. Appending either would make the list ragged and break
                # the later conversion to a single NumPy array.
                if image_array is None or image_array.size == 0:
                    print(f"[WARN] Skipping unreadable image {image_directory}")
                    continue

                image_list.append(image_array)
                label_list.append(plant_disease_folder)
    print("[INFO] Image loading completed")
except Exception as e:
    print(f"Error : {e}")

# Number of images that were actually loaded.
image_size = len(image_list)

In [None]:
# One-hot encode the class labels (disease folder names).
label_binarizer = LabelBinarizer()
image_labels = label_binarizer.fit_transform(label_list)

# Persist the fitted binarizer so predictions can be mapped back to class
# names later. The ``with`` block guarantees the file handle is closed.
with open('label_transform.pkl', 'wb') as label_file:
    pickle.dump(label_binarizer, label_file)
n_classes = len(label_binarizer.classes_)

print(label_binarizer.classes_)

# Scale pixel values into [0, 1]. The divisor must be 255 (the maximum 8-bit
# pixel value); the previous 225 let values exceed 1.0.
# NOTE(review): any inference code must apply the same 255 scaling.
np_image_list = np.array(image_list, dtype=np.float16) / 255.0



In [None]:
print("[INFO] Spliting data to train, test")
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    np_image_list,
    image_labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Random augmentations applied to training batches.
augmentation_options = {
    "rotation_range": 30,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "shear_range": 0.2,
    "zoom_range": 0.2,
    "horizontal_flip": True,
    "fill_mode": "nearest",
}
aug = ImageDataGenerator(**augmentation_options)


def showImages(gen):
    '''
    Show a sample of augmented training images with their class names.

    Draws one batch of ``BS`` samples from ``gen.flow(x_train, y_train)`` and
    plots up to 25 of them on a 5x5 grid, titled with the class name taken
    from ``label_binarizer``.

    Args:
        gen: Data generator exposing ``flow(x, y, batch_size=...)``.

    Any exception is caught and printed rather than raised.
    '''
    try:
        # Get classes from label binarizer
        classes = list(label_binarizer.classes_)

        # Pull a single augmented batch from the generator
        images, labels = next(gen.flow(x_train, y_train, batch_size=BS))

        # The grid has 5 x 5 cells, so never show more than 25 samples
        sample = min(len(labels), 25)

        plt.figure(figsize=(20, 20))
        for i in range(sample):
            plt.subplot(5, 5, i + 1)
            # The images were decoded with cv2.imread, which yields BGR
            # channel order, while imshow interprets the last axis as RGB.
            # Reverse the channel axis (assumes channels-last layout) and
            # clip to the [0, 1] range imshow expects for float data.
            image = np.clip(images[i][..., ::-1], 0.0, 1.0)
            plt.imshow(image)

            # Find the class index from the one-hot label row
            index = np.argmax(labels[i])
            class_name = classes[index]

            plt.title(class_name, color='blue', fontsize=12)
            plt.axis('off')
        plt.tight_layout()
        plt.show()
    except Exception as e:
        print(f"Error in showImages: {e}")

showImages(aug)

In [None]:
# Pick the tensor layout and the matching BatchNormalization axis from the
# active Keras backend configuration.
inputShape = (height, width, depth)
chanDim = -1
if K.image_data_format() == "channels_first":
    inputShape = (depth, height, width)
    chanDim = 1

model = Sequential([
    # Block 1: CONV => RELU => BN => POOL
    Conv2D(32, (3, 3), padding="same", input_shape=inputShape),
    Activation("relu"),
    BatchNormalization(axis=chanDim),
    MaxPooling2D(pool_size=(3, 3)),
    Dropout(0.25),

    # Block 2: (CONV => RELU => BN) x 2 => POOL
    Conv2D(64, (3, 3), padding="same"),
    Activation("relu"),
    BatchNormalization(axis=chanDim),
    Conv2D(64, (3, 3), padding="same"),
    Activation("relu"),
    BatchNormalization(axis=chanDim),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Block 3: (CONV => RELU => BN) x 2 => POOL
    Conv2D(128, (3, 3), padding="same"),
    Activation("relu"),
    BatchNormalization(axis=chanDim),
    Conv2D(128, (3, 3), padding="same"),
    Activation("relu"),
    BatchNormalization(axis=chanDim),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head. The Dense layer already applies ReLU through its
    # ``activation`` argument, so the separate Activation("relu") layer that
    # used to follow it was redundant (ReLU of a ReLU output is unchanged)
    # and has been removed.
    Flatten(),
    Dense(1024, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    # One output unit per class, normalised to probabilities by softmax.
    Dense(n_classes),
    Activation("softmax"),
])

model.summary()

In [None]:
opt = Adam(learning_rate=INIT_LR)
# The network ends in a softmax over n_classes and the targets are one-hot
# rows, so the matching loss is categorical cross-entropy. Binary
# cross-entropy treats every output unit as an independent yes/no problem,
# which mismatches the softmax head and can inflate the reported accuracy.
# NOTE(review): with exactly two classes LabelBinarizer emits a single
# column, which would not match this head — confirm the dataset has > 2 classes.
model.compile(loss="categorical_crossentropy", optimizer=opt,metrics=["accuracy"])
# train the network
print("[INFO] training network...")

In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping

# Halve the learning rate when validation loss has not improved for 3 epochs,
# never going below 1e-6.
lr_scheduler = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-6)

# Stop after 10 epochs without validation-loss improvement and roll back to
# the best weights seen.
early_stopping = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

#This part can be run multiple times to increase training and val accuracy
history = model.fit(
    aug.flow(x_train, y_train, batch_size=BS),
    validation_data=(x_test, y_test),
    steps_per_epoch=len(x_train) // BS,
    epochs=EPOCHS,
    verbose=1,
    # lr_scheduler was previously created but never registered, so the
    # learning rate was never reduced; both callbacks are now active.
    callbacks=[lr_scheduler, early_stopping],
)

In [None]:
# Per-epoch metrics recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

# Train and validation accuracy
plt.figure()
plt.plot(epochs, acc, 'b', label='Training accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
plt.title('Training and Validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Train and validation loss (separate figure)
plt.figure()
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()


In [None]:
print("[INFO] Calculating model accuracy")
# evaluate() returns the loss followed by the compiled metrics; index 1 is
# the single "accuracy" metric the model was compiled with.
scores = model.evaluate(x_test, y_test)
test_accuracy_percent = scores[1] * 100
print(f"Test Accuracy: {test_accuracy_percent}")

In [None]:
# save the model to disk
print("[INFO] Saving model...")
# The ``with`` block closes the file handle even if pickling fails; the
# previous bare open() never closed it.
# NOTE(review): pickling a Keras model is not supported by every Keras /
# TensorFlow version, and unpickling executes arbitrary code, so only load
# this file from a trusted source. Consider model.save() for portability.
with open('cnn_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
import sqlite3

# Schema for the per-image metadata table; created only if it is missing.
_CREATE_TABLE_SQL = '''
    CREATE TABLE IF NOT EXISTS image_metadata (
        id INTEGER PRIMARY KEY,
        plant_folder TEXT,
        disease_folder TEXT,
        image_path TEXT,
        label TEXT
    )
'''

# Parameterised insert; values are bound through the ``?`` placeholders.
_INSERT_ROW_SQL = '''
        INSERT INTO image_metadata 
        (plant_folder, disease_folder, image_path, label) 
        VALUES (?, ?, ?, ?)
    '''

# Open (or create) the SQLite database file in the working directory.
conn = sqlite3.connect('plant_disease_data.db')
conn.execute(_CREATE_TABLE_SQL)


def insertImageData(plant_folder, disease_folder, image_path, label):
    """Insert one row into ``image_metadata`` and commit immediately.

    Args:
        plant_folder: Value stored in the ``plant_folder`` column.
        disease_folder: Value stored in the ``disease_folder`` column.
        image_path: Value stored in the ``image_path`` column.
        label: Value stored in the ``label`` column.
    """
    row = (plant_folder, disease_folder, image_path, label)
    conn.execute(_INSERT_ROW_SQL, row)
    conn.commit()

# Record one metadata row per image file the loader considers: the first 200
# directory entries of each disease folder, restricted to .jpg/.JPG files.
# Only paths and labels are stored here. The images are NOT decoded again and
# image_list / label_list are left untouched: the previous copy-pasted loader
# body re-read every image and appended it a second time, duplicating the
# whole dataset in memory after training had already finished.
for plant_folder in root_dir:
    if plant_folder == ".DS_Store":
        continue
    plant_disease_folder_list = [
        d for d in listdir(f"{directory_root}/{plant_folder}") if d != ".DS_Store"
    ]

    for disease_folder in plant_disease_folder_list:
        plant_disease_image_list = [
            img
            for img in listdir(f"{directory_root}/{plant_folder}/{disease_folder}")
            if img != ".DS_Store"
        ]

        for image in plant_disease_image_list[:200]:
            image_directory = f"{directory_root}/{plant_folder}/{disease_folder}/{image}"
            if image_directory.endswith((".jpg", ".JPG")):
                # The disease folder name doubles as the class label.
                insertImageData(plant_folder, disease_folder, image_directory, disease_folder)


# The connection is intentionally left open here; the following cell reads
# from it and then closes it.

In [None]:
import pandas as pd
def viewPandasDatabase(connection=None):
    """Print the ``image_metadata`` table and a short summary of it.

    Args:
        connection: Open database connection to read from. When omitted,
            the notebook-level ``conn`` is used, which keeps the original
            zero-argument call working.

    Prints the full table, the total number of rows, and the number of
    distinct values in the ``plant_folder`` and ``disease_folder`` columns.
    """
    if connection is None:
        connection = conn

    # Read the entire table into a pandas DataFrame
    df = pd.read_sql_query("SELECT * FROM image_metadata", connection)
    print("Database Contents:")
    print(df)

    # Additional useful information
    print("\nDatabase Summary:")
    print(f"Total Records: {len(df)}")
    print(f"Unique Plant Folders: {df['plant_folder'].nunique()}")
    print(f"Unique Disease Folders: {df['disease_folder'].nunique()}")

# Close the connection even if displaying the table raises, so the database
# handle is never leaked.
try:
    viewPandasDatabase()
finally:
    conn.close()