In [None]:
#importing the libraries
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.python.keras import regularizers
from keras import optimizers, metrics, models
from tensorflow.keras.optimizers import Adam

from keras.layers import Dense, Conv2D, Flatten, Activation, MaxPooling2D, Dropout,BatchNormalization

In [None]:
# Location of the dataset: the root directory that later cells list with
# os.listdir and pass to image_dataset_from_directory.
data_dir="./leaf_diseases"


In [None]:
# Show the entries found directly under the dataset directory.
print(os.listdir(data_dir))

In [None]:
# Batch size and square image edge length passed to
# image_dataset_from_directory when the datasets are built below.
BATCH_SIZE=32
IMAGE_SIZE=256


### Splitting the dataset into training (80%) and testing (20%) subsets

In [None]:

# Training subset: with validation_split=0.2 the "training" subset keeps the
# remaining 80% of the files. The seed is fixed so that the complementary
# "validation" subset (loaded with the same seed) does not overlap with it.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=data_dir,
    subset="training",
    validation_split=0.2,
    seed=123,
    batch_size=BATCH_SIZE,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
)


In [None]:
# Held-out subset: the "validation" 20% of the files, selected with the same
# validation_split and seed as the training subset so the two are disjoint.
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=data_dir,
    subset="validation",
    validation_split=0.2,
    seed=123,
    batch_size=BATCH_SIZE,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
)


In [None]:
# Length of the training dataset (it was created with batch_size=BATCH_SIZE,
# so this counts batches rather than individual images).
len(train_ds)

In [None]:
# Length of the held-out dataset (it was created with batch_size=BATCH_SIZE,
# so this counts batches rather than individual images).
len(test_ds)

In [None]:
# Class names reported by the training dataset; later cells index this list
# with integer labels and with argmax of the model output.
# NOTE(review): presumably one name per sub-folder of data_dir - confirm.
class_names = train_ds.class_names
print(class_names)

In [None]:
# Plot up to 12 samples from the first training batch in a 3x4 grid, each
# titled with its class name.
plt.figure(figsize=(10, 10))
for image_batch, labels_batch in train_ds.take(1):
    # A batch can hold fewer than 12 images (tiny dataset or small BATCH_SIZE);
    # never index past its end.
    n_show = min(12, len(image_batch))
    for i in range(n_show):
        ax = plt.subplot(3, 4, i + 1)
        # Pixels arrive as floats; cast to uint8 so imshow renders them as 0-255 colors.
        plt.imshow(image_batch[i].numpy().astype("uint8"))
        plt.title(class_names[int(labels_batch[i])])
        plt.axis("off")

## A CNN is used, with successive variations to improve accuracy

### Initial model 

In [None]:
# 1 - initial (baseline) model
# The number of output units follows the classes found in the dataset instead
# of a hard-coded 8, so the model stays valid if class folders are added/removed.
num_classes = len(class_names)
model = Sequential([
  # Declaring the input shape builds the model right away, so model.summary()
  # can be called before the first fit().
  keras.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
  # Dense on a 3-D input acts on the last (channel) axis of every pixel.
  layers.Dense(10, activation='relu'),
  layers.Flatten(),
  # No activation: the outputs are raw logits, so the loss has to be
  # configured with from_logits=True.
  layers.Dense(num_classes)
])
     

In [None]:
# Training configuration: Adam optimizer, sparse categorical cross-entropy
# computed on raw logits (integer labels), accuracy as the reported metric.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
    optimizer='adam',
)


In [None]:
# Print the layer-by-layer summary of the current model.
# NOTE(review): summary() needs a built model; it raises if the model was
# created without an input shape and has not been fitted or called yet.
model.summary()

In [None]:
# 1 - initial model training: 10 passes over train_ds. test_ds is evaluated
# after every epoch and its results are stored in history as the val_* series.
epochs = 10
history = model.fit(train_ds, epochs=epochs, validation_data=test_ds)

In [None]:
# Evaluate the current model on test_ds; the result is unpacked as (loss, accuracy).
# NOTE(review): test_ds is also passed as validation_data during fit, so this
# score is not an independent hold-out estimate.
test_loss, test_acc=model.evaluate(test_ds,verbose=2)
print('\nTest accuraccy:',test_acc)

In [None]:
# Plot the loss and accuracy curves recorded by the most recent model.fit() call.
# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 (renamed to
# 'seaborn-v0_8') and later removed, so pick whichever name this Matplotlib offers.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(20, 10))
fig.suptitle('Optimizer : Adam', fontsize=14)

# Left panel: training vs. validation loss per epoch.
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_xlabel('Epoch', fontsize=12)
loss_ax.set_ylabel('Loss', fontsize=12)
loss_ax.legend(loc='upper right')

# Right panel: training vs. validation accuracy per epoch.
acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_xlabel('Epoch', fontsize=12)
acc_ax.set_ylabel('Accuracy', fontsize=12)
acc_ax.legend(loc='lower right')
plt.show()

## Second model: adding regularization to prevent overfitting

In [None]:
# second model
# To prevent overfitting: input rescaling, L2 weight penalties and dropout.
# The output size follows the dataset's classes instead of a hard-coded 8.
num_classes = len(class_names)
# Rescaling moved out of the `experimental.preprocessing` namespace in newer
# TensorFlow releases; use the stable layer when it exists, else the old path.
_Rescaling = getattr(layers, "Rescaling", None) or layers.experimental.preprocessing.Rescaling
model = Sequential([
  keras.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
  # Map 0-255 pixel values to the 0-1 range.
  _Rescaling(1./255),
  # keras.regularizers is the public API (tensorflow.python.keras is private).
  layers.Dense(10, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001)),
  layers.Dropout(0.5),
  layers.Flatten(),
  # Output layer producing raw logits. No Dropout may follow it: dropping and
  # rescaling logits at random corrupts the loss signal during training.
  layers.Dense(num_classes, kernel_regularizer=keras.regularizers.l2(0.001)),
])

In [None]:
# Compile the regularized model: Adam optimizer, sparse categorical
# cross-entropy computed on raw logits, accuracy as the reported metric.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
    optimizer='adam',
)

In [None]:
# Print the layer-by-layer summary of the current model.
model.summary()

In [None]:
# Train the current model for 10 epochs. test_ds is evaluated after every
# epoch and its results are stored in history as the val_* series.
epochs = 10
history = model.fit(train_ds, epochs=epochs, validation_data=test_ds)

In [None]:
# Evaluate the current model on test_ds; the result is unpacked as (loss, accuracy).
# NOTE(review): test_ds is also passed as validation_data during fit, so this
# score is not an independent hold-out estimate.
test_loss, test_acc=model.evaluate(test_ds,verbose=2)
print('\nTest accuraccy:',test_acc)

In [None]:
# Plot the loss and accuracy curves recorded by the most recent model.fit() call.
# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 (renamed to
# 'seaborn-v0_8') and later removed, so pick whichever name this Matplotlib offers.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(20, 10))
fig.suptitle('Optimizer : Adam', fontsize=14)

# Left panel: training vs. validation loss per epoch.
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_xlabel('Epoch', fontsize=12)
loss_ax.set_ylabel('Loss', fontsize=12)
loss_ax.legend(loc='upper right')

# Right panel: training vs. validation accuracy per epoch.
acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_xlabel('Epoch', fontsize=12)
acc_ax.set_ylabel('Accuracy', fontsize=12)
acc_ax.legend(loc='lower right')
plt.show()

## Third model: CNN with 4 convolutional layers, trained for 50 epochs

In [None]:
# Third model: a CNN of four convolution blocks. Every block is
# Conv2D (3x3, 'same' padding, L2 penalty) -> BatchNormalization -> ReLU
# -> optional 2x2 max pooling -> Dropout.
model = models.Sequential()

# (filters, use_max_pooling, dropout_rate) for each convolution block, in order.
# The first block has no pooling; the dropout rate grows with depth.
conv_blocks = [
    (32, False, 0.25),
    (64, True, 0.25),
    (128, True, 0.30),
    (256, True, 0.40),
]

for filters, use_pooling, dropout_rate in conv_blocks:
    # keras.regularizers is the public API (tensorflow.python.keras is private).
    model.add(Conv2D(filters, (3, 3), padding='same',
                     kernel_regularizer=keras.regularizers.l2(0.001)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    if use_pooling:
        model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout_rate))

model.add(Flatten())

# One output unit per dataset class (instead of a hard-coded 8). The softmax
# activation means the model outputs probabilities, so the loss must be
# configured with from_logits=False.
model.add(Dense(len(class_names), activation='softmax'))



In [None]:
# The network ends in Dense(..., activation='softmax'), i.e. it already outputs
# probabilities. The loss must therefore NOT apply softmax a second time:
# from_logits=False (from_logits=True is only for models that output raw logits).
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])
     


In [None]:
# Hyperparameter tuning: the number of epochs is raised to 50. test_ds is
# evaluated after every epoch and stored in history as the val_* series.
epochs = 50
history = model.fit(train_ds, epochs=epochs, validation_data=test_ds)

In [None]:
# Evaluate the current model on test_ds; the result is unpacked as (loss, accuracy).
# NOTE(review): test_ds is also passed as validation_data during fit, so this
# score is not an independent hold-out estimate.
test_loss, test_acc=model.evaluate(test_ds,verbose=2)
print('\nTest accuraccy:',test_acc)

In [None]:
# Plot the loss and accuracy curves recorded by the most recent model.fit() call.
# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 (renamed to
# 'seaborn-v0_8') and later removed, so pick whichever name this Matplotlib offers.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(20, 10))
fig.suptitle('Optimizer : Adam', fontsize=14)

# Left panel: training vs. validation loss per epoch.
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_xlabel('Epoch', fontsize=12)
loss_ax.set_ylabel('Loss', fontsize=12)
loss_ax.legend(loc='upper right')

# Right panel: training vs. validation accuracy per epoch.
acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_xlabel('Epoch', fontsize=12)
acc_ax.set_ylabel('Accuracy', fontsize=12)
acc_ax.legend(loc='lower right')
plt.show()

## Fourth model: CNN with 2 more convolutional layers (6 in total), using batch normalization

In [None]:
# Fourth model: the four-block CNN extended with a fifth (512 filters) and a
# sixth (1024 filters) convolution block.
# NOTE(review): the L2 factor of 0.001 on every Conv2D, and the absence of
# BatchNormalization in blocks 5 and 6, follow what is recoverable from the
# notebook export - confirm against the intended architecture.
model = models.Sequential()

# (filters, use_batch_norm, use_max_pooling, dropout_rate) per block, in order.
conv_blocks = [
    (32,   True,  False, 0.25),   # first cnn layer (no pooling)
    (64,   True,  True,  0.25),   # second cnn layer
    (128,  True,  True,  0.30),   # third cnn layer
    (256,  True,  True,  0.40),   # fourth cnn layer
    (512,  False, True,  0.50),   # fifth cnn layer
    (1024, False, True,  0.50),   # sixth cnn layer
]

for filters, use_batch_norm, use_pooling, dropout_rate in conv_blocks:
    # keras.regularizers is the public API (tensorflow.python.keras is private).
    model.add(Conv2D(filters, (3, 3), padding='same',
                     kernel_regularizer=keras.regularizers.l2(0.001)))
    if use_batch_norm:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    if use_pooling:
        model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout_rate))

model.add(Flatten())

# One output unit per dataset class (instead of a hard-coded 8). The softmax
# activation means the model outputs probabilities, so the loss must be
# configured with from_logits=False.
model.add(Dense(len(class_names), activation='softmax'))


In [None]:
# The network ends in Dense(..., activation='softmax'), i.e. it already outputs
# probabilities. The loss must therefore NOT apply softmax a second time:
# from_logits=False (from_logits=True is only for models that output raw logits).
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

In [None]:
# Hyperparameter tuning: the number of epochs is raised to 50. test_ds is
# evaluated after every epoch and stored in history as the val_* series.
epochs = 50
history = model.fit(train_ds, epochs=epochs, validation_data=test_ds)

In [None]:
# Evaluate the current model on test_ds; the result is unpacked as (loss, accuracy).
# NOTE(review): test_ds is also passed as validation_data during fit, so this
# score is not an independent hold-out estimate.
test_loss, test_acc=model.evaluate(test_ds,verbose=2)
print('\nTest accuraccy:',test_acc)

In [None]:
# Plot the loss and accuracy curves recorded by the most recent model.fit() call.
# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 (renamed to
# 'seaborn-v0_8') and later removed, so pick whichever name this Matplotlib offers.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(20, 10))
fig.suptitle('Optimizer : Adam', fontsize=14)

# Left panel: training vs. validation loss per epoch.
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_xlabel('Epoch', fontsize=12)
loss_ax.set_ylabel('Loss', fontsize=12)
loss_ax.legend(loc='upper right')

# Right panel: training vs. validation accuracy per epoch.
acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_xlabel('Epoch', fontsize=12)
acc_ax.set_ylabel('Accuracy', fontsize=12)
acc_ax.legend(loc='lower right')
plt.show()

### Plotting graphs for the CNN models, each of which varies from the previous one

In [None]:
# Predict the first image of the first test batch and print the true class
# name next to the predicted one.
import numpy as np
for images_batch, labels_batch in test_ds.take(1):

    # uint8 copy of the image for display; integer label used to index class_names.
    first_image = images_batch[0].numpy().astype('uint8')
    first_label = labels_batch[0].numpy()

    print("first image to predict")
    plt.imshow(first_image)
    actual_name = class_names[first_label]
    print("actual label:", actual_name)

    # Predict the whole batch; only the first row is reported.
    batch_prediction = model.predict(images_batch)
    predicted_index = np.argmax(batch_prediction[0])
    print("predicted label:", class_names[predicted_index])

## Predicting images using the above model

In [None]:
# A function for prediction
def predict(model, img, labels=None):
    """Classify a single image and report the winning class with its score.

    Args:
        model: object exposing ``predict(batch)`` that returns one row of
            per-class scores for every image in the batch.
        img: one image as an array-like of shape (height, width, channels);
            a 2-D (height, width) image gets a trailing channel axis.
        labels: optional sequence of class names indexed by class id.
            Defaults to the notebook-level ``class_names``.

    Returns:
        Tuple ``(predicted_class, confidence)`` where ``confidence`` is the
        highest score of the prediction row times 100, rounded to 2 decimals.
        It is a percentage only if the model outputs probabilities.
    """
    if labels is None:
        labels = class_names

    # Use the image that was passed in (the previous version ignored `img` and
    # read the globals `images[i]`, so it only worked inside one specific loop).
    img_array = np.asarray(img, dtype="float32")
    if img_array.ndim == 2:
        img_array = img_array[..., np.newaxis]
    # The model expects a batch, so add a leading batch axis of size 1.
    img_batch = np.expand_dims(img_array, 0)

    predictions = model.predict(img_batch)

    scores = np.asarray(predictions[0])
    predicted_class = labels[int(np.argmax(scores))]
    confidence = round(100 * float(np.max(scores)), 2)
    return predicted_class, confidence

In [None]:
# Show the first 9 images of the first test batch in a 3x3 grid, each titled
# with its actual class, the predicted class and the reported confidence.
plt.figure(figsize=(15, 15))
for images, labels in test_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        sample = images[i].numpy()
        plt.imshow(sample.astype("uint8"))

        predicted_class, confidence = predict(model, sample)
        actual_class = class_names[labels[i]]

        caption = f"Actual: {actual_class},\n Predicted: {predicted_class}.\n Confidence: {confidence}%"
        plt.title(caption)

        plt.axis("off")

In [None]:
# Save the current `model` (the last one built and trained above) to disk.
model.save("./leaf_diseases.h5")

### Classifying the images with other ML models: Random Forest, KNN, Decision Tree and Naive Bayes

In [None]:
import os
import numpy as np
import cv2
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

# Load every image from the dataset folder, then compare four classical
# classifiers on the raw pixel values with 5-fold cross-validation.

# Set the path to the dataset directory
dataset_dir = "./leaf_diseases"

# Define the image dimensions
img_width = 256
img_height = 256

# Initialize the data and label arrays
X = []
y = []

# Loop over each class sub-directory. sorted() makes the load order (and hence
# the seeded fold assignment below) independent of the file system's ordering.
for class_dir in sorted(os.listdir(dataset_dir)):
    class_path = os.path.join(dataset_dir, class_dir)

    # Stray files next to the class folders are not classes.
    if not os.path.isdir(class_path):
        continue

    # Loop over each image in the class directory
    for image_name in sorted(os.listdir(class_path)):
        image_path = os.path.join(class_path, image_name)

        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files; skip those instead of crashing in cv2.resize.
        image = cv2.imread(image_path)
        if image is None:
            print("Skipping unreadable file: ", image_path)
            continue
        image = cv2.resize(image, (img_width, img_height))

        # The folder name is the class label.
        X.append(image)
        y.append(class_dir)

if not X:
    raise ValueError("No readable images found under " + dataset_dir)

# Convert the data and label arrays to numpy arrays
X = np.array(X)
y = np.array(y)

# The classifiers need one flat feature vector per image; flatten once here
# rather than in every fold. X itself is left in image shape.
X_flat = X.reshape(X.shape[0], -1)

# Initialize the classifiers (seeded where the estimator is randomized)
rf_clf = RandomForestClassifier(random_state=42)
knn_clf = KNeighborsClassifier()
dt_clf = DecisionTreeClassifier(random_state=42)
nb_clf = GaussianNB()

# Define the number of folds for cross-validation
n_splits = 5

# Shuffled K-fold; the fixed random_state makes the folds reproducible.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Display name paired with its estimator, in reporting order.
named_classifiers = [
    ("Random Forest", rf_clf),
    ("KNN", knn_clf),
    ("Decision Tree", dt_clf),
    ("Naive Bayes", nb_clf),
]

# Loop over each fold
for fold_idx, (train_index, test_index) in enumerate(kfold.split(X_flat)):
    print("Fold: ", fold_idx+1)

    # Split the data and label arrays into training and testing subsets
    X_train, X_test = X_flat[train_index], X_flat[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # fit() retrains each estimator from scratch, so nothing leaks across folds.
    for clf_name, clf in named_classifiers:
        clf.fit(X_train, y_train)
        fold_acc = clf.score(X_test, y_test)
        print(clf_name + " accuracy: ", fold_acc)

    print("\n")


In [None]:
#plotting the graph for all  other ML models
import matplotlib.pyplot as plt

# Re-run the cross-validation, collect each classifier's accuracy per fold,
# and plot the mean accuracy per classifier as a bar chart.

# Display names and bar colors, in plotting order.
classifiers = ["Random Forest", "KNN", "Decision Tree", "Naive Bayes"]
colors = ["red", "green", "blue", "orange"]

# One per-fold accuracy list per classifier.
rf_accs, knn_accs, dt_accs, nb_accs = [], [], [], []

# Each estimator paired with the list that collects its scores.
trackers = [
    (rf_clf, rf_accs),
    (knn_clf, knn_accs),
    (dt_clf, dt_accs),
    (nb_clf, nb_accs),
]

for fold_idx, (train_index, test_index) in enumerate(kfold.split(X)):

    # Select the fold's samples and flatten each one into a single feature row.
    X_train = X[train_index]
    X_test = X[test_index]
    X_train = X_train.reshape(len(X_train), -1)
    X_test = X_test.reshape(len(X_test), -1)
    y_train, y_test = y[train_index], y[test_index]

    # Train and score the classifiers in the same order as the labels above.
    for estimator, fold_scores in trackers:
        estimator.fit(X_train, y_train)
        fold_scores.append(estimator.score(X_test, y_test))

# Initialize the figure and axis objects
fig, ax = plt.subplots()

# One bar per classifier: its mean accuracy over the folds.
bar_positions = [0, 1, 2, 3]
mean_accuracies = [np.mean(scores) for scores in (rf_accs, knn_accs, dt_accs, nb_accs)]
ax.bar(bar_positions, mean_accuracies, color=colors)

# Label the bars and the y-axis
ax.set_xticks(bar_positions)
ax.set_xticklabels(classifiers)
ax.set_ylabel("Accuracy")

# Show the plot
plt.show()


## Final graph: accuracy of all models

In [None]:
import matplotlib.pyplot as plt
 
# Summary bar chart of the accuracy of every classifier.
# NOTE(review): the accuracies are literals typed into this cell, not values
# read from the variables computed earlier - update them after re-running.
summary_rows = [
    ('CNN', 0.9449, 'blue'),
    ('Random Forest', 0.87, 'green'),
    ('KNN', 0.675, 'purple'),
    ('Decision Tree', 0.69, 'orange'),
    ('Naive Bayes', 0.5425, 'red'),
]
classifiers = [row[0] for row in summary_rows]
accuracies = [row[1] for row in summary_rows]
colors = [row[2] for row in summary_rows]

# One bar per classifier, colored individually.
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(classifiers, accuracies, color=colors)

# Title and axis labels
ax.set_title('Accuracy of different classifiers')
ax.set_xlabel('Classifiers')
ax.set_ylabel('Accuracy')

# Show plot
plt.show()
