# PRCP- 1001- RiceLeaf disease detection

Problem Statement

Task 1:-Prepare a complete data analysis report on the given data.

Task 2:-Create a model which can classify the three major diseases attacking rice plants: leaf smut, bacterial leaf blight and brown spot.

Task 3:- Analyze various techniques such as Data Augmentation, etc., and create a report on them.


Dataset Link:
This dataset contains 120 jpg images of disease-infected rice leaves. The images are grouped into 3 classes based on the type of disease. There are 40 images in each class.
Classes
●	Leaf smut
●	Brown spot
●	Bacterial leaf blight
Domain: 

Link : https://d3ilbtxij3aepc.cloudfront.net/projects/CDS-Capstone-Projects/PRCP-1001-RiceLeaf.zip










Model Comparison Report

Create a report stating the performance of multiple models on this data and suggest the best model for production.

Report on Challenges faced

Create a report which should include challenges you faced on data and what technique used with proper reason.


Note:- All of the above tasks have to be done in a single Jupyter notebook; share the same notebook for the final submission of the project.





## PYTHON IMPLEMENTATION

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import os
import zipfile
from sklearn.metrics import classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline


In [None]:
!pip install tensorflow

In [None]:
# Extract dataset
!wget https://d3ilbtxij3aepc.cloudfront.net/projects/CDS-Capstone-Projects/PRCP-1001-RiceLeaf.zip
with zipfile.ZipFile("PRCP-1001-RiceLeaf.zip", 'r') as zip_ref:
    zip_ref.extractall("RiceLeafDataset")


In [None]:
# Dataset Paths
# Folder the archive is extracted into; the per-class folders are looked up
# beneath it and the image generators read from it.
base_dir = "RiceLeafDataset"

In [None]:
# Data Analysis: report how many files each class folder holds
categories = ["Leaf smut", "Brown spot", "Bacterial leaf blight"]
category_paths = [os.path.join(base_dir, name) for name in categories]

for category, path in zip(categories, category_paths):
    n_images = len(os.listdir(path))
    print(f"Category: {category}, Number of Images: {n_images}")

In [None]:
# Visualize sample images
def plot_samples(category_paths, categories):
    """Show one sample image per category in a single row.

    Args:
        category_paths: Directories to read, one per category.
        categories: Panel titles, in the same order as ``category_paths``.

    Raises:
        IndexError: If a category directory is empty.
    """
    n_panels = len(category_paths)
    plt.figure(figsize=(12, 8))
    for i, path in enumerate(category_paths):
        # os.listdir returns entries in arbitrary order; sorting makes the
        # displayed sample the same on every run.
        first_file = sorted(os.listdir(path))[0]
        img = plt.imread(os.path.join(path, first_file))
        # One column per category rather than a fixed three-column grid.
        plt.subplot(1, n_panels, i + 1)
        plt.imshow(img)
        plt.title(categories[i])
        plt.axis("off")
    plt.show()

plot_samples(category_paths, categories)


In [None]:
# Data Preparation
# Target size the generators resize every image to.
img_size = (128, 128)
# Number of images per batch produced by the generators.
batch_size = 16

In [None]:
# Image Data Generators
# Augmentation (flip / zoom) is applied to the training subset only.
data_gen = ImageDataGenerator(rescale=1.0/255,
                               validation_split=0.2,
                               horizontal_flip=True,
                               zoom_range=0.2)

# Validation images are only rescaled, so validation metrics are measured on
# unmodified images. The validation_split must equal the one used by data_gen
# so that both generators partition the files the same way.
val_data_gen = ImageDataGenerator(rescale=1.0/255,
                                   validation_split=0.2)

train_gen = data_gen.flow_from_directory(base_dir,
                                         target_size=img_size,
                                         batch_size=batch_size,
                                         class_mode='categorical',
                                         subset='training')

# shuffle=False keeps the batch order aligned with val_gen.classes, which the
# evaluation step compares the predictions against.
val_gen = val_data_gen.flow_from_directory(base_dir,
                                           target_size=img_size,
                                           batch_size=batch_size,
                                           class_mode='categorical',
                                           subset='validation',
                                           shuffle=False)


In [None]:
# Model Building
def build_model(input_shape=(128, 128, 3), num_classes=3):
    """Build and compile a small CNN image classifier.

    Three Conv2D + MaxPooling2D stages (32, 64, 128 filters) are followed by a
    128-unit dense layer and a softmax output layer.

    Args:
        input_shape: (height, width, channels) of the input images. The
            default matches the original fixed 128x128 RGB input.
        num_classes: Number of output classes. Defaults to 3.

    Returns:
        The model, compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])

    # categorical_crossentropy expects one-hot labels, which the generators
    # produce with class_mode='categorical'.
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

model = build_model()
model.summary()


In [None]:
# Fit the CNN on the training generator and score it on the validation
# generator after every epoch
epochs = 20
history = model.fit(x=train_gen, epochs=epochs, validation_data=val_gen)


In [None]:
# Plot Training History
def plot_training_history(history):
    """Draw train/validation accuracy and loss curves in two side-by-side panels.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    """
    logs = history.history
    # (panel title, [(history key, legend label), ...]) for each panel, left to right
    panels = (
        ('Accuracy', (('accuracy', 'Train Accuracy'),
                      ('val_accuracy', 'Validation Accuracy'))),
        ('Loss', (('loss', 'Train Loss'),
                  ('val_loss', 'Validation Loss'))),
    )

    plt.figure(figsize=(12, 5))
    for position, (panel_title, series) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for key, label in series:
            plt.plot(logs[key], label=label)
        plt.legend()
        plt.title(panel_title)
    plt.show()

plot_training_history(history)


In [None]:
# Evaluation
# Collect predictions and true labels batch by batch so that each prediction
# stays paired with its own label. Comparing model.predict(val_gen) against
# val_gen.classes is only valid when the generator does not shuffle.
val_pred_batches = []
y_true_batches = []
for batch_idx in range(len(val_gen)):
    x_batch, y_batch = val_gen[batch_idx]
    val_pred_batches.append(model.predict(x_batch, verbose=0))
    y_true_batches.append(np.argmax(y_batch, axis=1))

val_preds = np.concatenate(val_pred_batches)
y_pred = np.argmax(val_preds, axis=1)
y_true = np.concatenate(y_true_batches)

# The generator numbers classes by sorted folder name, which is not the order
# of `categories`; take the label names from the generator's own mapping.
index_to_name = {index: name for name, index in val_gen.class_indices.items()}
class_labels = sorted(index_to_name)
target_names = [index_to_name[index] for index in class_labels]

print("Classification Report:\n", classification_report(y_true, y_pred, labels=class_labels, target_names=target_names))
print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred, labels=class_labels))


In [None]:
# Save the Model
# Writes the trained model to a .h5 file in the current working directory.
model.save("rice_leaf_disease_model.h5")



## Challenges and Improvements
## - Small dataset size: Used data augmentation.
## - Class imbalance: Checked the per-class image counts to ensure a balanced class distribution (40 images per class).
## - Limited computing power: Used a simple CNN architecture instead of a heavy pretrained model.



In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import img_to_array
import matplotlib.pyplot as plt
import random
import cv2
import os
from PIL import Image
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline


In [None]:
import os

input_folder = r"C:\Users\Admin\Documents\CDS PROJECT\RICE_LEAF"

# Report whether the dataset folder is reachable and, if it is, list its contents
if not os.path.exists(input_folder):
    print("Path does not exist. Check the folder path.")
else:
    print("Path exists:", input_folder)
    print("Contents:", os.listdir(input_folder))


In [None]:
# Re-assigns the same dataset folder as the folder-check cell (duplicate definition).
input_folder = r"C:\Users\Admin\Documents\CDS PROJECT\RICE_LEAF"


In [None]:
import splitfolders

# Destination of the split copies. NOTE(review): this looks like a Google Colab
# Drive path; replace it with a valid local path when running locally.
output_folder = "/content/drive/MyDrive/Documents/CDS PROJECT/RICE_LEAF_SPLIT/"

# Three-way split (80% / 10% / 10%) with a fixed seed
splitfolders.ratio(
    input_folder,
    output=output_folder,
    seed=1337,
    ratio=(0.8, 0.1, 0.1),
)

print("Dataset split completed.")


In [None]:
# Read the splits from the folder splitfolders wrote to (output_folder), not
# from the unsplit input folder. split-folders names its output sub-folders
# "train", "val" and "test".
train_dir = os.path.join(output_folder, "train")
validation_dir = os.path.join(output_folder, "val")
test_dir = os.path.join(output_folder, "test")

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Create data generators
# Training images are augmented on the fly; validation images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1./255)

# Read the splits from the folder splitfolders wrote to (output_folder) instead
# of hard-coded paths. split-folders names its output sub-folders "train",
# "val" and "test".
train_dir = os.path.join(output_folder, "train")
validation_dir = os.path.join(output_folder, "val")

# Create train and validation generators
batch_size = 16
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(180, 180),
    batch_size=batch_size,
    color_mode='rgb',
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    validation_dir,
    target_size=(180, 180),
    batch_size=batch_size,
    color_mode='rgb',
    class_mode='categorical'
)

# Number of batches per epoch. len() counts a final partial batch, so a split
# smaller than one batch is reported as 1 step rather than 0.
train_steps_per_epoch = len(train_generator)
val_steps_per_epoch = len(val_generator)

print(f"Training steps per epoch: {train_steps_per_epoch}")
print(f"Validation steps per epoch: {val_steps_per_epoch}")


In [None]:
# plotting train images with their labels
def plots(ims, figsize=(20,25), rows=4, interp = False, title = None):
    """Draw a batch of images on a grid, optionally titled with class names.

    Args:
        ims: Sequence of images accepted by ``imshow``.
        figsize: Figure size passed to ``plt.figure``.
        rows: Number of rows in the grid.
        interp: Unused; kept so existing calls keep working.
        title: Optional sequence with one one-hot label per image. The
            position of the largest entry selects the name from the
            module-level ``class_names``. When omitted, no titles are drawn.
    """
    n_images = len(ims)
    f = plt.figure(figsize=figsize)
    # Ceiling division, so rows * cols always has a cell for every image.
    cols = (n_images + rows - 1) // rows
    for i in range(n_images):
        sp = f.add_subplot(rows, cols, i+1)
        sp.axis('off')
        if title is not None:
            sp.set_title(class_names[int(np.argmax(title[i]))], fontsize=16)
        sp.imshow(ims[i])


# Make list of classes
# Alphabetical order, i.e. the order flow_from_directory assigns class indices in.
class_names = ['Bacterial leaf blight', 'Brown spot', 'Leaf smut']

In [None]:
import matplotlib.pyplot as plt

# Pull one batch from the training generator and display it with its class names.
imgs, labels = next(train_generator)
plots(imgs, title=labels)

In [None]:
# Import from tensorflow.keras, like the rest of the notebook, so that building,
# saving and the later tf.keras.models.load_model call all go through the same
# Keras implementation.
from tensorflow.keras import models, layers

# Create the model
model = models.Sequential()

# First convolution stage fixes the input size (180x180 RGB, matching the generators)
model.add(layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', input_shape=(180,180,3)))
model.add(layers.MaxPool2D(pool_size=(2,2)))

# Three further Conv2D + MaxPool2D stages, doubling the filter count each time
for n_filters in (64, 128, 256):
    model.add(layers.Conv2D(filters=n_filters, kernel_size=(3,3), activation='relu'))
    model.add(layers.MaxPool2D(pool_size=(2,2)))

# Add dropout to prevent overfitting
model.add(layers.Dropout(rate=0.5))

# Flatten the output from the convolutional layers
model.add(layers.Flatten())

# Add the output layer with softmax activation (for multi-class classification)
model.add(layers.Dense(3, activation='softmax'))

# Print model summary to check the architecture
model.summary()


In [None]:
pip install pydot

In [None]:
pip install graphviz

In [None]:
pip install pydotplus

In [None]:
from tensorflow.keras import optimizers
# Adam optimizer with categorical cross-entropy (labels are one-hot); track accuracy
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Train for 50 epochs, scoring on the validation generator after each epoch
history = model.fit(x=train_generator, validation_data=val_generator, epochs=50)


In [None]:
# Persist the trained model to a .h5 file in the current working directory.
model.save("model.h5")

In [None]:
# Step:9 Plotting the training accuracy and validation accuracy
# Plotting the traning loss and validation loss
import matplotlib.pyplot as plt
# Per-epoch metrics recorded by model.fit
metrics_log = history.history
accuracy = metrics_log["accuracy"]
val_accuracy = metrics_log["val_accuracy"]
loss = metrics_log["loss"]
val_loss = metrics_log["val_loss"]
epochs = range(1, len(accuracy) + 1)

# (training values, validation values, training label, validation label,
#  figure title, y-axis label) for each figure, in drawing order
curve_specs = (
    (accuracy, val_accuracy, "Training accuracy", "Validation accuracy",
     "Training and validation accuracy", "accuracy"),
    (loss, val_loss, "Training loss", "Validation loss",
     "Training and validation loss", "loss"),
)
for position, spec in enumerate(curve_specs):
    train_values, val_values, train_label, val_label, fig_title, y_label = spec
    if position > 0:
        # Every curve pair after the first goes on its own new figure
        plt.figure()
    plt.plot(epochs, train_values, "r", label=train_label)
    plt.plot(epochs, val_values, "b", label=val_label)
    plt.title(fig_title)
    plt.xlabel("epochs")
    plt.ylabel(y_label)
    plt.legend()
plt.show()

In [None]:
# Reload the model saved to model.h5 and print its architecture.
model = tf.keras.models.load_model("model.h5")
model.summary()

In [None]:
# Use the test split written by splitfolders (under output_folder) rather than
# a hard-coded path inside the unsplit input folder.
test_dir = os.path.join(output_folder, "test")

# Test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(180, 180),
    batch_size=16,
    color_mode='rgb',
    class_mode='categorical'
)


In [None]:
# Score the model on the test generator. The model in this notebook is compiled
# with metrics=['accuracy'], so the result should be the loss followed by accuracy.
model.evaluate(test_generator)

In [None]:
# Visualise the prediction of the model
imgs, labels = next(test_generator)
fig =plt.figure(figsize=(15,15))
columns = 3
rows = 3
# A batch can hold fewer than rows*columns images (small test split or a final
# partial batch), so never index past the end of it.
n_shown = min(columns*rows, len(imgs))
# One forward pass for all displayed images instead of one predict call per image.
predictions = model.predict(imgs[:n_shown])
for i in range(n_shown):
    fig.add_subplot(rows, columns, i+1)
    # Index of the highest class probability selects the predicted class name.
    idx = int(np.argmax(predictions[i]))
    plt.text(20,58, class_names[idx],color='red',fontsize=10,bbox=dict(facecolor='white',alpha=0.8))
    plt.imshow(imgs[i])