In [29]:
import kagglehub

path = kagglehub.dataset_download("jawadali1045/20k-multi-class-crop-disease-images")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/20k-multi-class-crop-disease-images


In [42]:
import kagglehub
import os
from PIL import Image
import random
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Rescaling, RandomFlip, RandomRotation, RandomZoom

# Load the dataset
path = kagglehub.dataset_download("jawadali1045/20k-multi-class-crop-disease-images")
print("Path to dataset files:", path)

dataset_path = path
train_path = os.path.join(dataset_path, 'Train')
validation_path = os.path.join(dataset_path, 'Validation')


# Explore the dataset (optional, for verification)
print("\nContents of Train directory:")
train_classes = os.listdir(train_path)
print(train_classes)

print("\nContents of Validation directory:")
validation_classes = os.listdir(validation_path)
print(validation_classes)

# Count images in each class within Train and Validation (optional, for verification)
train_class_counts = {}
print("\nTrain Class distribution:")
for class_name in train_classes:
    class_dir = os.path.join(train_path, class_name)
    if os.path.isdir(class_dir):
        image_files = [f for f in os.listdir(class_dir) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]
        train_class_counts[class_name] = len(image_files)
        print(f"{class_name}: {len(image_files)} images")

validation_class_counts = {}
print("\nValidation Class distribution:")
for class_name in validation_classes:
    class_dir = os.path.join(validation_path, class_name)
    if os.path.isdir(class_dir):
        image_files = [f for f in os.listdir(class_dir) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]
        validation_class_counts[class_name] = len(image_files)
        print(f"{class_name}: {len(image_files)} images")


# Define image dimensions and batch size
img_height = 128
img_width = 128
batch_size = 32

# Get the class names by inspecting the directories
class_names = sorted([name for name in os.listdir(train_path) if os.path.isdir(os.path.join(train_path, name))])
print("Class names:", class_names)
num_classes = len(class_names)
print("Number of classes:", num_classes)

# Function to safely parse image and label from file path
def safe_parse_image(file_path_tensor, class_names_tensor):
    file_path = file_path_tensor.numpy().decode('utf-8')
    class_names = [name.decode('utf-8') for name in class_names_tensor.numpy()]

    try:
        # Extract label from the parent directory name
        parts = file_path.split(os.sep)
        class_name = parts[-2]
        try:
            label = class_names.index(class_name)
        except ValueError:
             print(f"Warning: Class name '{class_name}' not found in class_names.")
             return tf.zeros(shape=(img_height, img_width, 3), dtype=tf.float32), tf.constant(-1, dtype=tf.int64)


        # Read and decode image
        img = tf.io.read_file(file_path)
        img = tf.image.decode_image(img, channels=3, expand_animations=False)

        # Check if the image is successfully decoded and has expected channels
        if img.shape.rank == 3 and img.shape[-1] == 3:
             img = tf.image.convert_image_dtype(img, tf.float32)
             img = tf.image.resize(img, [img_height, img_width])
             return img, tf.constant(label, dtype=tf.int64)
        else:
             # Return placeholder for invalid images with correct shape
             print(f"Warning: Invalid image shape for {file_path}")
             return tf.zeros(shape=(img_height, img_width, 3), dtype=tf.float32), tf.constant(-1, dtype=tf.int64)

    except Exception as e:
        # Handle decoding errors by returning placeholder with correct shape
        tf.print(f"Error decoding image {file_path}: {e}")
        return tf.zeros(shape=(img_height, img_width, 3), dtype=tf.float32), tf.constant(-1, dtype=tf.int64)


# Wrapper function to use safe_parse_image with tf.py_function
@tf.function
def tf_safe_parse_image(file_path):
    # Pass class_names as a tensor to tf.py_function
    img, label = tf.py_function(
        func=safe_parse_image,
        inp=[file_path, tf.constant(class_names, dtype=tf.string)],
        Tout=[tf.float32, tf.int64]
    )
    # Explicitly set the shape of the output tensors
    img.set_shape((img_height, img_width, 3))
    label.set_shape(()) # Scalar label
    return img, label

# Function to filter out invalid entries (where label is -1)
def filter_invalid_entries(image, label):
    return tf.not_equal(label, -1)


# Get list of all image file paths in the train directory
train_list_ds = tf.data.Dataset.list_files(os.path.join(train_path, '*/*'))

# Get list of all image file paths in the validation directory
test_list_ds = tf.data.Dataset.list_files(os.path.join(validation_path, '*/*'))


# Determine the size of the dataset by counting files manually
valid_train_files = [os.path.join(root, file) for root, _, files in os.walk(train_path) for file in files if file.lower().endswith(('.jpg', '.png', '.jpeg', '.bmp', '.gif'))]
dataset_size = len(valid_train_files)
print(f"Found {dataset_size} potential image files in the training directory.")


# Split the list of file paths into train and validation sets
train_size = int(0.8 * dataset_size)
val_size = dataset_size - train_size # The rest for validation

# Shuffle and take for splitting
train_list_ds = train_list_ds.shuffle(dataset_size, seed=42).take(train_size)
val_list_ds = train_list_ds.skip(train_size).take(val_size) # Split from the shuffled train list


# Create train and validation datasets by mapping the safe parsing function
train_ds = train_list_ds.map(tf_safe_parse_image, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_list_ds.map(tf_safe_parse_image, num_parallel_calls=tf.data.AUTOTUNE)

# Filter out any invalid entries resulting from decoding errors
train_ds = train_ds.filter(filter_invalid_entries)
val_ds = val_ds.filter(filter_invalid_entries)


# Create test dataset from the validation folder and filter invalid entries
test_ds = test_list_ds.map(tf_safe_parse_image, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.filter(filter_invalid_entries)


# Batch the datasets
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.batch(batch_size).cache().prefetch(buffer_size=tf.data.AUTOTUNE)
val_ds = val_ds.batch(batch_size).cache().prefetch(buffer_size=tf.data.AUTOTUNE)
test_ds = test_ds.batch(batch_size).cache().prefetch(buffer_size=tf.data.AUTOTUNE)


print("\nDatasets created and optimized using manual loading with error handling and explicit shape setting.")

# Add data augmentation layers
data_augmentation = Sequential([
    RandomFlip("horizontal_and_vertical"),
    RandomRotation(0.2),
    RandomZoom(0.2),
])

# Define the CNN model architecture
model = Sequential([
    # Data augmentation layer
    data_augmentation,
    # Rescaling layer to normalize pixel values - provide input_shape here
    Rescaling(1./255, input_shape=(img_height, img_width, 3)),

    # Convolutional and Pooling layers
    Conv2D(16, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),

    # Flatten the output for the dense layers
    Flatten(),

    # Dense layers for classification
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax') # Output layer with softmax for multi-class classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
epochs = 15 # Define the number of training epochs
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs
)

# Print model summary
model.summary()

Path to dataset files: /kaggle/input/20k-multi-class-crop-disease-images

Contents of Train directory:
['bacterial_blight in Cotton', 'RedRust sugarcane', 'Healthy Maize', 'Wheat___Yellow_Rust', 'Tungro', 'Wheat mite', 'Anthracnose on Cotton', 'Healthy Wheat', 'Cotton Aphid', 'Common_Rust', 'American Bollworm on Cotton', 'Yellow Rust Sugarcane', 'Flag Smut', 'Wheat scab', 'Army worm', 'cotton whitefly', 'Wheat leaf blight', 'Healthy cotton', 'Wilt', 'bollrot on Cotton', 'Rice Blast', 'Becterial Blight in Rice', 'cotton mealy bug', 'maize ear rot', 'Wheat Stem fly', 'Mosaic sugarcane', 'Sugarcane Healthy', 'red cotton bug', 'maize stem borer', 'Brownspot', 'bollworm on Cotton', 'pink bollworm in cotton', 'Leaf Curl', 'Wheat aphid', 'maize fall armyworm', 'Wheat Brown leaf Rust', 'Leaf smut', 'Wheat black rust', 'thirps on  cotton', 'RedRot sugarcane', 'Gray_Leaf_Spot', 'Wheat powdery mildew']

Contents of Validation directory:
['RedRust sugarcane', 'Healthy Maize', 'Wheat___Yellow_Rust'

  super().__init__(**kwargs)


     12/Unknown [1m19s[0m 1s/step - accuracy: 0.0702 - loss: 3.6916Error decoding image /kaggle/input/20k-multi-class-crop-disease-images/Train/maize ear rot/00000073.jpg: {{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Unknown image file format. One of JPEG, PNG, GIF, BMP required. [Op:DecodeImage] name: 
     43/Unknown [1m53s[0m 1s/step - accuracy: 0.0922 - loss: 3.5216Error decoding image /kaggle/input/20k-multi-class-crop-disease-images/Train/pink bollworm in cotton/Image_39.jpg: {{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Unknown image file format. One of JPEG, PNG, GIF, BMP required. [Op:DecodeImage] name: 
     47/Unknown [1m58s[0m 1s/step - accuracy: 0.0932 - loss: 3.5073Error decoding image /kaggle/input/20k-multi-class-crop-disease-images/Train/Wheat powdery mildew/00001348.jpg: {{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Unknow



[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m408s[0m 1s/step - accuracy: 0.1021 - loss: 3.2272
Epoch 2/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m316s[0m 717ms/step - accuracy: 0.0996 - loss: 3.1169
Epoch 3/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m309s[0m 684ms/step - accuracy: 0.1000 - loss: 3.1154
Epoch 4/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m344s[0m 742ms/step - accuracy: 0.1001 - loss: 3.1146
Epoch 5/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m295s[0m 673ms/step - accuracy: 0.1035 - loss: 3.1138
Epoch 6/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 667ms/step - accuracy: 0.1053 - loss: 3.1133



[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 679ms/step - accuracy: 0.1053 - loss: 3.1133
Epoch 7/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 676ms/step - accuracy: 0.1058 - loss: 3.1128



[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 679ms/step - accuracy: 0.1058 - loss: 3.1128
Epoch 8/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 674ms/step - accuracy: 0.1037 - loss: 3.1124
Epoch 9/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 672ms/step - accuracy: 0.1012 - loss: 3.1121



[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 679ms/step - accuracy: 0.1012 - loss: 3.1121
Epoch 10/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 678ms/step - accuracy: 0.1059 - loss: 3.1114
Epoch 11/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 662ms/step - accuracy: 0.1062 - loss: 3.1109



[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 678ms/step - accuracy: 0.1062 - loss: 3.1109
Epoch 12/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 665ms/step - accuracy: 0.1071 - loss: 3.1103



[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 678ms/step - accuracy: 0.1071 - loss: 3.1103
Epoch 13/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 677ms/step - accuracy: 0.1069 - loss: 3.1098
Epoch 14/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m254s[0m 659ms/step - accuracy: 0.1065 - loss: 3.1092
Epoch 15/15




[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 657ms/step - accuracy: 0.1070 - loss: 3.1089



[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 679ms/step - accuracy: 0.1070 - loss: 3.1089




## LinkedIn Project Description

Developed a Convolutional Neural Network (CNN) for multi-class crop disease image classification using TensorFlow and Keras. The project involved downloading and processing a large dataset of over 20,000 images, implementing data augmentation, and training a CNN model to identify 42 different crop diseases. This work highlights skills in image data handling, deep learning model development, and applying AI to agricultural challenges for improved crop health and yield.