In [None]:
# %pip install opencv-python-headless numpy scikit-learn tensorflow

In [None]:
# Mount Google Drive into the Colab filesystem; the dataset and the saved-model
# folder used by later cells both live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import os
import pickle
import shutil

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split  # For proper train/validation split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator  # For data augmentation

In [None]:
# 1. Dataset Path and Organization:

dataset_path = '/content/drive/MyDrive/Projects/Plant Disease Prediction/Groundnut_Leaf_dataset'  # Your dataset path
train_dir = os.path.join(dataset_path, 'train')
test_dir = os.path.join(dataset_path, 'test')

# Get the class names (disease names): one sub-directory of train/ per class.
# Only real, non-hidden directories are kept, so stray files or folders such as
# .ipynb_checkpoints are not counted as classes. The list is sorted so its order
# is deterministic across runs instead of following the filesystem's order.
class_names = sorted(
    entry
    for entry in os.listdir(train_dir)
    if not entry.startswith('.') and os.path.isdir(os.path.join(train_dir, entry))
)
num_classes = len(class_names)
if num_classes == 0:
    # Fail here with a clear message rather than later inside the model definition.
    raise ValueError(f"No class directories found in {train_dir}")

print(f"Number of classes: {num_classes}")
print(f"Class names: {class_names}")


In [None]:
# 2. Data Loading and Preprocessing:

images = []
labels = []

for split_dir in [train_dir, test_dir]:  # Loop through train and test
    for class_name in class_names:
        class_dir = os.path.join(split_dir, class_name)
        for filename in os.listdir(class_dir):
            if filename.endswith(('.jpg', '.png', '.jpeg')):
                img_path = os.path.join(class_dir, filename)
                img = cv2.imread(img_path)
                if img is not None:
                    img = cv2.resize(img, (224, 224))  # Resize
                    images.append(img)
                    labels.append(class_name)
                else:
                    print(f"Error reading image: {img_path}")

images = np.array(images)
labels = np.array(labels)

In [None]:
# Label Encoding:
le = LabelEncoder()
labels_encoded = le.fit_transform(labels)

In [None]:
# Split the training data into train and validation sets:
X_train, X_val, y_train, y_val = train_test_split(
    images[:len(os.listdir(train_dir)*5)], labels_encoded[:len(os.listdir(train_dir)*5)], test_size=0.2, random_state=42, stratify=labels_encoded[:len(os.listdir(train_dir)*5)] # 80% train, 20% validation
)

X_test = images[len(os.listdir(train_dir)*5):]
y_test = labels_encoded[len(os.listdir(train_dir)*5):]

print("Train data shape:", X_train.shape)
print("Validation data shape:", X_val.shape)
print("Test data shape:", X_test.shape)

In [None]:
# 3. Data Augmentation (Important for small datasets):
# Random geometric transforms applied on the fly to each training batch.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# No datagen.fit(X_train) call: fit() only computes statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which are enabled above, so it would only spend time and memory on a float
# copy of the whole training array.

In [None]:
# 4. CNN Model Building:
# Two Conv/MaxPool stages followed by a dense classification head.
# The images are fed in as raw 0-255 pixel values, so the first layer rescales
# them to [0, 1] inside the model. Keeping the scaling in the graph means the
# augmented training batches, the validation/test arrays and any later
# inference input are all normalised identically.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(224, 224, 3)),
    tf.keras.layers.Rescaling(1.0 / 255),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'), # Added another Conv layer
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'), # Increased Dense layer units
    tf.keras.layers.Dropout(0.5),  # Added dropout for regularization
    tf.keras.layers.Dense(num_classes, activation='softmax') # Output layer with softmax
])

# Labels are integer class ids (from LabelEncoder), hence the sparse loss.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# 5. Model Training with Data Augmentation:
epochs = 20  # Adjust as needed
batch_size = 32 # Adjust as needed

# steps_per_epoch is deliberately not passed: the iterator returned by
# datagen.flow has a known length, so Keras runs one full pass per epoch.
# The previous `len(X_train) // batch_size` dropped the final partial batch and
# evaluated to 0 steps whenever there were fewer than `batch_size` samples.
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=batch_size),  # Use datagen.flow
    epochs=epochs,
    validation_data=(X_val, y_val),  # Use validation data
)

In [None]:
# 6. Model Evaluation:
# Score the trained model on the held-out test arrays. evaluate() returns the
# loss followed by the metrics given to compile() (accuracy here).
test_metrics = model.evaluate(X_test, y_test, verbose=0)
loss, accuracy = test_metrics
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

In [None]:
# 7. Saving the Model and Label Encoder:

models_dir = '/content/drive/MyDrive/saved_models'
os.makedirs(models_dir, exist_ok=True)

# Explicit `.keras` extension: current Keras releases reject an extension-less
# path in model.save(), and `.keras` is the native single-file format.
model_save_path = os.path.join(models_dir, 'groundnut_disease_model.keras')
model.save(model_save_path)

# The fitted encoder is stored next to the model so predicted class indices can
# be mapped back to disease names at inference time.
# NOTE: only unpickle this file from a trusted location; pickle.load can
# execute arbitrary code.
le_save_path = os.path.join(models_dir, 'label_encoder.pkl')

with open(le_save_path, 'wb') as f:
    pickle.dump(le, f)

print(f"Model saved to: {model_save_path}")
print(f"Label encoder saved to: {le_save_path}")

In [None]:
# 8. Plotting Training History

# One figure per tracked quantity:
# (train key, validation key, train legend label, validation legend label, y-axis label)
history_curves = (
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Loss'),
)

for train_key, val_key, train_label, val_label, y_label in history_curves:
    fig, ax = plt.subplots()
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
    plt.show()