# **Import Libraries**

In [None]:
# Step 1: Import necessary libraries
import pandas as pd
import os
from google.colab import drive
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import Callback, ModelCheckpoint
from tensorflow.keras.models import load_model
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# **Mount Google Drive, Define Paths, and Create Folders**

In [2]:
# Step 2: Mount Google Drive so the files stored there are reachable from this runtime
drive.mount('/content/drive')

# Step 3: Locations used by the rest of the notebook
# (note: despite its name, `csv_folder` holds the path of the CSV *file*, not a directory)
input_folder = "/content/drive/MyDrive/Dataset/train/train_data"  # original images
output_folder = "/content/drive/MyDrive/Dataset/train/augmented_images"  # augmented image samples
csv_folder = "/content/drive/MyDrive/Dataset/train/other/train.csv"  # labels CSV file
models_folder = "/content/drive/MyDrive/Dataset/train/models"  # saved models

# Step 4: Make sure both output directories exist (no-op when they are already there)
for writable_dir in (output_folder, models_folder):
    os.makedirs(writable_dir, exist_ok=True)

NameError: name 'drive' is not defined

# **Load CSV File and Add Class Names**

In [None]:
# Step 5: Read the label table (image IDs and labels) from the CSV file
df = pd.read_csv(csv_folder)
print("CSV file loaded successfully. First few rows:")
print(df.head())

# Step 6: Integer label -> human-readable class name (position in the list is the label)
label_mapping = dict(enumerate([
    "Cassava Bacterial Blight (CBB)",
    "Cassava Brown Streak Disease (CBSD)",
    "Cassava Green Mottle (CGM)",
    "Cassava Mosaic Disease (CMD)",
    "Healthy",
]))

# Step 7: Attach the readable class name next to each integer label
class_names = df['label'].map(label_mapping)
df['class_name'] = class_names
print("DataFrame with class names added:")
print(df.head())

# **Split Dataset into Training and Validation Sets**

In [None]:
# Step 8: Split the dataset into training and validation sets.
# The split is stratified on `label` so that both sets keep the class proportions of the
# full table; a plain random split can leave rare classes under-represented in validation.
# Stratification needs at least two samples per class and a validation set at least as
# large as the number of classes, so fall back to a plain random split otherwise.
validation_fraction = 0.2
label_counts = df['label'].value_counts()
can_stratify = (
    len(label_counts) > 0
    and label_counts.min() >= 2
    and int(len(df) * validation_fraction) >= len(label_counts)
)
train_df, val_df = train_test_split(
    df,
    test_size=validation_fraction,
    random_state=42,  # fixed seed keeps the split reproducible across runs
    stratify=df['label'] if can_stratify else None,
)
print("Training samples:", len(train_df))
print("Validation samples:", len(val_df))

# **Define Image Dimensions and Data Augmentation**

In [None]:
# Step 9: Input resolution and batch size used by the generators and the model
img_height = img_width = 150  # images are resized to 150x150 pixels
batch_size = 32  # number of images per batch

# Step 10: Augmentation settings applied to training images only
augmentation_options = dict(
    rescale=1./255,          # normalize pixel values to [0, 1]
    rotation_range=20,       # random rotations (range of 20 degrees)
    width_shift_range=0.2,   # random horizontal shifts (20%)
    height_shift_range=0.2,  # random vertical shifts (20%)
    shear_range=0.2,         # random shear transformations
    zoom_range=0.2,          # random zoom (20%)
    horizontal_flip=True,    # random horizontal flips
    fill_mode='nearest',     # fill newly exposed pixels with the nearest value
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Step 11: Validation images are only rescaled, never augmented
val_datagen = ImageDataGenerator(rescale=1./255)

# **Create Data Generators**

In [None]:
# Arguments that are identical for the training and the validation generator
shared_flow_options = dict(
    directory=input_folder,               # folder containing the images
    x_col='image_id',                     # column with image filenames
    y_col='label',                        # column with integer labels
    target_size=(img_height, img_width),  # resize every image on load
    batch_size=batch_size,
    class_mode='raw',                     # pass the integer labels through unchanged
)

# Step 12: Training generator (augmented)
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    **shared_flow_options,
)

# Step 13: Validation generator (rescale only, order kept fixed)
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    shuffle=False,  # do not shuffle validation data
    **shared_flow_options,
)

# **Visualize and Save Augmented Images**

In [None]:
# Step 14: Function to visualize and save augmented images in separate folders
def visualize_and_save_augmented_images(generator, save_dir, num_images=10):
    """Save and display up to ``num_images`` images from the generator's next batch.

    For the k-th image (1-based) a subfolder ``augmented_image_<k>`` is created under
    ``save_dir`` and the image is written there as ``augmented_image_<k>.jpg``; the
    image is also shown in a matplotlib figure titled with its label.

    Args:
        generator: Iterator whose ``next()`` yields an ``(images, labels)`` batch.
        save_dir: Directory under which the per-image subfolders are created.
        num_images: Maximum number of images to process. It is capped at the number
            of images actually present in the batch.
    """
    # Pull one batch from the generator
    images, labels = next(generator)

    # A batch can hold fewer images than requested (small batch size, short batch);
    # clamp so indexing below never runs past the end of the batch.
    count = max(0, min(num_images, len(images)))

    for i in range(count):
        # One subfolder per image
        image_folder = os.path.join(save_dir, f'augmented_image_{i + 1}')
        os.makedirs(image_folder, exist_ok=True)

        # Save the augmented image
        img = array_to_img(images[i])  # Convert array to image
        img.save(os.path.join(image_folder, f'augmented_image_{i + 1}.jpg'))

        # Display the image
        fig = plt.figure()
        plt.imshow(images[i])
        plt.title(f'Label: {labels[i]}')
        plt.axis('off')
        plt.show()
        # Release the figure so repeated calls do not accumulate open figures
        plt.close(fig)

# Step 15: Save and display a sample of augmented training images
visualize_and_save_augmented_images(
    generator=train_generator,
    save_dir=output_folder,
)

# **Define and Compile the Model**

In [None]:
# Step 16: Define the CNN model architecture
num_classes = len(label_mapping)  # one output unit per entry in label_mapping

# Three Conv2D + 2x2 max-pooling stages; the first one also declares the input shape
cnn_layers = [
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    layers.MaxPooling2D((2, 2)),
]
for n_filters in (64, 128):
    cnn_layers.append(layers.Conv2D(n_filters, (3, 3), activation='relu'))
    cnn_layers.append(layers.MaxPooling2D((2, 2)))

# Classifier head: flatten -> dense -> dropout -> softmax over the classes
cnn_layers.extend([
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),  # dropout to reduce overfitting
    layers.Dense(num_classes, activation='softmax'),
])
model = models.Sequential(cnn_layers)

# Step 17: Compile the model (sparse loss because the labels are plain integers)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Step 18: Print the model summary
print("Model Summary:")
model.summary()

# **Define Callbacks and Train the Model**

In [None]:
# Step 19: Define callbacks for training
epochs = 20  # Number of training epochs

# Custom callback to print accuracy after each epoch
class SaveAndPrintAccuracy(Callback):
    """Print training and validation accuracy at the end of every epoch.

    Despite the name, this callback only prints; it does not save anything.
    """

    @staticmethod
    def _format_metric(value):
        """Format a metric with four decimals, or 'n/a' when it was not reported."""
        return "n/a" if value is None else f"{value:.4f}"

    def on_epoch_end(self, epoch, logs=None):
        """Print the epoch's accuracy figures.

        Args:
            epoch: Zero-based epoch index (printed as ``epoch + 1``).
            logs: Metric dict for the epoch; may be ``None`` or lack either metric
                (for example when training runs without validation data).
        """
        # `logs` defaults to None, and a metric can be absent; neither should crash training.
        logs = logs or {}
        accuracy = self._format_metric(logs.get('accuracy'))
        val_accuracy = self._format_metric(logs.get('val_accuracy'))
        print(f"\nEpoch {epoch + 1}: Training Accuracy = {accuracy}, Validation Accuracy = {val_accuracy}")

# ModelCheckpoint callback: saves the entire model at the end of every epoch.
# No `save_best_only` is set, so each epoch gets its own file; the epoch number and
# validation accuracy are embedded in the filename.
checkpoint_name = 'model_epoch_{epoch:02d}_val_acc_{val_accuracy:.4f}.keras'
checkpoint_callback = ModelCheckpoint(
    filepath=os.path.join(models_folder, checkpoint_name),
    verbose=1,  # print a message on each save
    save_weights_only=False,  # full model, not just weights
    save_freq='epoch',  # save once per epoch
)

# Step 20: Train the model.
# steps_per_epoch / validation_steps are intentionally not passed: with them unset,
# Keras runs each epoch over the generator's own length, which includes the final
# partial batch. The previous `samples // batch_size` rounded down and therefore
# skipped that last batch, so part of the validation set was never evaluated.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,  # Number of epochs
    callbacks=[checkpoint_callback, SaveAndPrintAccuracy()],  # Callbacks
)

# **Save and Load the Final Model**

In [None]:
# Step 21: Persist the trained model under the models folder
final_model_name = 'disease_classifier.h5'
final_model_path = os.path.join(models_folder, final_model_name)
model.save(final_model_path)
print(f"Final model saved at: {final_model_path}")

# Step 22: Reload the model from the file that was just written
model = load_model(final_model_path)
print("Model loaded successfully.")

# **Prediction Function and Example Usage**

In [None]:
# Step 23: Function to predict the class of a new image
def predict_image(image_path):
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=(img_height, img_width))  # Load image
    img_array = tf.keras.preprocessing.image.img_to_array(img)  # Convert image to array
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array /= 255.0  # Normalize pixel values
    predictions = model.predict(img_array)  # Make predictions
    predicted_class = np.argmax(predictions, axis=1)  # Get the predicted class
    predicted_label = label_mapping[predicted_class[0]]  # Map class index to label
    return predicted_label

# Step 24: Example usage of the prediction function
test_image_path = os.path.join(input_folder, 'example_image.jpg')  # Replace with an actual image path
if not os.path.exists(test_image_path):
    # Nothing to predict on; report the missing file instead of failing
    print(f"Test image not found at {test_image_path}")
else:
    print(f'Predicted class: {predict_image(test_image_path)}')