# Data Preprocessing for Coffee Rust Detection

This notebook preprocesses the coffee-leaf dataset, which contains both healthy and infected samples. The steps are: loading the images, resizing them to 224×224 (the MobileNetV2 input size), and previewing the data augmentation techniques that can be used to enlarge the dataset for training.

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Relative locations of the two class folders (healthy / infected leaves)
healthy_dir, infected_dir = '../data/healthy', '../data/infected'

# Function to load images from a directory
def load_images_from_directory(directory, categories=('healthy', 'infected'),
                               target_size=(224, 224)):
    """Load and resize every readable image under the category sub-folders.

    Each name in *categories* must be a sub-directory of *directory*; its
    position in the sequence becomes the integer label (with the defaults:
    0 = healthy, 1 = infected).

    Args:
        directory: Root folder containing one sub-folder per category.
        categories: Sub-folder names, in label order.
        target_size: ``(width, height)`` handed to ``cv2.resize``. The
            default matches the MobileNetV2 input size.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. Images keep the
        channel order produced by ``cv2.imread`` (BGR for colour images).

    Raises:
        FileNotFoundError: If a category sub-folder does not exist
            (propagated from ``os.listdir``).
    """
    images = []
    labels = []
    for label, category in enumerate(categories):
        path = os.path.join(directory, category)
        # os.listdir returns entries in arbitrary order; sort so that the
        # sample order is reproducible between runs and machines.
        for img_name in sorted(os.listdir(path)):
            img_path = os.path.join(path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None (hidden files,
                # sub-folders, corrupt or non-image files) instead of raising;
                # passing None on to cv2.resize would abort the whole load.
                print(f'Skipping unreadable file: {img_path}')
                continue
            images.append(cv2.resize(img, target_size))
            labels.append(label)
    return np.array(images), np.array(labels)

# Read both classes from the dataset root, then report how many samples
# were loaded and which distinct label values are present.
images, labels = load_images_from_directory(directory='../data')
print(f'Loaded {len(images)} images with labels {np.unique(labels)}')

In [None]:
# Data Augmentation
# Settings for the random transforms, collected in one mapping so they can
# be read at a glance and handed to the generator in a single call.
augmentation_settings = {
    'rotation_range': 20,        # random rotations
    'width_shift_range': 0.2,    # random horizontal shifts
    'height_shift_range': 0.2,   # random vertical shifts
    'shear_range': 0.2,          # random shear
    'zoom_range': 0.2,           # random zoom
    'horizontal_flip': True,     # random left/right mirroring
    'fill_mode': 'nearest',      # how pixels exposed by a transform are filled
}
datagen = ImageDataGenerator(**augmentation_settings)

# Example of using the data generator
# flow() works on batches, so give the first image a leading batch axis.
sample_image = np.expand_dims(images[0], axis=0)

# flow() yields augmented batches indefinitely; pairing it with range(6)
# stops after exactly 6 previews without a manual counter.
for _, batch in zip(range(6), datagen.flow(sample_image, batch_size=1)):
    # The images were read with cv2.imread, which stores channels as BGR,
    # while matplotlib interprets arrays as RGB. Convert before display so
    # the preview shows the true leaf colours instead of swapped red/blue.
    preview = cv2.cvtColor(batch[0].astype('uint8'), cv2.COLOR_BGR2RGB)
    plt.imshow(preview)
    plt.axis('off')
    plt.show()