In [None]:
import os

# Root folder of the rice-leaf dataset; each sub-directory is treated as one class.
dataset_path = '/home/aubct/Documents/RiceLeafDataset'

# Report how many entries each class folder contains.
for class_name in os.listdir(dataset_path):
    class_path = os.path.join(dataset_path, class_name)
    # Skip stray files that sit next to the class folders (e.g. .DS_Store):
    # os.listdir() on a regular file raises NotADirectoryError.
    if not os.path.isdir(class_path):
        continue
    print(f"Class: {class_name}, Number of Images: {len(os.listdir(class_path))}")


Class: brown_spot, Number of Images: 1480
Class: narrow_brown_spot, Number of Images: 1416
Class: tungro, Number of Images: 1740
Class: rice_hispa, Number of Images: 1461
Class: bacterial_leaf_blight, Number of Images: 1386
Class: healthy, Number of Images: 1491
Class: leaf_scald, Number of Images: 1670
Class: neck_blast, Number of Images: 1000
Class: sheath_blight, Number of Images: 1578
Class: leaf_blast, Number of Images: 1801


In [None]:
import cv2
import numpy as np

def load_and_preprocess_images(dataset_path, size=(299, 299)):
    """Load every image under ``dataset_path`` and return enhanced grayscale arrays.

    Each sub-directory of ``dataset_path`` is treated as one class; its name is
    used as the label for every image it contains. Per image the pipeline is:
    resize -> bilateral denoise -> BGR to grayscale -> CLAHE contrast
    enhancement -> Canny edge map -> 70/30 blend of the CLAHE image and the
    edge map.

    Args:
        dataset_path: Directory whose sub-directories hold the images of one
            class each. Entries that are not directories are ignored.
        size: Target size handed to ``cv2.resize``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays: the blended grayscale
        images and the matching class-name strings, in listing order.
        Files that OpenCV cannot decode are skipped and a one-line summary of
        how many were skipped is printed.
    """
    images = []
    labels = []
    skipped = []

    # The CLAHE settings never change, so build the object once instead of
    # once per image.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    for class_name in os.listdir(dataset_path):
        class_path = os.path.join(dataset_path, class_name)
        # A stray file next to the class folders would make os.listdir() raise.
        if not os.path.isdir(class_path):
            continue

        for filename in os.listdir(class_path):
            img_path = os.path.join(class_path, filename)
            img = cv2.imread(img_path)
            # cv2.imread signals "could not read/decode" by returning None
            # rather than raising; passing None on to cv2.resize would crash.
            if img is None:
                skipped.append(img_path)
                continue

            img_resized = cv2.resize(img, size)
            img_denoised = cv2.bilateralFilter(img_resized, d=11, sigmaColor=75, sigmaSpace=60)
            img_gray = cv2.cvtColor(img_denoised, cv2.COLOR_BGR2GRAY)

            img_clahe = clahe.apply(img_gray)

            edges = cv2.Canny(img_clahe, 100, 200)

            # Overlay the edge map on the contrast-enhanced image.
            img_combined = cv2.addWeighted(img_clahe, 0.7, edges, 0.3, 0)

            images.append(img_combined)
            labels.append(class_name)

    if skipped:
        print(f"Skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")

    return np.array(images), np.array(labels)

# Run the full preprocessing pass once; X holds the enhanced grayscale
# images and y the matching class-name strings.
X, y = load_and_preprocess_images(dataset_path=dataset_path)

In [None]:
def apply_adaptive_thresholding(images):
    """Return an inverted Gaussian adaptive-threshold version of every image.

    Each image is passed to ``cv2.adaptiveThreshold`` with a maximum value of
    255, the Gaussian adaptive method, inverted binary output, a block size of
    11 and a constant of 2. The results are collected into one NumPy array in
    the same order as ``images``.
    """
    return np.array([
        cv2.adaptiveThreshold(
            single_image,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2,
        )
        for single_image in images
    ])

# Segment every preprocessed image, then confirm the resulting array shape.
X_thresholded = apply_adaptive_thresholding(images=X)
print(f"Thresholded Images shape: {X_thresholded.shape}")


Thresholded Images shape: (15023, 299, 299)


In [None]:
import h5py
import numpy as np

# The thresholded masks go into their own HDF5 file.
with h5py.File("thresholded_images.h5", mode="w") as masks_file:
    masks_file.create_dataset("X_thresholded", data=X_thresholded)
print("Thresholded images saved successfully!")

# The preprocessed images and their labels share a second file. The labels
# are converted to a byte-string dtype before being written.
with h5py.File("preprocessed_data.h5", mode="w") as prep_file:
    prep_file.create_dataset("X", data=X)
    prep_file.create_dataset("y", data=y.astype("S"))
print("Preprocessed images and labels saved successfully!")


Thresholded images saved successfully!


In [None]:
import h5py
import numpy as np

# Read the thresholded masks back into memory as one NumPy array.
with h5py.File("thresholded_images.h5", mode="r") as masks_file:
    stored_masks = masks_file["X_thresholded"]
    X_thresholded = np.array(stored_masks)

print(f"Loaded Images shape: {X_thresholded.shape}")

Loaded Images shape: (15023, 299, 299)


In [None]:
import h5py

# Write the preprocessed images and their labels to one HDF5 file. The labels
# are converted to a byte-string dtype before being written.
with h5py.File("preprocessed_data.h5", mode="w") as prep_file:
    prep_file.create_dataset("X", data=X)
    prep_file.create_dataset("y", data=y.astype("S"))
print("Preprocessed images and labels saved successfully!")

Preprocessed images and labels saved successfully!


In [None]:
import h5py
import numpy as np

# Reload the preprocessed images and labels; the labels were written as byte
# strings, so they are converted back to str here.
with h5py.File("preprocessed_data.h5", mode="r") as prep_file:
    X_loaded = np.array(prep_file["X"])
    stored_labels = np.array(prep_file["y"])
    y_loaded = stored_labels.astype(str)

for caption, loaded in (("Images", X_loaded), ("Labels", y_loaded)):
    print(f"Loaded {caption} shape: {loaded.shape}")


Loaded Images shape: (15023, 299, 299)
Loaded Labels shape: (15023,)


In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the class-name strings, then map them to integer ids.
label_encoder = LabelEncoder().fit(y_loaded)
y_encoded = label_encoder.transform(y_loaded)
print(y_encoded)

[1 1 1 ... 3 3 3]


In [None]:
import joblib
# Persist the fitted encoder so the integer ids can be mapped back to class
# names later.
joblib.dump(value=label_encoder, filename="label_encoder.pkl")

['label_encoder.pkl']

In [None]:
from sklearn.model_selection import train_test_split

# First split: 80% train, 20% held out. Stratifying on the labels keeps the
# class proportions the same in every subset; the printed class counts range
# from 1000 to 1801, so a purely random split can skew the small val/test sets.
X_train, X_temp, y_train, y_temp = train_test_split(
    X_loaded, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Second split: divide the held-out 20% evenly into validation and test,
# again stratified on the labels.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print(f"Training set: {len(X_train)} images")
print(f"Validation set: {len(X_val)} images")
print(f"Test set: {len(X_test)} images")

Training set: 12018 images
Validation set: 1502 images
Test set: 1503 images


In [None]:
# Repeat each single-channel image three times along a new trailing axis so
# every split ends with a channel dimension of size 3.
X_train_rgb = np.stack((X_train, X_train, X_train), axis=-1)
X_val_rgb = np.stack((X_val, X_val, X_val), axis=-1)
X_test_rgb = np.stack((X_test, X_test, X_test), axis=-1)


In [None]:
import numpy as np

def batch_normalize(X, batch_size=1000):
    """Return a float32 copy of ``X`` with every value divided by 255.

    The input array is first copied to float32; the division is then applied
    in place on that copy, ``batch_size`` leading-axis entries at a time. The
    array passed in is not modified.
    """
    scaled = X.astype(np.float32)
    total = len(scaled)
    for start in range(0, total, batch_size):
        chunk = scaled[start:start + batch_size]  # view into `scaled`
        np.divide(chunk, 255.0, out=chunk)
    return scaled

# Rescale each split to float32 in turn; every name is rebound to its scaled
# copy before the next split is processed.
X_train_rgb = batch_normalize(X_train_rgb)
X_val_rgb = batch_normalize(X_val_rgb)
X_test_rgb = batch_normalize(X_test_rgb)

# Sanity check: print the dtype of all three splits.
print(*(split.dtype for split in (X_train_rgb, X_val_rgb, X_test_rgb)))


float32 float32 float32


In [None]:
# Show the value range of the scaled training data.
print(X_train_rgb.min(), X_train_rgb.max())


0.0 1.0


In [None]:
# Print the shape of each split, in train / val / test order.
for split_array in (X_train_rgb, X_val_rgb, X_test_rgb):
    print(split_array.shape)

(12018, 299, 299, 3)
(1502, 299, 299, 3)
(1503, 299, 299, 3)


In [None]:
# Store the encoded labels, the class names (as byte strings) and the
# per-split label arrays in one HDF5 file.
with h5py.File('labels.h5', 'w') as labels_file:
    label_datasets = (
        ('y_encoded', y_encoded),
        ('classes', np.array(label_encoder.classes_, dtype='S')),
        ("y_train", y_train),
        ("y_val", y_val),
        ("y_test", y_test),
    )
    for dataset_name, dataset_values in label_datasets:
        labels_file.create_dataset(dataset_name, data=dataset_values)

print("Labels saved successfully.")

Labels saved successfully.


In [None]:
import h5py

# Write the three scaled 3-channel splits into one HDF5 file, one dataset
# per split.
with h5py.File("rgb_datasets.h5", mode="w") as store:
    store.create_dataset("X_train_rgb", data=X_train_rgb)
    store.create_dataset("X_val_rgb", data=X_val_rgb)
    store.create_dataset("X_test_rgb", data=X_test_rgb)
print("Datasets saved successfully!")


Datasets saved successfully!


In [None]:
import h5py
import numpy as np

# Read the three splits back from disk as in-memory NumPy arrays. The
# generator is consumed by the unpacking while the file is still open.
with h5py.File("rgb_datasets.h5", mode="r") as store:
    X_train_rgb, X_val_rgb, X_test_rgb = (
        np.array(store[key])
        for key in ("X_train_rgb", "X_val_rgb", "X_test_rgb")
    )

print(f"Train Set Shape: {X_train_rgb.shape}")
print(f"Validation Set Shape: {X_val_rgb.shape}")
print(f"Test Set Shape: {X_test_rgb.shape}")


Train Set Shape: (12018, 299, 299, 3)
Validation Set Shape: (1502, 299, 299, 3)
Test Set Shape: (1503, 299, 299, 3)
