In [2]:
#get data
from google.colab import userdata
import os
os.environ['KAGGLE_KEY'] = 'b403e4ec66331d29f144d139ed6a2299'
os.environ['KAGGLE_USERNAME'] = 'xstella'

In [3]:
!kaggle datasets download -d rashikrahmanpritom/plant-disease-recognition-dataset --path datasets/   --unzip

Dataset URL: https://www.kaggle.com/datasets/rashikrahmanpritom/plant-disease-recognition-dataset
License(s): CC0-1.0
Downloading plant-disease-recognition-dataset.zip to datasets
100% 1.25G/1.25G [00:09<00:00, 218MB/s]
100% 1.25G/1.25G [00:09<00:00, 136MB/s]


In [4]:
TRAIN = './datasets/Train/Train/'
TEST = './datasets/Test/Test/'
VAL = './datasets/Validation/Validation/'
os.listdir(VAL)

['Rust', 'Healthy', 'Powdery']

In [5]:
import numpy as np
import matplotlib.pyplot as plt
import glob
import cv2
import tensorflow as tf
import seaborn as sns
import pandas as pd
from skimage.filters import sobel
from skimage.feature import graycomatrix, graycoprops

In [6]:
SIZE = 256

In [7]:
train_images = []
train_labels = []

for dir_path in glob.glob(os.path.join(TRAIN, "*")):
  label = os.path.basename(dir_path)
  print(label)
  if not os.listdir(dir_path):
        print(f"Skipping empty directory: {dir_path}")
        continue
  for img_path in glob.glob(os.path.join(dir_path, "*.jpg")):
        # print(img_path)


        # Read the image in grayscale mode
        img = cv2.imread(img_path, 0)

        #Bilateral Filter -> Noise reduction
        bilateral_image = cv2.bilateralFilter(img, 15, 75, 75)




        # Resize the image
        img = cv2.resize(bilateral_image, (SIZE, SIZE))

        # Append the image and label to the respective lists
        train_images.append(img)
        train_labels.append(label)

train_images = np.array(train_images)
train_labels = np.array(train_labels)

print("Training data loaded:")
print(f"Number of images: {len(train_images)}")
print(f"Image shape: {train_images.shape[1:]}")  # Print image dimensions
print(f"Number of labels: {len(train_labels)}")
print(f"Unique labels: {np.unique(train_labels)}")  # View unique labels

Rust
Healthy
Powdery
Training data loaded:
Number of images: 1322
Image shape: (256, 256)
Number of labels: 1322
Unique labels: ['Healthy' 'Powdery' 'Rust']


In [8]:
test_images = []
test_labels = []

for dir_path in glob.glob(os.path.join(TEST, "*")):
  label = os.path.basename(dir_path)
  if not os.listdir(dir_path):
        print(f"Skipping empty directory: {dir_path}")
        continue
  for img_path in glob.glob(os.path.join(dir_path, "*.jpg")):
        # print(img_path)

        # Read the image in grayscale mode
        img = cv2.imread(img_path, 0)

        #Bilateral Filter -> Noise reduction
        bilateral_image = cv2.bilateralFilter(img, 15, 75, 75)

        # Resize the image
        img = cv2.resize(bilateral_image, (SIZE, SIZE))

        # Append the image and label to the respective lists
        test_images.append(img)
        test_labels.append(label)

test_images = np.array(test_images)
test_labels = np.array(test_labels)

print("testing data loaded:")
print(f"Number of images: {len(test_images)}")
print(f"Image shape: {test_images.shape[1:]}")  # Print image dimensions
print(f"Number of labels: {len(test_labels)}")
print(f"Unique labels: {np.unique(test_labels)}")  # View unique labels

testing data loaded:
Number of images: 150
Image shape: (256, 256)
Number of labels: 150
Unique labels: ['Healthy' 'Powdery' 'Rust']


In [9]:
val_images = []
val_labels = []

for dir_path in glob.glob(os.path.join(VAL, "*")):
  label = os.path.basename(dir_path)
  if not os.listdir(dir_path):
        print(f"Skipping empty directory: {dir_path}")
        continue
  for img_path in glob.glob(os.path.join(dir_path, "*.jpg")):
        # print(img_path)

        # Read the image in grayscale mode
        img = cv2.imread(img_path, 0)

        #Bilateral Filter -> Noise reduction
        bilateral_image = cv2.bilateralFilter(img, 15, 75, 75)

        # Resize the image
        img = cv2.resize(bilateral_image, (SIZE, SIZE))

        # Append the image and label to the respective lists
        val_images.append(img)
        val_labels.append(label)

val_images = np.array(val_images)
val_labels = np.array(val_labels)

print("valing data loaded:")
print(f"Number of images: {len(val_images)}")
print(f"Image shape: {val_images.shape[1:]}")  # Print image dimensions
print(f"Number of labels: {len(val_labels)}")
print(f"Unique labels: {np.unique(val_labels)}")  # View unique labels

valing data loaded:
Number of images: 60
Image shape: (256, 256)
Number of labels: 60
Unique labels: ['Healthy' 'Powdery' 'Rust']


In [10]:
#label encoding
map_ = {'Healthy':0,'Powdery':1, 'Rust':2}

In [11]:
#encoding
train_labels = np.array(pd.Series(train_labels).map(map_).values)
test_labels = np.array(pd.Series(test_labels).map(map_).values)
val_labels = np.array(pd.Series(val_labels).map(map_).values)

In [12]:
def feature_extractor(dataset):
    image_dataset = pd.DataFrame()
    dfs=[]
    for image in range(dataset.shape[0]):
        df = pd.DataFrame()
        img = dataset[image, :,:]

        GLCM = graycomatrix(img, [1], [0])
        GLCM_Energy = graycoprops(GLCM, 'energy')[0]
        df['Energy'] = GLCM_Energy
        GLCM_corr = graycoprops(GLCM, 'correlation')[0]
        df['Corr'] = GLCM_corr
        GLCM_diss = graycoprops(GLCM, 'dissimilarity')[0]
        df['Diss_sim'] = GLCM_diss
        GLCM_hom = graycoprops(GLCM, 'homogeneity')[0]
        df['Homogen'] = GLCM_hom
        GLCM_contr = graycoprops(GLCM, 'contrast')[0]
        df['Contrast'] = GLCM_contr


        GLCM2 = graycomatrix(img, [3], [0])
        GLCM_Energy2 = graycoprops(GLCM2, 'energy')[0]
        df['Energy2'] = GLCM_Energy2
        GLCM_corr2 = graycoprops(GLCM2, 'correlation')[0]
        df['Corr2'] = GLCM_corr2
        GLCM_diss2 = graycoprops(GLCM2, 'dissimilarity')[0]
        df['Diss_sim2'] = GLCM_diss2
        GLCM_hom2 = graycoprops(GLCM2, 'homogeneity')[0]
        df['Homogen2'] = GLCM_hom2
        GLCM_contr2 = graycoprops(GLCM2, 'contrast')[0]
        df['Contrast2'] = GLCM_contr2

        GLCM3 = graycomatrix(img, [5], [0])
        GLCM_Energy3 = graycoprops(GLCM3, 'energy')[0]
        df['Energy3'] = GLCM_Energy3
        GLCM_corr3 = graycoprops(GLCM3, 'correlation')[0]
        df['Corr3'] = GLCM_corr3
        GLCM_diss3 = graycoprops(GLCM3, 'dissimilarity')[0]
        df['Diss_sim3'] = GLCM_diss3
        GLCM_hom3 = graycoprops(GLCM3, 'homogeneity')[0]
        df['Homogen3'] = GLCM_hom3
        GLCM_contr3 = graycoprops(GLCM3, 'contrast')[0]
        df['Contrast3'] = GLCM_contr3

        GLCM4 = graycomatrix(img, [0], [np.pi/4])
        GLCM_Energy4 = graycoprops(GLCM4, 'energy')[0]
        df['Energy4'] = GLCM_Energy4
        GLCM_corr4 = graycoprops(GLCM4, 'correlation')[0]
        df['Corr4'] = GLCM_corr4
        GLCM_diss4 = graycoprops(GLCM4, 'dissimilarity')[0]
        df['Diss_sim4'] = GLCM_diss4
        GLCM_hom4 = graycoprops(GLCM4, 'homogeneity')[0]
        df['Homogen4'] = GLCM_hom4
        GLCM_contr4 = graycoprops(GLCM4, 'contrast')[0]
        df['Contrast4'] = GLCM_contr4

        GLCM5 = graycomatrix(img, [0], [np.pi/2])
        GLCM_Energy5 = graycoprops(GLCM5, 'energy')[0]
        df['Energy5'] = GLCM_Energy5
        GLCM_corr5 = graycoprops(GLCM5, 'correlation')[0]
        df['Corr5'] = GLCM_corr5
        GLCM_diss5 = graycoprops(GLCM5, 'dissimilarity')[0]
        df['Diss_sim5'] = GLCM_diss5
        GLCM_hom5 = graycoprops(GLCM5, 'homogeneity')[0]
        df['Homogen5'] = GLCM_hom5
        GLCM_contr5 = graycoprops(GLCM5, 'contrast')[0]
        df['Contrast5'] = GLCM_contr5

        dfs.append(df)

    image_dataset = pd.concat(dfs, ignore_index=True)
    return image_dataset

In [13]:
X_train = feature_extractor(train_images)
X_test = feature_extractor(test_images)
X_val = feature_extractor(val_images)

In [14]:
print(X_train.shape)
print(X_test.shape)
print(X_val.shape)

(1322, 25)
(150, 25)
(60, 25)


In [15]:
X_train

Unnamed: 0,Energy,Corr,Diss_sim,Homogen,Contrast,Energy2,Corr2,Diss_sim2,Homogen2,Contrast2,...,Energy4,Corr4,Diss_sim4,Homogen4,Contrast4,Energy5,Corr5,Diss_sim5,Homogen5,Contrast5
0,0.014394,0.908254,10.475689,0.140419,280.599556,0.010244,0.697256,21.350868,0.065698,927.206692,...,0.084087,1.0,0.0,1.0,0.0,0.084087,1.0,0.0,1.0,0.0
1,0.018915,0.930555,7.597059,0.178067,128.982200,0.014357,0.818205,13.009928,0.096257,336.394562,...,0.096074,1.0,0.0,1.0,0.0,0.096074,1.0,0.0,1.0,0.0
2,0.024619,0.970829,4.675123,0.297281,72.332567,0.015854,0.880655,10.733479,0.137799,295.144578,...,0.089219,1.0,0.0,1.0,0.0,0.089219,1.0,0.0,1.0,0.0
3,0.014174,0.888805,11.026302,0.133655,336.857950,0.010421,0.666176,21.294111,0.063757,1010.959008,...,0.084645,1.0,0.0,1.0,0.0,0.084645,1.0,0.0,1.0,0.0
4,0.019078,0.969381,6.236627,0.224338,101.661412,0.013510,0.873194,13.422956,0.109214,419.334023,...,0.086623,1.0,0.0,1.0,0.0,0.086623,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1317,0.025538,0.985577,3.650414,0.296716,29.457950,0.016761,0.920695,8.783689,0.144194,162.485950,...,0.093600,1.0,0.0,1.0,0.0,0.093600,1.0,0.0,1.0,0.0
1318,0.024105,0.983228,5.723300,0.259717,92.281510,0.018981,0.949731,9.732738,0.160489,276.345912,...,0.079135,1.0,0.0,1.0,0.0,0.079135,1.0,0.0,1.0,0.0
1319,0.020930,0.961271,6.119868,0.238586,116.792111,0.013651,0.861266,13.252795,0.104081,418.640208,...,0.086864,1.0,0.0,1.0,0.0,0.086864,1.0,0.0,1.0,0.0
1320,0.025382,0.987088,3.913281,0.313396,44.463343,0.016803,0.953592,8.373363,0.156257,160.187994,...,0.087958,1.0,0.0,1.0,0.0,0.087958,1.0,0.0,1.0,0.0


In [16]:
X_test

Unnamed: 0,Energy,Corr,Diss_sim,Homogen,Contrast,Energy2,Corr2,Diss_sim2,Homogen2,Contrast2,...,Energy4,Corr4,Diss_sim4,Homogen4,Contrast4,Energy5,Corr5,Diss_sim5,Homogen5,Contrast5
0,0.019467,0.914868,7.848483,0.205831,165.947472,0.014943,0.773585,13.564476,0.112357,439.867589,...,0.096967,1.0,0.0,1.0,0.0,0.096967,1.0,0.0,1.0,0.0
1,0.018107,0.959779,6.896906,0.188404,128.627237,0.013923,0.834688,14.502810,0.104437,527.233757,...,0.086508,1.0,0.0,1.0,0.0,0.086508,1.0,0.0,1.0,0.0
2,0.019887,0.934868,6.967647,0.195977,124.440502,0.015227,0.844907,11.661885,0.110212,296.365875,...,0.096659,1.0,0.0,1.0,0.0,0.096659,1.0,0.0,1.0,0.0
3,0.020390,0.962433,5.989568,0.208547,89.691713,0.014846,0.846433,12.520411,0.118336,365.773283,...,0.091503,1.0,0.0,1.0,0.0,0.091503,1.0,0.0,1.0,0.0
4,0.026720,0.988552,3.107184,0.360977,33.385401,0.017463,0.952684,6.944170,0.177835,138.085814,...,0.084782,1.0,0.0,1.0,0.0,0.084782,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,0.068195,0.986088,5.232552,0.289939,87.201302,0.060216,0.920265,13.367635,0.158643,499.035033,...,0.099389,1.0,0.0,1.0,0.0,0.099389,1.0,0.0,1.0,0.0
146,0.018469,0.935403,7.832598,0.193498,177.624632,0.012718,0.750160,16.719584,0.094735,686.189415,...,0.087346,1.0,0.0,1.0,0.0,0.087346,1.0,0.0,1.0,0.0
147,0.023732,0.982381,4.973131,0.264223,86.629320,0.016248,0.915972,12.362077,0.117009,412.054240,...,0.078632,1.0,0.0,1.0,0.0,0.078632,1.0,0.0,1.0,0.0
148,0.018005,0.940406,8.269945,0.182681,159.177757,0.014137,0.845025,13.758940,0.101742,413.649101,...,0.094875,1.0,0.0,1.0,0.0,0.094875,1.0,0.0,1.0,0.0


In [17]:
import pickle as pkl

In [18]:
with open('X_train.pkl', 'wb') as file:
  pkl.dump(X_train, file)
with open('X_test.pkl', 'wb') as file:
  pkl.dump(X_test, file)
with open('X_val.pkl', 'wb') as file:
  pkl.dump(X_val, file)

In [19]:
with open('y_train.pkl', 'wb') as file:
  pkl.dump(train_labels, file)
with open('y_test.pkl', 'wb') as file:
  pkl.dump(test_labels, file)
with open('y_val.pkl', 'wb') as file:
  pkl.dump(val_labels, file)
with open('label_map.pkl', 'wb') as file:
  pkl.dump(map_, file)