# Importing Libraries

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import greycomatrix, greycoprops
from matplotlib import pyplot as plt
%matplotlib inline
from os import listdir
from os.path import isfile, join

# Function to create a new, empty feature dataframe

In [None]:
def create_empty_df():
    '''
    Build an empty dataframe that has one column per extracted feature
    plus a trailing 'label' column.

    Output params:
    frame : pandas DataFrame with zero rows and the feature/label columns
    '''
    # Column order matters: rows are later written positionally.
    column_names = (
        'area', 'perimeter',
        'red_mean', 'green_mean', 'blue_mean',
        'f1', 'f2',
        'red_std', 'green_std', 'blue_std',
        'f4', 'f5', 'f6', 'f7', 'f8',
        'label',
    )
    frame = pd.DataFrame()
    for column in column_names:
        frame[column] = None
    return frame

# Function to extract the features

In [None]:
def _glcm_property_sum(glcm, prop):
    '''Return one GLCM property summed over the four angles of the single distance.'''
    # greycoprops returns a (n_distances, n_angles) array; row 0 is distance [1].
    values = greycoprops(glcm, prop)[0]
    return values[0] + values[1] + values[2] + values[3]


def feature_extractor(filename):
    '''
    Extract shape, colour and texture features from one image file.

    input params:
    filename : path of the file that we want to process

    Output params:
    l : Feature vector, a list in the order
        [area, perimeter, red_mean, green_mean, blue_mean,
         f1, f2, red_std, green_std, blue_std, f4, f5, f6, f7, f8]
        or the string "Invalid" when the file cannot be read as an image,
        no contour is found, or the selected contour has zero area.
    '''

    try:
        main_img = cv2.imread(filename)
        # cv2.imread signals an unreadable file by returning None, not by raising.
        if main_img is None:
            return "Invalid"
        rgb = cv2.cvtColor(main_img, cv2.COLOR_BGR2RGB)
    except cv2.error:
        return "Invalid"

    # Preprocessing: grayscale -> blur -> inverted Otsu threshold -> closing,
    # which yields a binary (0/255) foreground mask.
    gs = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    blur = cv2.GaussianBlur(gs, (25, 25), 0)
    _, im_bw_otsu = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    kernel = np.ones((25, 25), np.uint8)
    closing = cv2.morphologyEx(im_bw_otsu, cv2.MORPH_CLOSE, kernel)

    # Shape features
    # [-2] picks the contour list from both the 2-value and the 3-value
    # return signatures of cv2.findContours.
    contours = cv2.findContours(closing, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        # Nothing segmented: treat like any other unusable image.
        return "Invalid"
    # NOTE(review): this is the first contour returned, not necessarily the
    # largest one; kept as-is so features match previously exported data.
    cnt = contours[0]
    area = cv2.contourArea(cnt)
    if area == 0:
        return "Invalid"
    perimeter = cv2.arcLength(cnt, True)

    # Zero out every pixel outside the mask (mask is 0/255 -> multiplier 0/1).
    keep = (closing // 255)[:, :, np.newaxis]
    img = main_img * keep

    # Color features
    # NOTE(review): img is still in OpenCV's BGR channel order, so index 0 is
    # the blue plane and index 2 the red plane. The red_*/blue_* names (and the
    # dataframe columns they feed) are therefore swapped. Left unchanged so the
    # feature order stays compatible with datasets already produced.
    red_channel = img[:, :, 0]
    green_channel = img[:, :, 1]
    blue_channel = img[:, :, 2]

    red_mean = np.mean(red_channel)
    green_mean = np.mean(green_channel)
    blue_mean = np.mean(blue_channel)
    # Standard deviation of each colour plane
    red_std = np.std(red_channel)
    green_std = np.std(green_channel)
    blue_std = np.std(blue_channel)

    # Fraction of pixels whose hue falls in the green band (H in [36, 70])
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, (36, 0, 0), (70, 255, 255))
    ratio_green = cv2.countNonZero(mask) / (img.size / 3)
    f1 = np.round(ratio_green, 2)
    # Fraction of pixels that are not green
    f2 = 1 - f1

    # Texture features from the grey level co-occurrence matrix, computed at
    # distance 1 for the angles 0, 45, 90 and 135 degrees. Each feature is the
    # sum of the property over those four angles.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    g = greycomatrix(gray, [1], [0, np.pi/4, np.pi/2, 3*np.pi/4])

    f4 = _glcm_property_sum(g, 'contrast')
    f5 = _glcm_property_sum(g, 'dissimilarity')
    f6 = _glcm_property_sum(g, 'homogeneity')
    f7 = _glcm_property_sum(g, 'energy')
    f8 = _glcm_property_sum(g, 'correlation')

    l = [area, perimeter, red_mean, green_mean, blue_mean,
         f1, f2, red_std, green_std, blue_std,
         f4, f5, f6, f7, f8]
    return l

# Function to process one folder

In [None]:
def process_folder(folderpath,df_f,label_f):
    '''
    Extract features for every entry of one folder and append them,
    together with the folder's label, as new rows of the dataframe.

    input params:
    folderpath : Path of the folder that we want to process
    df_f = dataframe that receives one row per usable image
    label_f : label written into the last column of every added row

    Output params:
    df_f = the same dataframe, extended in place with the new rows
    '''
    for entry in os.listdir(folderpath):
        features = feature_extractor(os.path.join(folderpath, entry))
        if features != "Invalid":
            # Row = feature vector followed by the class label.
            df_f.loc[len(df_f)] = features + [label_f]
            total_rows = len(df_f)
            # Progress report every 500 stored rows.
            if total_rows % 500 == 0:
                print(total_rows)

    return df_f


# Function to process one plant

In [None]:
def process_plant(folderpaths, labels, savepath):
    '''
    Build one datasheet from several class folders and export it to Excel.

    input params:
    folderpaths : List of the folderpaths for specific Plant
    labels : List of labels; labels[i] is used for folderpaths[i]
    savepath : Path to export datasheet

    Output params:
    None
    '''
    sheet = create_empty_df()
    for position, folder in enumerate(folderpaths):
        sheet = process_folder(folder, sheet, labels[position])

    sheet.to_excel(savepath)

# Data Preprocessing

**Apple**

In [None]:
# Root folder of the training images; each class lives in its own sub-folder.
global_folder = '/content/drive/MyDrive/Plant Disease Detection /Raw_Dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train/'

# Class folders for Apple; position i is paired with labels[i].
folderpaths = [global_folder + 'Apple___healthy',
               global_folder + 'Apple___Apple_scab',
               global_folder + 'Apple___Black_rot',
               global_folder + 'Apple___Cedar_apple_rust'
               ]

labels = [0,1,2,3]
savepath = '/content/drive/MyDrive/Plant Disease Detection /Processed_data&models/Apple/dataset.xlsx'
process_plant(folderpaths, labels, savepath)

500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
6500
7000
7500


**Corn**

In [None]:
# Root folder of the training images; class folder names are appended to it.
global_folder = '/content/drive/MyDrive/Plant Disease Detection /Raw_Dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train/'

In [None]:
# Class folders for Corn; position i is paired with labels[i].
folderpaths = [
    global_folder + class_dir
    for class_dir in (
        'Corn_(maize)___healthy',
        'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot',
        'Corn_(maize)___Common_rust_',
        'Corn_(maize)___Northern_Leaf_Blight',
    )
]

labels = list(range(4))
savepath = '/content/drive/MyDrive/Plant Disease Detection /Processed_data&models/Corn/dataset.xlsx'
process_plant(folderpaths, labels, savepath)

500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
6500
7000


**Grape**

In [None]:
# Root folder of the training images; class folder names are appended to it.
global_folder = '/content/drive/MyDrive/Plant Disease Detection /Raw_Dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train/'

# Class folders for Grape; position i is paired with labels[i].
folderpaths = [
    global_folder + class_dir
    for class_dir in (
        'Grape___healthy',
        'Grape___Black_rot',
        'Grape___Esca_(Black_Measles)',
        'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)',
    )
]

labels = list(range(4))
savepath = '/content/drive/MyDrive/Plant Disease Detection /Processed_data&models/Grapes/dataset.xlsx'
process_plant(folderpaths, labels, savepath)

500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
6500
7000


**Tomato**

In [None]:
# Root folder of the training images; class folder names are appended to it.
global_folder = '/content/drive/MyDrive/Plant Disease Detection /Raw_Dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train/'

# Class folders for Tomato; position i is paired with labels[i].
folderpaths = [global_folder+ 'Tomato___healthy',
              global_folder+ 'Tomato___Bacterial_spot',
              global_folder+ 'Tomato___Early_blight',
              global_folder+ 'Tomato___Late_blight',
              global_folder+ 'Tomato___Leaf_Mold',
              global_folder+ 'Tomato___Septoria_leaf_spot',
              global_folder+'Tomato___Spider_mites Two-spotted_spider_mite',
              global_folder+ 'Tomato___Target_Spot',
              global_folder+'Tomato___Tomato_Yellow_Leaf_Curl_Virus',
              global_folder+ 'Tomato___Tomato_mosaic_virus'
              ]

# One label per folder (0..9). The previous hand-written list had eleven
# entries for ten folders; deriving it from the folder list keeps them in sync.
labels = list(range(len(folderpaths)))
savepath = '/content/drive/MyDrive/Plant Disease Detection /Processed_data&models/Tomato/dataset.xlsx'
process_plant(folderpaths, labels, savepath)

500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
6500
7000
7500
8000
8500
9000
9500
10000
10500
11000
11500
12000
12500
13000
13500
14000
14500
15000
15500
16000
16500
17000
17500
18000


**Potato**

In [None]:
# Root folder of the training images; class folder names are appended to it.
global_folder = '/content/drive/MyDrive/Plant Disease Detection /Raw_Dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train/'

# Class folders for Potato; position i is paired with labels[i].
folderpaths = [
    global_folder + class_dir
    for class_dir in (
        'Potato___healthy',
        'Potato___Early_blight',
        'Potato___Late_blight',
    )
]

labels = list(range(3))
savepath = '/content/drive/MyDrive/Plant Disease Detection /Processed_data&models/Potato/dataset.xlsx'
process_plant(folderpaths, labels, savepath)

500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500


In [None]:
# Flush pending writes and unmount the Colab Drive mount once all datasheets
# have been exported.
from google.colab import drive
drive.flush_and_unmount()