In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py
# --- Dataset layout -------------------------------------------------------
# Root folder; each sub-folder is treated as one class by the cells below.
train_path = "dataset/train"
# The extraction loop reads 1.jpg .. <images_per_class>.jpg from every class folder.
images_per_class = 800

# --- Feature extraction ---------------------------------------------------
# Size passed to cv2.resize for every image before features are computed.
fixed_size = (256, 256)
# Number of bins per channel in the 3-D colour histogram (fd_histogram).
bins = 8

# --- Output files ---------------------------------------------------------
# HDF5 paths; not referenced by the visible cells -- presumably used to
# persist features/labels elsewhere (TODO confirm or remove).
h5_train_data = 'output/train_data.h5'
h5_train_labels = 'output/train_labels.h5'

In [None]:
def rgb_bgr(image):
    """Convert `image` with ``cv2.COLOR_BGR2RGB`` and return the result.

    Note: despite the function name, the conversion applied here is
    BGR -> RGB (the channel order produced by ``cv2.imread`` is BGR).
    """
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [None]:
def bgr_hsv(rgb_img):
    """Convert `rgb_img` with ``cv2.COLOR_RGB2HSV`` and return the result.

    Note: despite the function name, the input is interpreted as RGB
    (the conversion code is RGB2HSV, not BGR2HSV).
    """
    return cv2.cvtColor(rgb_img, cv2.COLOR_RGB2HSV)

In [None]:
def img_segmentation(rgb_img,hsv_img):
    """Keep only the pixels whose HSV value lies in the green or brown range.

    Parameters
    ----------
    rgb_img : image the final mask is applied to.
    hsv_img : HSV image of the same size, used only for thresholding.

    Returns
    -------
    The result of ``cv2.bitwise_and(rgb_img, rgb_img, mask=...)`` where the
    mask is the union of the "healthy" (green) and "disease" (brown) masks.
    """
    # Green range -> "healthy" mask.
    lower_green = np.array([25,0,20])
    upper_green = np.array([100,255,255])
    healthy_mask = cv2.inRange(hsv_img, lower_green, upper_green)

    # Brown range -> "disease" mask.
    lower_brown = np.array([10,0,10])
    upper_brown = np.array([30,255,255])
    disease_mask = cv2.inRange(hsv_img, lower_brown, upper_brown)

    # Union of both masks. bitwise_or is used instead of `+` because the two
    # hue ranges overlap (25..30) and uint8 addition wraps there (255 + 255 -> 254);
    # that only worked because the mask is tested for non-zero.
    final_mask = cv2.bitwise_or(healthy_mask, disease_mask)
    return cv2.bitwise_and(rgb_img, rgb_img, mask=final_mask)


In [None]:
# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
    """Return a flattened, normalized 3-D HSV colour histogram of `image`.

    Parameters
    ----------
    image : image interpreted as BGR (it is converted with COLOR_BGR2HSV).
    mask  : optional mask forwarded to ``cv2.calcHist``; ``None`` (default)
            uses every pixel.

    Returns
    -------
    1-D array with ``bins ** 3`` entries (``bins`` is the notebook-level setting).
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # The mask argument was previously ignored (None was hard-coded here).
    # NOTE(review): for 8-bit images OpenCV documents hue as 0..179, so with the
    # [0, 256] range the upper hue bins stay empty. Left unchanged so existing
    # feature vectors keep their layout.
    hist = cv2.calcHist([hsv], [0, 1, 2], mask, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)  # normalizes in place
    return hist.flatten()

In [None]:
# Class names are the sub-directories of train_path. Plain files that may sit
# next to them (e.g. .DS_Store, Thumbs.db) are skipped so they are not
# mistaken for classes by the extraction loop.
train_labels = sorted(
    entry
    for entry in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, entry))
)
print(train_labels)

# Accumulators filled by the feature-extraction loop: one feature vector and
# one label string per processed image.
HOG_HISTO = []
labels = []

In [None]:
from skimage.feature import hog
from skimage import exposure

# Start from empty accumulators so that re-running this cell does not append
# every sample a second time.
HOG_HISTO.clear()
labels.clear()

# loop over the training data sub-folders
for training_name in train_labels:
    # join the training data path and each species training folder
    class_dir = os.path.join(train_path, training_name)
    # get the current training label
    current_label = training_name
    # loop over the images in each sub-folder (files are named 1.jpg, 2.jpg, ...)
    for x in range(1, images_per_class + 1):
        image_path = os.path.join(class_dir, str(x) + ".jpg")
        # cv2.imread returns None instead of raising when the file is missing
        # or unreadable; fail here with the offending path rather than inside
        # cv2.resize.
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(
                "could not read training image: {}".format(image_path)
            )
        image = cv2.resize(image, fixed_size)
        # Running Function Bit By Bit
        RGB_BGR       = rgb_bgr(image)                       # BGR -> RGB
        BGR_HSV       = bgr_hsv(RGB_BGR)                     # RGB -> HSV
        IMG_SEGMENT   = img_segmentation(RGB_BGR, BGR_HSV)   # masked RGB image
        # Call for Global Feature Descriptors.
        # IMG_SEGMENT is in RGB order, so convert with the RGB codes; the BGR
        # codes used before swapped the red and blue channels.
        gray_image = cv2.cvtColor(IMG_SEGMENT, cv2.COLOR_RGB2GRAY)
        fd_hog = hog(gray_image, orientations=9, pixels_per_cell=(16,16), cells_per_block=(2, 2), transform_sqrt=True, block_norm='L2-Hys')
        # fd_histogram interprets its input as BGR, so hand it a BGR image.
        fv_histogram  = fd_histogram(cv2.cvtColor(IMG_SEGMENT, cv2.COLOR_RGB2BGR))
        # Concatenate colour histogram and HOG into one feature vector
        HH_feature = np.hstack([fv_histogram, fd_hog])
        # update the list of labels and feature vectors
        labels.append(current_label)
        HOG_HISTO.append(HH_feature)

    print("[STATUS] processed folder: {}".format(current_label))

print("[STATUS] completed Global Feature Extraction...")

In [None]:
# Map the string class labels to integer codes for the classifiers.
le = LabelEncoder().fit(labels)
target = le.transform(labels)
# Sorted distinct label strings (np.unique returns them in sorted order).
targetNames = np.unique(labels)
print("[STATUS] training labels encoded...")

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Rescale every feature column to the [0, 1] interval.
# NOTE(review): the scaler is fitted on all samples; this cell runs before the
# train/test split.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(HOG_HISTO)
rescaled_features = scaler.transform(HOG_HISTO)
print("[STATUS] feature vector normalized...")

In [None]:
# Sanity check: show the dimensions of the scaled feature matrix.
rescaled_features.shape

In [None]:
# Settings shared by the modelling cells:
#   num_trees -> n_estimators of the random forest
#   test_size -> fraction of samples held out by train_test_split
#   seed      -> random_state for the split and the seeded estimators
num_trees, test_size, seed = 100, 0.20, 9

In [None]:
from sklearn.model_selection import train_test_split
# Split the *unscaled* feature vectors first, then fit the min-max scaler on
# the training rows only and apply it to the held-out rows. Fitting the scaler
# on the full dataset before splitting lets test-set statistics leak into
# preprocessing. The row partition is unchanged: it depends only on the number
# of samples, test_size and random_state.
(trainRawHH, testRawHH, trainLabelsHH, testLabelsHH) = train_test_split(
    np.array(HOG_HISTO),
    np.array(target),
    test_size=test_size,
    random_state=seed,
)
split_scaler = MinMaxScaler(feature_range=(0, 1))
trainDataHH = split_scaler.fit_transform(trainRawHH)
# Held-out values may fall slightly outside [0, 1]; that is expected.
testDataHH = split_scaler.transform(testRawHH)
print("[STATUS] splitted train and test data...")
print("Train data  : {}".format(trainDataHH.shape))
print("Test data   : {}".format(testDataHH.shape))

In [None]:
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
# Random forest: fit on the training split, evaluate on the held-out split.
clf = RandomForestClassifier(n_estimators=num_trees, random_state=seed).fit(
    trainDataHH, trainLabelsHH
)
y_predict_RFC = clf.predict(testDataHH)
# Metrics on the held-out split (the confusion matrix is kept for later inspection).
accuracy_RFC = accuracy_score(testLabelsHH, y_predict_RFC)
cm_RFC = confusion_matrix(testLabelsHH, y_predict_RFC)
print(classification_report(testLabelsHH, y_predict_RFC))
print(f"Accuracy: {accuracy_RFC:}")

In [None]:
# Display the random forest's predicted (integer-encoded) labels for the test split.
y_predict_RFC


In [None]:
# Class-probability estimates of the random forest (`clf`) for the test split.
y_predict_RFC1 = clf.predict_proba(testDataHH)


In [None]:
# Display the dimensions of the probability array returned by predict_proba.
y_predict_RFC1.shape

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
# Logistic regression with the iteration cap set to 1000.
clf2 = LogisticRegression(max_iter=1000, random_state=seed)
clf2.fit(trainDataHH, trainLabelsHH)
y_predict_logistic = clf2.predict(testDataHH)
# Metrics on the held-out split.
accuracy_logistic = accuracy_score(testLabelsHH, y_predict_logistic)
cm_logistic = confusion_matrix(testLabelsHH, y_predict_logistic)
print(classification_report(testLabelsHH, y_predict_logistic))
print(f"Accuracy: {accuracy_logistic:}")


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
# k-nearest neighbours with k = 5.
# (The *_SNS suffix is historical; these variables hold the KNN results.)
clf3 = KNeighborsClassifier(n_neighbors=5).fit(trainDataHH, trainLabelsHH)
y_predict_SNS = clf3.predict(testDataHH)
# Metrics on the held-out split.
accuracy_SNS = accuracy_score(testLabelsHH, y_predict_SNS)
cm_SNS = confusion_matrix(testLabelsHH, y_predict_SNS)
print(classification_report(testLabelsHH, y_predict_SNS))
print(f"Accuracy: {accuracy_SNS:}")


In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

# Single decision tree, seeded for reproducibility.
clf4 = DecisionTreeClassifier(random_state=seed).fit(trainDataHH, trainLabelsHH)
y_predict_DTC = clf4.predict(testDataHH)
# Metrics on the held-out split.
accuracy_DTC = accuracy_score(testLabelsHH, y_predict_DTC)
cm_DTC = confusion_matrix(testLabelsHH, y_predict_DTC)
print(classification_report(testLabelsHH, y_predict_DTC))
print(f"Accuracy: {accuracy_DTC:}")


In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

# Gaussian naive Bayes with default settings.
clf5 = GaussianNB().fit(trainDataHH, trainLabelsHH)
y_predict_GNB = clf5.predict(testDataHH)
# Metrics on the held-out split.
accuracy_GNB = accuracy_score(testLabelsHH, y_predict_GNB)
cm_GNB = confusion_matrix(testLabelsHH, y_predict_GNB)
print(classification_report(testLabelsHH, y_predict_GNB))
print(f"Accuracy: {accuracy_GNB:}")


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

# Support vector classifier with a linear kernel.
clf6 = SVC(kernel='linear').fit(trainDataHH, trainLabelsHH)
y_predict_SVC = clf6.predict(testDataHH)
# Metrics on the held-out split.
accuracy_svc = accuracy_score(testLabelsHH, y_predict_SVC)
cm_SVC = confusion_matrix(testLabelsHH, y_predict_SVC)
print(classification_report(testLabelsHH, y_predict_SVC))
print(f"Accuracy: {accuracy_svc:}")


In [None]:
# Side-by-side accuracy of every model trained above, printed one per line.
summary_lines = [
    "All Accuracy of Machine learning Models......",
    f"SVC Accuracy => {accuracy_svc}",
    f"Gaussian Naive Bayes Accuracy => {accuracy_GNB}",
    f"Decision Tree Classifier Accuracy => {accuracy_DTC}",
    f"Logistic Regression Accuracy => {accuracy_logistic}",
    f"k-nearest neighbors Accuracy => {accuracy_SNS}",
    f"Random Forest Classifier Accuracy =>{accuracy_RFC}",
]
print("\n".join(summary_lines))

In [None]:
# Recorded accuracies from an earlier run, kept as a note (labelled "HOG 16*16";
# presumably pixels_per_cell=(16, 16) -- TODO confirm). The string literal was
# missing its closing quotes, which made this cell a SyntaxError.
''' HOG 16*16
All Accuracy of Machine learning Models......
SVC Accuracy => 0.878125
Gaussian Naive Bayes Accuracy => 0.871875
Decision Tree Classifier Accuracy => 0.89375
Logistic Regression Accuracy => 0.878125
k-nearest neighbors Accuracy => 0.778125
Random Forest Classifier Accuracy =>0.93125
'''

In [None]:
# Recorded accuracies from an earlier run, kept as a note (labelled "HOG 32,32";
# presumably pixels_per_cell=(32, 32) -- TODO confirm). The string literal was
# missing its closing quotes, which made this cell a SyntaxError.
'''  HOG 32,32
All Accuracy of Machine learning Models......
SVC Accuracy => 0.91875
Gaussian Naive Bayes Accuracy => 0.8875
Decision Tree Classifier Accuracy => 0.934375
Logistic Regression Accuracy => 0.925
k-nearest neighbors Accuracy => 0.803125
Random Forest Classifier Accuracy =>0.953125
'''