In [25]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py
images_per_class       = 800
fixed_size             = tuple((256, 256))
train_path             = "dataset/train"
h5_train_data          = 'output/train_data.h5'
h5_train_labels        = 'output/train_labels.h5'
bins                   = 8

In [26]:
def rgb_bgr(image):
    rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return rgb_img

In [27]:
def bgr_hsv(rgb_img):
    hsv_img = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2HSV)
    return hsv_img

In [28]:
def img_segmentation(rgb_img,hsv_img):
    lower_green = np.array([25,0,20])
    upper_green = np.array([100,255,255])
    healthy_mask = cv2.inRange(hsv_img, lower_green, upper_green)
    result = cv2.bitwise_and(rgb_img,rgb_img, mask=healthy_mask)
    lower_brown = np.array([10,0,10])
    upper_brown = np.array([30,255,255])
    disease_mask = cv2.inRange(hsv_img, lower_brown, upper_brown)
    disease_result = cv2.bitwise_and(rgb_img, rgb_img, mask=disease_mask)
    final_mask = healthy_mask + disease_mask
    final_result = cv2.bitwise_and(rgb_img, rgb_img, mask=final_mask)
    return final_result


In [29]:
# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    feature = cv2.HuMoments(cv2.moments(image)).flatten()
    return feature

In [30]:
train_labels = os.listdir(train_path)
train_labels.sort()
print(train_labels)
Hu_features = []
labels       = []

['diseased', 'healthy']


In [31]:
from skimage.feature import hog
from skimage import exposure
for training_name in train_labels:
    dir = os.path.join(train_path, training_name)
    current_label = training_name
    for x in range(1, images_per_class + 1):
        file = os.path.join(dir, str(x) + ".jpg")
        image = cv2.imread(file)
        image = cv2.resize(image, fixed_size)
        RGB_BGR = rgb_bgr(image)
        BGR_HSV = bgr_hsv(RGB_BGR)
        IMG_SEGMENT = img_segmentation(RGB_BGR, BGR_HSV) 
        fv_hu_moments = fd_hu_moments(IMG_SEGMENT)
        labels.append(current_label)
        Hu_features.append(fv_hu_moments)
    
    print("[STATUS] processed folder: {}".format(current_label))
    
print("[STATUS] completed Hu_Monents Feature Extraction...")

[STATUS] processed folder: diseased
[STATUS] processed folder: healthy
[STATUS] completed Hu_Monents Feature Extraction...


In [32]:
print("[STATUS] feature vector size {}".format(np.array(Hu_features).shape))

[STATUS] feature vector size (1600, 7)


In [33]:
print("[STATUS] training Labels {}".format(np.array(labels).shape))

[STATUS] training Labels (1600,)


In [34]:
targetNames = np.unique(labels)
le          = LabelEncoder()
target      = le.fit_transform(labels)
print("[STATUS] training labels encoded...")

[STATUS] training labels encoded...


In [35]:
from sklearn.preprocessing import MinMaxScaler
scaler            = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(Hu_features)
print("[STATUS] feature vector normalized...")

[STATUS] feature vector normalized...


In [36]:
print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

[STATUS] target labels: [0 0 0 ... 1 1 1]
[STATUS] target labels shape: (1600,)


In [37]:
rescaled_features.shape

(1600, 7)

In [38]:
target.shape

(1600,)

In [39]:
num_trees = 100
test_size = 0.20
seed      = 9

In [40]:
from sklearn.model_selection import train_test_split
(trainDataHu, testDataHu, trainLabelsHu, testLabelsHu) = train_test_split(np.array(rescaled_features), np.array(target),test_size=test_size,random_state=seed)
print("[STATUS] splitted train and test data...")
print("Train data  : {}".format(trainDataHu.shape))
print("Test data   : {}".format(testDataHu.shape)) 

[STATUS] splitted train and test data...
Train data  : (1280, 7)
Test data   : (320, 7)


In [41]:
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
clf  = RandomForestClassifier(n_estimators=num_trees, random_state=seed)
clf.fit(trainDataHu, trainLabelsHu)
y_predict_RFC = clf.predict(testDataHu)
cm_RFC = confusion_matrix(testLabelsHu, y_predict_RFC)
print(classification_report(testLabelsHu, y_predict_RFC))
accuracy_rfc= accuracy_score(testLabelsHu, y_predict_RFC)
print(f"Accuracy: {accuracy_rfc:}")

              precision    recall  f1-score   support

           0       0.70      0.74      0.72       158
           1       0.73      0.69      0.71       162

    accuracy                           0.72       320
   macro avg       0.72      0.72      0.72       320
weighted avg       0.72      0.72      0.72       320

Accuracy: 0.715625


In [42]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

#clf2 = LogisticRegression(random_state=seed) 
clf2 = LogisticRegression(random_state=seed,max_iter=1000) 
clf2.fit(trainDataHu, trainLabelsHu)
y_predict_LR = clf2.predict(testDataHu)
cm_logistic = confusion_matrix(testLabelsHu, y_predict_LR)
print(classification_report(testLabelsHu, y_predict_LR))
accuracy_LR= accuracy_score(testLabelsHu, y_predict_LR)
print(f"Accuracy: {accuracy_LR:}")

              precision    recall  f1-score   support

           0       0.63      0.74      0.68       158
           1       0.69      0.57      0.62       162

    accuracy                           0.65       320
   macro avg       0.66      0.65      0.65       320
weighted avg       0.66      0.65      0.65       320

Accuracy: 0.653125


In [43]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
clf3 = KNeighborsClassifier(n_neighbors=5)
clf3.fit(trainDataHu, trainLabelsHu)
y_predict_KNN = clf3.predict(testDataHu)
cm_KNN = confusion_matrix(testLabelsHu, y_predict_KNN)
print(classification_report(testLabelsHu, y_predict_KNN))
accuracy_KNN= accuracy_score(testLabelsHu, y_predict_KNN)
print(f"Accuracy: {accuracy_KNN:}")

              precision    recall  f1-score   support

           0       0.70      0.72      0.71       158
           1       0.72      0.70      0.71       162

    accuracy                           0.71       320
   macro avg       0.71      0.71      0.71       320
weighted avg       0.71      0.71      0.71       320

Accuracy: 0.709375


In [44]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf4 = DecisionTreeClassifier(random_state=seed)
clf4.fit(trainDataHu, trainLabelsHu)
y_predict_DTC= clf4.predict(testDataHu)
cm_DTC = confusion_matrix(testLabelsHu, y_predict_DTC)
print(classification_report(testLabelsHu, y_predict_DTC))
accuracy_DTC= accuracy_score(testLabelsHu, y_predict_DTC)
print(f"Accuracy: {accuracy_DTC:}")

              precision    recall  f1-score   support

           0       0.66      0.63      0.64       158
           1       0.65      0.68      0.66       162

    accuracy                           0.65       320
   macro avg       0.65      0.65      0.65       320
weighted avg       0.65      0.65      0.65       320

Accuracy: 0.653125


In [45]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf5 = GaussianNB()
clf5.fit(trainDataHu, trainLabelsHu)
y_predict_GNB = clf5.predict(testDataHu)
cm_GNB = confusion_matrix(testLabelsHu, y_predict_GNB)
print(classification_report(testLabelsHu, y_predict_GNB))
accuracy_GNB= accuracy_score(testLabelsHu, y_predict_GNB)
print(f"Accuracy: {accuracy_GNB:}")

              precision    recall  f1-score   support

           0       0.52      1.00      0.69       158
           1       1.00      0.11      0.20       162

    accuracy                           0.55       320
   macro avg       0.76      0.56      0.44       320
weighted avg       0.76      0.55      0.44       320

Accuracy: 0.55


In [46]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf6 = SVC(kernel='linear')
clf6.fit(trainDataHu, trainLabelsHu)
y_predict_SVC = clf6.predict(testDataHu)
cm_SVC = confusion_matrix(testLabelsHu, y_predict_SVC)
print(classification_report(testLabelsHu, y_predict_SVC))
accuracy_SVC= accuracy_score(testLabelsHu, y_predict_SVC)
print(f"Accuracy: {accuracy_SVC:}")

              precision    recall  f1-score   support

           0       0.62      0.87      0.72       158
           1       0.79      0.48      0.59       162

    accuracy                           0.67       320
   macro avg       0.71      0.67      0.66       320
weighted avg       0.71      0.67      0.66       320

Accuracy: 0.671875


In [47]:
print("All Accuracy of Machine learning Models......")
print(f"SVC Accuracy => {accuracy_SVC}")
print(f"Gaussian Naive Bayes Accuracy => {accuracy_GNB}")
print(f"Decision Tree Classifier Accuracy => {accuracy_DTC}")
print(f"Logistic Regression Accuracy => {accuracy_LR}")
print(f"k-nearest neighbors Accuracy => {accuracy_KNN}")
print(f"Random Forest Classifier Accuracy =>{accuracy_rfc}")

All Accuracy of Machine learning Models......
SVC Accuracy => 0.671875
Gaussian Naive Bayes Accuracy => 0.55
Decision Tree Classifier Accuracy => 0.653125
Logistic Regression Accuracy => 0.653125
k-nearest neighbors Accuracy => 0.709375
Random Forest Classifier Accuracy =>0.715625


In [48]:
'''All Accuracy of Machine learning Models......
SVC Accuracy => 0.671875
Gaussian Naive Bayes Accuracy => 0.55
Decision Tree Classifier Accuracy => 0.653125
Logistic Regression Accuracy => 0.653125
k-nearest neighbors Accuracy => 0.709375
Random Forest Classifier Accuracy =>0.715625

SyntaxError: incomplete input (1391433233.py, line 1)

For the Feature of Hu-Moments
All Accuracy of Machine learning Models......
SVC Accuracy => 0.671875
Gaussian Naive Bayes Accuracy => 0.55
Decision Tree Classifier Accuracy => 0.653125
Logistic Regression Accuracy => 0.653125
k-nearest neighbors Accuracy => 0.709375
Random Forest Classifier Accuracy =>0.715625