In [2]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py
images_per_class       = 800
fixed_size             = tuple((256, 256))
train_path             = "dataset/train"
h5_train_data          = 'output/train_data.h5'
h5_train_labels        = 'output/train_labels.h5'
bins                   = 8

In [3]:
def rgb_bgr(image):
    rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return rgb_img

In [4]:
def bgr_hsv(rgb_img):
    hsv_img = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2HSV)
    return hsv_img

In [5]:
def img_segmentation(rgb_img,hsv_img):
    lower_green = np.array([25,0,20])
    upper_green = np.array([100,255,255])
    healthy_mask = cv2.inRange(hsv_img, lower_green, upper_green)
    result = cv2.bitwise_and(rgb_img,rgb_img, mask=healthy_mask)
    lower_brown = np.array([10,0,10])
    upper_brown = np.array([30,255,255])
    disease_mask = cv2.inRange(hsv_img, lower_brown, upper_brown)
    disease_result = cv2.bitwise_and(rgb_img, rgb_img, mask=disease_mask)
    final_mask = healthy_mask + disease_mask
    final_result = cv2.bitwise_and(rgb_img, rgb_img, mask=final_mask)
    return final_result


In [6]:
train_labels = os.listdir(train_path)
train_labels.sort()
print(train_labels)
Hog_features = []
labels       = []

['diseased', 'healthy']


In [7]:
from skimage.feature import hog
from skimage import exposure
for training_name in train_labels:
    dir = os.path.join(train_path, training_name)
    current_label = training_name
    for x in range(1, images_per_class + 1):
        file = os.path.join(dir, str(x) + ".jpg")
        image = cv2.imread(file)
        image = cv2.resize(image, fixed_size)
        RGB_BGR = rgb_bgr(image)
        BGR_HSV = bgr_hsv(RGB_BGR)
        IMG_SEGMENT = img_segmentation(RGB_BGR, BGR_HSV) 
        gray_image = cv2.cvtColor(IMG_SEGMENT, cv2.COLOR_BGR2GRAY)
        fd_hog = hog(gray_image, orientations=9, pixels_per_cell=(8,8), cells_per_block=(2, 2), transform_sqrt=True, block_norm='L2-Hys')
        labels.append(current_label)
        Hog_features.append(fd_hog)
    
    print("[STATUS] processed folder: {}".format(current_label))
    
print("[STATUS] completed Hog Feature Extraction...")

[STATUS] processed folder: diseased
[STATUS] processed folder: healthy
[STATUS] completed Hog Feature Extraction...


In [8]:
print("[STATUS] feature vector size {}".format(np.array(Hog_features).shape))

[STATUS] feature vector size (1600, 34596)


In [9]:
print("[STATUS] training Labels {}".format(np.array(labels).shape))

[STATUS] training Labels (1600,)


In [10]:
targetNames = np.unique(labels)
le          = LabelEncoder()
target      = le.fit_transform(labels)
print("[STATUS] training labels encoded...")

[STATUS] training labels encoded...


In [11]:
from sklearn.preprocessing import MinMaxScaler
scaler            = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(Hog_features)
print("[STATUS] feature vector normalized...")

[STATUS] feature vector normalized...


In [12]:
print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

[STATUS] target labels: [0 0 0 ... 1 1 1]
[STATUS] target labels shape: (1600,)


In [13]:
rescaled_features.shape

(1600, 34596)

In [14]:
target.shape

(1600,)

In [15]:
num_trees = 100
test_size = 0.20
seed      = 9

In [16]:
from sklearn.model_selection import train_test_split
(trainDataHog, testDataHog, trainLabelsHog, testLabelsHog) = train_test_split(np.array(rescaled_features), np.array(target),test_size=test_size,random_state=seed)
print("[STATUS] splitted train and test data...")
print("Train data  : {}".format(trainDataHog.shape))
print("Test data   : {}".format(testDataHog.shape)) 

[STATUS] splitted train and test data...
Train data  : (1280, 34596)
Test data   : (320, 34596)


In [17]:
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
clf  = RandomForestClassifier(n_estimators=num_trees, random_state=seed)
clf.fit(trainDataHog, trainLabelsHog)
y_predict_RFC = clf.predict(testDataHog)
cm_RFC = confusion_matrix(testLabelsHog, y_predict_RFC)
print(classification_report(testLabelsHog, y_predict_RFC))
accuracy_RFC = accuracy_score(testLabelsHog, y_predict_RFC)
print(f"Accuracy: {accuracy_RFC:}")

              precision    recall  f1-score   support

           0       0.79      0.84      0.82       158
           1       0.84      0.78      0.81       162

    accuracy                           0.81       320
   macro avg       0.81      0.81      0.81       320
weighted avg       0.81      0.81      0.81       320

Accuracy: 0.8125


In [18]:
testLabelsHog.shape

(320,)

In [19]:
y_predict_RFC.shape

(320,)

In [20]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

#clf2 = LogisticRegression(random_state=seed) 
clf2 = LogisticRegression(random_state=seed,max_iter=1000) 

clf2.fit(trainDataHog, trainLabelsHog)

y_predict_logistic = clf2.predict(testDataHog)

cm_logistic = confusion_matrix(testLabelsHog, y_predict_logistic)

print(classification_report(testLabelsHog, y_predict_logistic))

accuracy_logistic = accuracy_score(testLabelsHog, y_predict_logistic)
print(f"Accuracy: {accuracy_logistic:}")


              precision    recall  f1-score   support

           0       0.83      0.82      0.82       158
           1       0.82      0.84      0.83       162

    accuracy                           0.83       320
   macro avg       0.83      0.83      0.83       320
weighted avg       0.83      0.83      0.83       320

Accuracy: 0.828125


In [21]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf3 = KNeighborsClassifier(n_neighbors=5) 

clf3.fit(trainDataHog, trainLabelsHog)

y_predict_SNS = clf3.predict(testDataHog)

cm_SNS = confusion_matrix(testLabelsHog, y_predict_SNS)

print(classification_report(testLabelsHog, y_predict_SNS))

accuracy_SNS = accuracy_score(testLabelsHog, y_predict_SNS)
print(f"Accuracy: {accuracy_SNS:}")


              precision    recall  f1-score   support

           0       0.89      0.59      0.71       158
           1       0.70      0.93      0.80       162

    accuracy                           0.76       320
   macro avg       0.79      0.76      0.75       320
weighted avg       0.79      0.76      0.76       320

Accuracy: 0.7625


In [22]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf4 = DecisionTreeClassifier(random_state=seed)

clf4.fit(trainDataHog, trainLabelsHog)

y_predict_DTC = clf4.predict(testDataHog)

cm_DTC = confusion_matrix(testLabelsHog, y_predict_DTC)

print(classification_report(testLabelsHog, y_predict_DTC))
accuracy_DTC = accuracy_score(testLabelsHog, y_predict_DTC)
print(f"Accuracy: {accuracy_DTC:}")


              precision    recall  f1-score   support

           0       0.65      0.72      0.68       158
           1       0.69      0.62      0.66       162

    accuracy                           0.67       320
   macro avg       0.67      0.67      0.67       320
weighted avg       0.67      0.67      0.67       320

Accuracy: 0.66875


In [23]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf5 = GaussianNB()

clf5.fit(trainDataHog, trainLabelsHog)

y_predict_GNB = clf5.predict(testDataHog)

cm_GNB = confusion_matrix(testLabelsHog, y_predict_GNB)

print(classification_report(testLabelsHog, y_predict_GNB))

accuracy_GNB = accuracy_score(testLabelsHog, y_predict_GNB)
print(f"Accuracy: {accuracy_GNB:}")


              precision    recall  f1-score   support

           0       0.70      0.54      0.61       158
           1       0.63      0.77      0.70       162

    accuracy                           0.66       320
   macro avg       0.67      0.66      0.65       320
weighted avg       0.67      0.66      0.65       320

Accuracy: 0.659375


In [24]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf6 = SVC(kernel='linear')

clf6.fit(trainDataHog, trainLabelsHog)

y_predict_SVC = clf6.predict(testDataHog)

cm_SVC = confusion_matrix(testLabelsHog, y_predict_SVC)

print(classification_report(testLabelsHog, y_predict_SVC))

accuracy_svc = accuracy_score(testLabelsHog, y_predict_SVC)
print(f"Accuracy: {accuracy_svc:}")


              precision    recall  f1-score   support

           0       0.83      0.80      0.82       158
           1       0.81      0.84      0.83       162

    accuracy                           0.82       320
   macro avg       0.82      0.82      0.82       320
weighted avg       0.82      0.82      0.82       320

Accuracy: 0.821875


In [25]:
print("All Accuracy of Machine learning Models......")
print(f"SVC Accuracy => {accuracy_svc}")
print(f"Gaussian Naive Bayes Accuracy => {accuracy_GNB}")
print(f"Decision Tree Classifier Accuracy => {accuracy_DTC}")
print(f"Logistic Regression Accuracy => {accuracy_logistic}")
print(f"k-nearest neighbors Accuracy => {accuracy_SNS}")
print(f"Random Forest Classifier Accuracy =>{accuracy_RFC}")

All Accuracy of Machine learning Models......
SVC Accuracy => 0.821875
Gaussian Naive Bayes Accuracy => 0.659375
Decision Tree Classifier Accuracy => 0.66875
Logistic Regression Accuracy => 0.828125
k-nearest neighbors Accuracy => 0.7625
Random Forest Classifier Accuracy =>0.8125


In [26]:
'''For The Hog Feature Extraction If Pixel Size is 16*16 vecotr size is (1600, 8100) and  the efficencies are following
All Accuracy of Machine learning Models......
SVC Accuracy => 0.840625
Gaussian Naive Bayes Accuracy => 0.678125
Decision Tree Classifier Accuracy => 0.721875
Logistic Regression Accuracy => 0.853125
k-nearest neighbors Accuracy => 0.76875
Random Forest Classifier Accuracy =>0.834375

For The Hog Feature Extraction If Pixel Size is 32*32 vecotr size is (1600, 1764) and  the efficencies are following
SVC Accuracy => 0.834375
Gaussian Naive Bayes Accuracy => 0.7375
Decision Tree Classifier Accuracy => 0.678125
Logistic Regression Accuracy => 0.84375
k-nearest neighbors Accuracy => 0.796875
Random Forest Classifier Accuracy =>0.8375

SyntaxError: incomplete input (2131546193.py, line 1)

For The Hog Feature Extraction If Pixel Size is 16*16 vecotr size is (1600, 8100) and  the efficencies are following
All Accuracy of Machine learning Models......
SVC Accuracy => 0.840625
Gaussian Naive Bayes Accuracy => 0.678125
Decision Tree Classifier Accuracy => 0.721875
Logistic Regression Accuracy => 0.853125
k-nearest neighbors Accuracy => 0.76875
Random Forest Classifier Accuracy =>0.834375

For The Hog Feature Extraction If Pixel Size is 32*32 vecotr size is (1600, 1764) and  the efficencies are following
SVC Accuracy => 0.834375
Gaussian Naive Bayes Accuracy => 0.7375
Decision Tree Classifier Accuracy => 0.678125
Logistic Regression Accuracy => 0.84375
k-nearest neighbors Accuracy => 0.796875
Random Forest Classifier Accuracy =>0.8375

For The Hog Feature Extraction If Pixel Size is 8*8 vecotr size is (1600, 34596) and  the efficencies are following
All Accuracy of Machine learning Models......
1.SVC Accuracy => 0.821875
2.Gaussian Naive Bayes Accuracy => 0.659375
3.Decision Tree Classifier Accuracy => 0.66875
4.Logistic Regression Accuracy => 0.828125
5.k-nearest neighbors Accuracy => 0.7625
6.Random Forest Classifier Accuracy =>0.8125

In [None]:
'''For The Hog Feature Extraction If Pixel Size is 16*16 vecotr size is (1600, 8100) and  the efficencies are following
All Accuracy of Machine learning Models......
1.SVC Accuracy => 0.840625
2.Gaussian Naive Bayes Accuracy => 0.678125
3.Decision Tree Classifier Accuracy => 0.721875
4.Logistic Regression Accuracy => 0.853125
5.k-nearest neighbors Accuracy => 0.76875
6.Random Forest Classifier Accuracy =>0.834375

For The Hog Feature Extraction If Pixel Size is 32*32 vecotr size is (1600, 1764) and  the efficencies are following
1.SVC Accuracy => 0.834375
2.Gaussian Naive Bayes Accuracy => 0.7375
3.Decision Tree Classifier Accuracy => 0.678125
4.Logistic Regression Accuracy => 0.84375
5.k-nearest neighbors Accuracy => 0.796875
6.Random Forest Classifier Accuracy =>0.8375

For The Hog Feature Extraction If Pixel Size is 8*8 vecotr size is (1600, 34596) and  the efficencies are following
All Accuracy of Machine learning Models......
1.SVC Accuracy => 0.821875
2.Gaussian Naive Bayes Accuracy => 0.659375
3.Decision Tree Classifier Accuracy => 0.66875
4.Logistic Regression Accuracy => 0.828125
5.k-nearest neighbors Accuracy => 0.7625
6.Random Forest Classifier Accuracy =>0.8125