In [1]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py
images_per_class       = 800
fixed_size             = tuple((256, 256))
train_path             = "dataset/train"
h5_train_data          = 'output/train_data.h5'
h5_train_labels        = 'output/train_labels.h5'
bins                   = 8

In [2]:
def rgb_bgr(image):
    rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return rgb_img

In [3]:
def bgr_hsv(rgb_img):
    hsv_img = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2HSV)
    return hsv_img

In [4]:
def img_segmentation(rgb_img,hsv_img):
    lower_green = np.array([25,0,20])
    upper_green = np.array([100,255,255])
    healthy_mask = cv2.inRange(hsv_img, lower_green, upper_green)
    result = cv2.bitwise_and(rgb_img,rgb_img, mask=healthy_mask)
    lower_brown = np.array([10,0,10])
    upper_brown = np.array([30,255,255])
    disease_mask = cv2.inRange(hsv_img, lower_brown, upper_brown)
    disease_result = cv2.bitwise_and(rgb_img, rgb_img, mask=disease_mask)
    final_mask = healthy_mask + disease_mask
    final_result = cv2.bitwise_and(rgb_img, rgb_img, mask=final_mask)
    return final_result


In [5]:
# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    haralick = mahotas.features.haralick(gray).mean(axis=0)
    return haralick

In [6]:
train_labels = os.listdir(train_path)
train_labels.sort()
print(train_labels)
Haralick_features = []
labels       = []

['diseased', 'healthy']


In [7]:
for training_name in train_labels:
    dir = os.path.join(train_path, training_name)
    current_label = training_name
    for x in range(1, images_per_class + 1):
        file = os.path.join(dir, str(x) + ".jpg")
        image = cv2.imread(file)
        image = cv2.resize(image, fixed_size)
        RGB_BGR = rgb_bgr(image)
        BGR_HSV = bgr_hsv(RGB_BGR)
        IMG_SEGMENT = img_segmentation(RGB_BGR, BGR_HSV) 
        fv_Haralick = fd_haralick(IMG_SEGMENT)
        labels.append(current_label)
        Haralick_features.append(fv_Haralick)
    
    print("[STATUS] processed folder: {}".format(current_label))
    
print("[STATUS] completed Haralick Feature Extraction...")

[STATUS] processed folder: diseased
[STATUS] processed folder: healthy
[STATUS] completed Haralick Feature Extraction...


In [8]:
print("[STATUS] feature vector size {}".format(np.array(Haralick_features).shape))

[STATUS] feature vector size (1600, 13)


In [9]:
print("[STATUS] training Labels {}".format(np.array(labels).shape))

[STATUS] training Labels (1600,)


In [10]:
targetNames = np.unique(labels)
le          = LabelEncoder()
target      = le.fit_transform(labels)
print("[STATUS] training labels encoded...")

[STATUS] training labels encoded...


In [11]:
from sklearn.preprocessing import MinMaxScaler
scaler            = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(Haralick_features)
print("[STATUS] feature vector normalized...")

[STATUS] feature vector normalized...


In [25]:
print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

[STATUS] target labels: [0 0 0 ... 1 1 1]
[STATUS] target labels shape: (1600,)


In [26]:
rescaled_features.shape

(1600, 13)

In [27]:
target.shape

(1600,)

In [28]:
num_trees = 100
test_size = 0.20
seed      = 9

In [29]:
from sklearn.model_selection import train_test_split
(trainDataHaralick, testDataHaralick, trainLabelsHaralick, testLabelsHaralick) = train_test_split(np.array(rescaled_features), np.array(target),test_size=test_size,random_state=seed)
print("[STATUS] splitted train and test data...")
print("Train data  : {}".format(trainDataHaralick.shape))
print("Test data   : {}".format(testDataHaralick.shape)) 

[STATUS] splitted train and test data...
Train data  : (1280, 13)
Test data   : (320, 13)


In [30]:
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
clf  = RandomForestClassifier(n_estimators=num_trees, random_state=seed)
clf.fit(trainDataHaralick, trainLabelsHaralick)
y_predict_RFC = clf.predict(testDataHaralick)
cm_RFC = confusion_matrix(testLabelsHaralick, y_predict_RFC)
print(classification_report(testLabelsHaralick, y_predict_RFC))
accuracy_rfc= accuracy_score(testLabelsHaralick, y_predict_RFC)
print(f"Accuracy: {accuracy_rfc:}")

              precision    recall  f1-score   support

           0       0.77      0.83      0.80       158
           1       0.82      0.76      0.79       162

    accuracy                           0.79       320
   macro avg       0.80      0.79      0.79       320
weighted avg       0.80      0.79      0.79       320

Accuracy: 0.79375


In [31]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
clf2 = LogisticRegression(random_state=seed) 
#clf2 = LogisticRegression(random_state=seed,max_iter=1000) 
clf2.fit(trainDataHaralick, trainLabelsHaralick)
y_predict_LR = clf2.predict(testDataHaralick)
cm_logistic = confusion_matrix(testLabelsHaralick, y_predict_LR)
print(classification_report(testLabelsHaralick, y_predict_LR))
accuracy_LR= accuracy_score(testLabelsHaralick, y_predict_LR)
print(f"Accuracy: {accuracy_LR:}")

              precision    recall  f1-score   support

           0       0.70      0.70      0.70       158
           1       0.70      0.70      0.70       162

    accuracy                           0.70       320
   macro avg       0.70      0.70      0.70       320
weighted avg       0.70      0.70      0.70       320

Accuracy: 0.7


In [32]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
clf3 = KNeighborsClassifier(n_neighbors=5)
clf3.fit(trainDataHaralick, trainLabelsHaralick)
y_predict_KNN = clf3.predict(testDataHaralick)
cm_KNN = confusion_matrix(testLabelsHaralick, y_predict_KNN)
print(classification_report(testLabelsHaralick, y_predict_KNN))
accuracy_KNN= accuracy_score(testLabelsHaralick, y_predict_KNN)
print(f"Accuracy: {accuracy_KNN:}")

              precision    recall  f1-score   support

           0       0.76      0.77      0.76       158
           1       0.77      0.77      0.77       162

    accuracy                           0.77       320
   macro avg       0.77      0.77      0.77       320
weighted avg       0.77      0.77      0.77       320

Accuracy: 0.765625


In [33]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf4 = DecisionTreeClassifier(random_state=seed)
clf4.fit(trainDataHaralick, trainLabelsHaralick)
y_predict_DTC= clf4.predict(testDataHaralick)
cm_DTC = confusion_matrix(testLabelsHaralick, y_predict_DTC)
print(classification_report(testLabelsHaralick, y_predict_DTC))
accuracy_DTC= accuracy_score(testLabelsHaralick, y_predict_DTC)
print(f"Accuracy: {accuracy_DTC:}")

              precision    recall  f1-score   support

           0       0.76      0.75      0.75       158
           1       0.76      0.77      0.76       162

    accuracy                           0.76       320
   macro avg       0.76      0.76      0.76       320
weighted avg       0.76      0.76      0.76       320

Accuracy: 0.75625


In [34]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf5 = GaussianNB()
clf5.fit(trainDataHaralick, trainLabelsHaralick)
y_predict_GNB = clf5.predict(testDataHaralick)
cm_GNB = confusion_matrix(testLabelsHaralick, y_predict_GNB)
print(classification_report(testLabelsHaralick, y_predict_GNB))
accuracy_GNB= accuracy_score(testLabelsHaralick, y_predict_GNB)
print(f"Accuracy: {accuracy_GNB:}")

              precision    recall  f1-score   support

           0       0.68      0.63      0.66       158
           1       0.66      0.71      0.69       162

    accuracy                           0.67       320
   macro avg       0.67      0.67      0.67       320
weighted avg       0.67      0.67      0.67       320

Accuracy: 0.671875


In [35]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf6 = SVC(kernel='linear')
clf6.fit(trainDataHaralick, trainLabelsHaralick)
y_predict_SVC = clf6.predict(testDataHaralick)
cm_SVC = confusion_matrix(testLabelsHaralick, y_predict_SVC)
print(classification_report(testLabelsHaralick, y_predict_SVC))
accuracy_SVC= accuracy_score(testLabelsHaralick, y_predict_SVC)
print(f"Accuracy: {accuracy_SVC:}")

              precision    recall  f1-score   support

           0       0.75      0.71      0.73       158
           1       0.73      0.77      0.75       162

    accuracy                           0.74       320
   macro avg       0.74      0.74      0.74       320
weighted avg       0.74      0.74      0.74       320

Accuracy: 0.740625


In [36]:
print("All Accuracy of Machine learning Models......")
print(f"SVC Accuracy => {accuracy_SVC}")
print(f"Gaussian Naive Bayes Accuracy => {accuracy_GNB}")
print(f"Decision Tree Classifier Accuracy => {accuracy_DTC}")
print(f"Logistic Regression Accuracy => {accuracy_LR}")
print(f"k-nearest neighbors Accuracy => {accuracy_KNN}")
print(f"Random Forest Classifier Accuracy =>{accuracy_rfc}")

All Accuracy of Machine learning Models......
SVC Accuracy => 0.740625
Gaussian Naive Bayes Accuracy => 0.671875
Decision Tree Classifier Accuracy => 0.75625
Logistic Regression Accuracy => 0.7
k-nearest neighbors Accuracy => 0.765625
Random Forest Classifier Accuracy =>0.79375


In [37]:
'''
For The Haralick features the Results are
All Accuracy of Machine learning Models......
SVC Accuracy => 0.740625
Gaussian Naive Bayes Accuracy => 0.671875
Decision Tree Classifier Accuracy => 0.75625
Logistic Regression Accuracy => 0.7
k-nearest neighbors Accuracy => 0.765625
Random Forest Classifier Accuracy =>0.79375

SyntaxError: incomplete input (2737229128.py, line 1)

All Accuracy of Machine learning Models......
SVC Accuracy => 0.740625
Gaussian Naive Bayes Accuracy => 0.671875
Decision Tree Classifier Accuracy => 0.75625
Logistic Regression Accuracy => 0.7
k-nearest neighbors Accuracy => 0.765625
Random Forest Classifier Accuracy =>0.79375
