In [2]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py
images_per_class       = 800
fixed_size             = tuple((256, 256))
train_path             = "dataset/train"
h5_train_data          = 'output/train_data.h5'
h5_train_labels        = 'output/train_labels.h5'
bins                   = 8

In [3]:
def rgb_bgr(image):
    rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return rgb_img

In [4]:
def bgr_hsv(rgb_img):
    hsv_img = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2HSV)
    return hsv_img

In [5]:
def img_segmentation(rgb_img,hsv_img):
    lower_green = np.array([25,0,20])
    upper_green = np.array([100,255,255])
    healthy_mask = cv2.inRange(hsv_img, lower_green, upper_green)
    result = cv2.bitwise_and(rgb_img,rgb_img, mask=healthy_mask)
    lower_brown = np.array([10,0,10])
    upper_brown = np.array([30,255,255])
    disease_mask = cv2.inRange(hsv_img, lower_brown, upper_brown)
    disease_result = cv2.bitwise_and(rgb_img, rgb_img, mask=disease_mask)
    final_mask = healthy_mask + disease_mask
    final_result = cv2.bitwise_and(rgb_img, rgb_img, mask=final_mask)
    return final_result


In [6]:
# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist  = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    return hist.flatten()

In [7]:
train_labels = os.listdir(train_path)
train_labels.sort()
print(train_labels)
Histogram_features = []
labels       = []

['diseased', 'healthy']


In [8]:
from skimage import exposure
for training_name in train_labels:
    dir = os.path.join(train_path, training_name)
    current_label = training_name
    for x in range(1, images_per_class + 1):
        file = os.path.join(dir, str(x) + ".jpg")
        image = cv2.imread(file)
        image = cv2.resize(image, fixed_size)
        RGB_BGR = rgb_bgr(image)
        BGR_HSV = bgr_hsv(RGB_BGR)
        IMG_SEGMENT = img_segmentation(RGB_BGR, BGR_HSV) 
        fv_Histogram= fd_histogram(IMG_SEGMENT)
        labels.append(current_label)
        Histogram_features.append(fv_Histogram)
    
    print("[STATUS] processed folder: {}".format(current_label))
    
print("[STATUS] completed Histogram Feature Extraction...")

[STATUS] processed folder: diseased
[STATUS] processed folder: healthy
[STATUS] completed Histogram Feature Extraction...


In [9]:
print("[STATUS] feature vector size {}".format(np.array(Histogram_features).shape))

[STATUS] feature vector size (1600, 512)


In [10]:
print("[STATUS] training Labels {}".format(np.array(labels).shape))

[STATUS] training Labels (1600,)


In [11]:
targetNames = np.unique(labels)
le          = LabelEncoder()
target      = le.fit_transform(labels)
print("[STATUS] training labels encoded...")

[STATUS] training labels encoded...


In [12]:
from sklearn.preprocessing import MinMaxScaler
scaler            = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(Histogram_features)
print("[STATUS] feature vector normalized...")

[STATUS] feature vector normalized...


In [34]:
rescaled_features

array([[0.89868999, 0.03427632, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.97074117, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.96591603, 0.01332912, 0.02549925, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.9815963 , 0.01239635, 0.03557221, ..., 0.        , 0.        ,
        0.        ],
       [0.96860402, 0.01473364, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.9613786 , 0.0379812 , 0.09687979, ..., 0.        , 0.        ,
        0.        ]])

In [13]:
print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

[STATUS] target labels: [0 0 0 ... 1 1 1]
[STATUS] target labels shape: (1600,)


In [14]:
rescaled_features.shape

(1600, 512)

In [15]:
target.shape

(1600,)

In [16]:
num_trees = 100
test_size = 0.20
seed      = 9

In [17]:
from sklearn.model_selection import train_test_split
(trainDataHistogram, testDataHistogram, trainLabelsHistogram, testLabelsHistogram) = train_test_split(np.array(rescaled_features), np.array(target),test_size=test_size,random_state=seed)
print("[STATUS] splitted train and test data...")
print("Train data  : {}".format(trainDataHistogram.shape))
print("Test data   : {}".format(testDataHistogram.shape)) 

[STATUS] splitted train and test data...
Train data  : (1280, 512)
Test data   : (320, 512)


In [33]:
type(testLabelsHistogram)

numpy.ndarray

In [18]:
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
clf  = RandomForestClassifier(n_estimators=num_trees, random_state=seed)
clf.fit(trainDataHistogram, trainLabelsHistogram)
y_predict_RFC = clf.predict(testDataHistogram)
cm_RFC = confusion_matrix(testLabelsHistogram, y_predict_RFC)
print(classification_report(testLabelsHistogram, y_predict_RFC))
accuracy_rfc= accuracy_score(testLabelsHistogram, y_predict_RFC)
print(f"Accuracy: {accuracy_rfc:}")

              precision    recall  f1-score   support

           0       0.98      0.97      0.97       158
           1       0.97      0.98      0.98       162

    accuracy                           0.97       320
   macro avg       0.98      0.97      0.97       320
weighted avg       0.98      0.97      0.97       320

Accuracy: 0.975


In [19]:
y_predict_RFC

array([1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0,

In [20]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

#clf2 = LogisticRegression(random_state=seed) 
clf2 = LogisticRegression(random_state=seed,max_iter=1000) 
clf2.fit(trainDataHistogram, trainLabelsHistogram)
y_predict_LR = clf2.predict(testDataHistogram)
cm_logistic = confusion_matrix(testLabelsHistogram, y_predict_LR)
print(classification_report(testLabelsHistogram, y_predict_LR))
accuracy_LR= accuracy_score(testLabelsHistogram, y_predict_LR)
print(f"Accuracy: {accuracy_LR:}")

              precision    recall  f1-score   support

           0       0.97      0.92      0.94       158
           1       0.92      0.98      0.95       162

    accuracy                           0.95       320
   macro avg       0.95      0.95      0.95       320
weighted avg       0.95      0.95      0.95       320

Accuracy: 0.946875


In [21]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
clf3 = KNeighborsClassifier(n_neighbors=5)
clf3.fit(trainDataHistogram, trainLabelsHistogram)
y_predict_KNN = clf3.predict(testDataHistogram)
cm_KNN = confusion_matrix(testLabelsHistogram, y_predict_KNN)
print(classification_report(testLabelsHistogram, y_predict_KNN))
accuracy_KNN= accuracy_score(testLabelsHistogram, y_predict_KNN)
print(f"Accuracy: {accuracy_KNN:}")

              precision    recall  f1-score   support

           0       0.95      0.91      0.93       158
           1       0.91      0.95      0.93       162

    accuracy                           0.93       320
   macro avg       0.93      0.93      0.93       320
weighted avg       0.93      0.93      0.93       320

Accuracy: 0.928125


In [22]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf4 = DecisionTreeClassifier(random_state=seed)
clf4.fit(trainDataHistogram, trainLabelsHistogram)
y_predict_DTC= clf4.predict(testDataHistogram)
cm_DTC = confusion_matrix(testLabelsHistogram, y_predict_DTC)
print(classification_report(testLabelsHistogram, y_predict_DTC))
accuracy_DTC= accuracy_score(testLabelsHistogram, y_predict_DTC)
print(f"Accuracy: {accuracy_DTC:}")

              precision    recall  f1-score   support

           0       0.91      0.94      0.93       158
           1       0.94      0.91      0.93       162

    accuracy                           0.93       320
   macro avg       0.93      0.93      0.93       320
weighted avg       0.93      0.93      0.93       320

Accuracy: 0.928125


In [23]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf5 = GaussianNB()
clf5.fit(trainDataHistogram, trainLabelsHistogram)
y_predict_GNB = clf5.predict(testDataHistogram)
cm_GNB = confusion_matrix(testLabelsHistogram, y_predict_GNB)
print(classification_report(testLabelsHistogram, y_predict_GNB))
accuracy_GNB= accuracy_score(testLabelsHistogram, y_predict_GNB)
print(f"Accuracy: {accuracy_GNB:}")

              precision    recall  f1-score   support

           0       0.99      0.70      0.82       158
           1       0.77      0.99      0.87       162

    accuracy                           0.85       320
   macro avg       0.88      0.85      0.84       320
weighted avg       0.88      0.85      0.84       320

Accuracy: 0.846875


In [24]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

clf6 = SVC(kernel='linear')
clf6.fit(trainDataHistogram, trainLabelsHistogram)
y_predict_SVC = clf6.predict(testDataHistogram)
cm_SVC = confusion_matrix(testLabelsHistogram, y_predict_SVC)
print(classification_report(testLabelsHistogram, y_predict_SVC))
accuracy_SVC= accuracy_score(testLabelsHistogram, y_predict_SVC)
print(f"Accuracy: {accuracy_SVC:}")

              precision    recall  f1-score   support

           0       0.98      0.94      0.96       158
           1       0.95      0.98      0.96       162

    accuracy                           0.96       320
   macro avg       0.96      0.96      0.96       320
weighted avg       0.96      0.96      0.96       320

Accuracy: 0.9625


In [25]:
print("All Accuracy of Machine learning Models......")
print(f"SVC Accuracy => {accuracy_SVC}")
print(f"Gaussian Naive Bayes Accuracy => {accuracy_GNB}")
print(f"Decision Tree Classifier Accuracy => {accuracy_DTC}")
print(f"Logistic Regression Accuracy => {accuracy_LR}")
print(f"k-nearest neighbors Accuracy => {accuracy_KNN}")
print(f"Random Forest Classifier Accuracy =>{accuracy_rfc}")

All Accuracy of Machine learning Models......
SVC Accuracy => 0.9625
Gaussian Naive Bayes Accuracy => 0.846875
Decision Tree Classifier Accuracy => 0.928125
Logistic Regression Accuracy => 0.946875
k-nearest neighbors Accuracy => 0.928125
Random Forest Classifier Accuracy =>0.975


In [26]:
'''
For Histrogram Features The Accuracy of Model are Following
All Accuracy of Machine learning Models......
SVC Accuracy => 0.9625
Gaussian Naive Bayes Accuracy => 0.846875
Decision Tree Classifier Accuracy => 0.928125
Logistic Regression Accuracy => 0.946875
k-nearest neighbors Accuracy => 0.928125
Random Forest Classifier Accuracy =>0.975

SyntaxError: incomplete input (1035519318.py, line 1)

All Accuracy of Machine learning Models......
SVC Accuracy => 0.9625
Gaussian Naive Bayes Accuracy => 0.846875
Decision Tree Classifier Accuracy => 0.928125
Logistic Regression Accuracy => 0.946875
k-nearest neighbors Accuracy => 0.928125
Random Forest Classifier Accuracy =>0.975