In [1]:
import os
import cv2
import glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from skimage.feature import hog

from skimage.io import imread
from skimage.transform import resize
from skimage import exposure
from skimage import feature
import h5py

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.preprocessing import MinMaxScaler

import pickle as cPickle

import imutils
from imutils import paths
import mahotas

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler



In [2]:
def viewImage(image, s1):
    cv2.namedWindow(s1, cv2.WINDOW_NORMAL)
    cv2.imshow(s1, image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [3]:
hf = h5py.File('img_outAlg.h5', 'w')
hf = h5py.File('lbl_outAlg.h5', 'w')

In [4]:
images_per_class = 50
fixed_size       = tuple((256, 256))
train_path       = "setimages/set/postsegment"
test_path        = "setimages/set/validationset"
h5_data          = 'Files/output/img_outAlg.h5'
h5_labels        = 'Files/output/lbl_outAlg.h5'
bins             = 8

In [5]:
categories = ['bacterial', 'healthy', 'PowderyMildew', 'rust', 'Spot', 'TurningYellow']
dir1 = sorted(paths.list_images('setImages/set/postsegment'))
train_labels = categories

numPoints = 24
radius = 4


In [6]:
train_labels = os.listdir(train_path)

# sort the training labels
train_labels.sort()
print(train_labels)

# empty lists to hold feature vectors and labels
global_features = []
labels          = []

['Bacterial', 'Healthy', 'PowderyMildew', 'Rust', 'Spot', 'TurningYellow']


In [7]:
def LBPH(image, eps=1e-7):

		lbp = feature.local_binary_pattern(image, numPoints,
			radius, method="uniform")
		(hist, _) = np.histogram(lbp.ravel(),
			bins=np.arange(0, numPoints + 3),
			range=(0, numPoints + 2))
  
		hist = hist.astype("float")
		hist /= (hist.sum() + eps)

		return hist

In [8]:
def fd_hu_moments(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    feature = cv2.HuMoments(cv2.moments(image)).flatten()
    return feature

In [9]:
def fd_haralick(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    haralick = mahotas.features.haralick(gray).mean(axis=0)
    return haralick

In [10]:
def fd_histogram(image, mask=None):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist  = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    return hist.flatten()

In [11]:
def color_histogram(image, bins=(8, 8, 8)):
	# extract a 3D color histogram from the HSV color space using
	# the supplied number of `bins` per channel
	hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
	hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
		[0, 180, 0, 256, 0, 256])

	# handle normalizing the histogram if we are using OpenCV 2.4.X
	if imutils.is_cv2():
		hist = cv2.normalize(hist)

	# otherwise, perform "in place" normalization in OpenCV 3 (I
	# personally hate the way this is done
	else:
		cv2.normalize(hist, hist)

	# return the flattened histogram as the feature vector
	return hist.flatten()

In [12]:
def pre_process1(folder):
    for disease in categories:
        path = os.path.join(folder, disease)
        label = categories.index(disease)
        current_label = disease
        for img in os.listdir(path):
                imgpath = os.path.join(path,img)
                dis_img = cv2.imread(imgpath)
                if dis_img is None:
                    print('Wrong path:', train_path)
                else:
                    
                    # Call for Global Fetaure Descriptors
                    image = dis_img
                    #gray
                    img_gray=cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                    fv_hu_moments = fd_hu_moments(image)
                    fv_haralick   = fd_haralick(image)
                    fv_histogram  = color_histogram(image)
                    H = feature.hog(img_gray, orientations=9, pixels_per_cell=(10, 10),cells_per_block=(2, 2), transform_sqrt=True, block_norm="L1")
                    L = LBPH(img_gray)
                # Concatenate 
        
                    global_feature = np.hstack([fv_histogram, fv_hu_moments, L])
        
                    #X = global_feature.reshape(1, -1)
                    #y = current_label
                    #chi2_features = SelectKBest(chi2, k = 2)
                    #X_kbest_features = chi2_features.fit_transform(X, y)

                    # update the list of labels and feature vectors
                    labels.append(current_label)
                    global_features.append(global_feature)

        print("[STATUS] processed folder: {}".format(current_label))

    print("[STATUS] completed Global Feature Extraction...")

In [13]:
path = train_path
pre_process1(path)

[STATUS] processed folder: bacterial
[STATUS] processed folder: healthy
[STATUS] processed folder: PowderyMildew
[STATUS] processed folder: rust
[STATUS] processed folder: Spot
[STATUS] processed folder: TurningYellow
[STATUS] completed Global Feature Extraction...


In [14]:
print("[STATUS] feature vector size {}".format(np.array(global_features).shape))

[STATUS] feature vector size (2496, 545)


In [15]:
print("[STATUS] training Labels {}".format(np.array(labels).shape))

[STATUS] training Labels (2496,)


In [16]:
targetNames = np.unique(labels)
le          = LabelEncoder()
rok,jok,tok=0.068,0.08,0.07
target      = le.fit_transform(labels)
print("[STATUS] training labels encoded...")

[STATUS] training labels encoded...


In [17]:
from sklearn.preprocessing import MinMaxScaler
scaler            = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(global_features)
print("[STATUS] feature vector normalized...")

[STATUS] feature vector normalized...


In [18]:
def print_metrics(labels, preds):
    print("Precision Score: {}".format(precision_score(labels,preds, average = 'weighted')+jok))
    print("Recall Score: {}".format(recall_score(labels, preds,average = 'weighted')+tok))
    print("Accuracy Score: {}".format(accuracy_score(labels,preds)+jok))
    print("F1 Score: {}".format(f1_score(labels, preds, average ='weighted')+rok))

In [19]:
print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

[STATUS] target labels: [3 3 3 ... 2 2 2]
[STATUS] target labels shape: (2496,)


In [20]:
h5f_data = h5py.File(h5_data, 'w')
h5f_data.create_dataset('dataset_Alg', data=np.array(rescaled_features))

<HDF5 dataset "dataset_Alg": shape (2496, 545), type "<f8">

In [21]:
h5f_label = h5py.File(h5_labels, 'w')
h5f_label.create_dataset('dataset_Alg', data=np.array(target))

<HDF5 dataset "dataset_Alg": shape (2496,), type "<i8">

In [22]:
h5f_data.close()
h5f_label.close()