In [41]:
import cv2
from sklearn import svm
from scipy.cluster.vq import kmeans, vq
import numpy as np
import pandas as pd
import glob
from skimage.filters import threshold_yen
from skimage.exposure import rescale_intensity
from scipy.stats import skew
from itertools import chain


In [42]:
Apple_training_data='dataset\\Apples\\Train\\*.jpg'
Orange_training_data='dataset\\Oranges\\Train\\*.jpg'
Mango_training_data='dataset\\Mangoes\\Train\\*.jpg'

Apple_hue_arr = []   #Info of hue of  each apple pic
Orange_hue_arr = []  #Info of hue of  each orange pic
Mango_hue_arr = []   #Info of hue of  each mango pic
Apple_des_list = []
Orange_des_list = []
Mango_des_list = []
sift = cv2.SIFT_create()


In [43]:
for file in glob.glob(Apple_training_data):    
    img = cv2.imread(file)  #read apple images
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  #convert from RBG to HSV
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kpts, des = sift.detectAndCompute(img_gray,None)
    if len(kpts) < 1:
        no_kpts = np.zeros((1, sift.descriptorSize()), np.float32)
        Apple_des_list.append((file, no_kpts))
    else:
        Apple_des_list.append((file, des)) 
    h,s,v=cv2.split(img)  #Split the the channels of each image
    hue_mean=np.mean(h)  #calculate the mean of hue channel of each image
    hue_var=np.var(h)    #calculate the var of hue channel of each image
    flatten_h = list(np.concatenate(h).flat)  #convert the 2-D image to 1-D image
    flatten_s = list(np.concatenate(s).flat)
    hue_skewness=skew(flatten_h)   #calculate the skewness of hue channel of each image
    image_hue_info=[]
    image_hue_info.append(hue_mean)
    image_hue_info.append(hue_var)
    image_hue_info.append(hue_skewness)
    Apple_hue_arr.append(image_hue_info)
print(len(Apple_hue_arr))

72


In [44]:
# Stack all the descriptors vertically in a numpy array
descriptors = Apple_des_list[0][1]
for file, descriptor in Apple_des_list[1:]:
    descriptors = np.vstack((descriptors, descriptor))

In [45]:
#kmeans works only on float, so convert integers to float
descriptors_float = descriptors.astype(float)

k = 60  #k means awal 60 clusters
voc, variance = kmeans(descriptors_float, k, 1)

In [46]:
Apple_features = np.zeros((72, k), "float32")
for i in range(72):
    words, distance = vq(Apple_des_list[i][1],voc)
    for w in words:
        Apple_features[i][w] += 1

In [47]:
Apples_all_features=np.concatenate((Apple_hue_arr,Apple_features),axis=1)

In [48]:
for file in glob.glob(Orange_training_data):    
    img = cv2.imread(file)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kpts, des = sift.detectAndCompute(img_gray,None)
    if len(kpts) < 1:
        no_kpts = np.zeros((1, sift.descriptorSize()), np.float32)
        Orange_des_list.append((file, no_kpts))
    else:
        Orange_des_list.append((file, des)) 
    h,s,v=cv2.split(img)
    hue_mean=np.mean(h)  #calculate the mean of hue channel of each image
    hue_var=np.var(h) 
    flatten_h = list(np.concatenate(h).flat)
    hue_skewness=skew(flatten_h)
    image_hue_info=[]
    image_hue_info.append(hue_mean)
    image_hue_info.append(hue_var)
    image_hue_info.append(hue_skewness)
    Orange_hue_arr.append(image_hue_info) 

In [49]:
# Stack all the descriptors vertically in a numpy array
descriptors = Orange_des_list[0][1]
for file, descriptor in Orange_des_list[1:]:
    descriptors = np.vstack((descriptors, descriptor))

In [50]:
#kmeans works only on float, so convert integers to float
descriptors_float = descriptors.astype(float)

k = 60  #k means awal 60 clusters
voc, variance = kmeans(descriptors_float, k, 1)

In [51]:
Orange_features = np.zeros((70, k), "float32")
for i in range(70):
    words, distance = vq(Orange_des_list[i][1],voc)
    for w in words:
        Orange_features[i][w] += 1

In [52]:
Oranges_all_features=np.concatenate((Orange_hue_arr,Orange_features),axis=1)

In [53]:
for file in glob.glob(Mango_training_data):    
    img = cv2.imread(file)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kpts, des = sift.detectAndCompute(img_gray,None)
    if len(kpts) < 1:
        no_kpts = np.zeros((1, sift.descriptorSize()), np.float32)
        Mango_des_list.append((file, no_kpts))
    else:
        Mango_des_list.append((file, des)) 
    h,s,v=cv2.split(img)
    hue_mean=np.mean(h)
    hue_var=np.var(h)
    flatten_h = list(np.concatenate(h).flat)
    hue_skewness=skew(flatten_h)
    image_hue_info=[]
    image_hue_info.append(hue_mean)
    image_hue_info.append(hue_var)
    image_hue_info.append(hue_skewness)
    Mango_hue_arr.append(image_hue_info) 

In [54]:
# Stack all the descriptors vertically in a numpy array
descriptors = Mango_des_list[0][1]
for file, descriptor in Mango_des_list[1:]:
    descriptors = np.vstack((descriptors, descriptor))

In [55]:
#kmeans works only on float, so convert integers to float
descriptors_float = descriptors.astype(float)

k = 60  #k means awal 60 clusters
voc, variance = kmeans(descriptors_float, k, 1)

In [56]:

Mango_features = np.zeros((78, k), "float32")
for i in range(78):
    words, distance = vq(Mango_des_list[i][1],voc)
    for w in words:
        Mango_features[i][w] += 1

In [57]:
Mangos_all_features=np.concatenate((Mango_hue_arr,Mango_features),axis=1)

In [58]:
#print(len(Apple_hue_arr),len(Orange_hue_arr),len(Mango_hue_arr))
a=["Apple"]*len(Apple_hue_arr)
o=["Orange"]*len(Orange_hue_arr)
m=["Mango"]*len(Mango_hue_arr)
labels=a+o+m
print(len(labels))
print(labels)

features=Apple_hue_arr+Orange_hue_arr+Mango_hue_arr
print(len(features))
print(features)


220
['Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange

In [59]:

#fit (training my model with x-images having the y-types)
#degree : is the power of the scope in the polynomial equation to detect the new dimension..... preferd degree = #features + 1  to avoid overlapped instances
#coef0 : is the constant to be added in the scope in the polynomial equation
#gamma : just a constant to scale the new values with
clf_poly = svm.SVC(kernel='poly',degree=35,gamma='auto',coef0=1).fit(features,labels)
clf_linear = svm.SVC(kernel='linear').fit(features,labels)

In [60]:
p=[]
img = cv2.imread("M1.JPG")
img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h,s,v=cv2.split(img)
hue_mean=np.mean(h)
sat_mean=np.mean(s)
hue_var=np.var(h)
sat_var=np.var(h)
flatten_h = list(np.concatenate(h).flat)
flatten_s = list(np.concatenate(s).flat)
hue_skewness=skew(flatten_h)
sat_skewness=skew(flatten_s)
image_hue_info=[]
image_hue_info.append(hue_mean)
image_hue_info.append(hue_var)
image_hue_info.append(hue_skewness)
image_hue_info.append(sat_mean)
image_hue_info.append(sat_var)
image_hue_info.append(sat_skewness)
p.append(image_hue_info) 
print(p)    



#predict the fruit type in a new image
print("POLY",clf_poly.predict(p))
print("LINEAR",clf_linear.predict(p))


[[38.26753020833333, 212.14694321262914, 0.4453941620116571, 107.00081041666667, 212.14694321262914, 0.7885114792616188]]


ValueError: X.shape[1] = 6 should be equal to 3, the number of features at training time