In [1]:
import cv2
from sklearn import svm
from scipy.cluster.vq import kmeans, vq
import numpy as np
import pandas as pd
import glob
from skimage.filters import threshold_yen
from skimage.exposure import rescale_intensity
from scipy.stats import skew
from itertools import chain
from skimage.feature import greycomatrix, greycoprops
from sklearn.preprocessing import scale
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler as sc


In [2]:
# Glob patterns for the per-class training images and for the test images.
# Forward slashes are used as separators: glob accepts them on Windows too,
# whereas a backslash is only a path separator on Windows, so the previous
# patterns matched nothing on other platforms.
Apple_training_data='dataset/Apples/Train/*.jpg'
Orange_training_data='dataset/Oranges/Train/*.jpg'
Mango_training_data='dataset/Mangoes/Train/*.jpg'
Test_data='dataset/test/*.jpg'

# One feature row per image: hue statistics followed by GLCM properties.
Apple_hue_arr = []   # rows for the apple pictures
Orange_hue_arr = []  # rows for the orange pictures
Mango_hue_arr = []   # rows for the mango pictures
# One (file path, SIFT descriptor array) tuple per image.
Apple_des_list = []
Orange_des_list = []
Mango_des_list = []
# Shared SIFT detector/descriptor used by every feature-extraction cell.
sift = cv2.SIFT_create()


In [3]:
def get_all_glcm_features(gray_scale_img):
    """
    Compute GLCM texture properties of a grayscale image.

    A grey-level co-occurrence matrix is built for a single pixel offset
    (distance 1, angle 0) and four properties are read from it.

    Parameters
    ----------
    gray_scale_img : array-like
        Grayscale image; it is converted with np.array before use.

    Returns
    -------
    tuple
        (contrast, homogeneity, energy, correlation), each being the array
        returned by greycoprops for that property. Callers in this notebook
        read the single value with [0][0].
    """
    # The co-occurrence matrix has one row/column per gray level.
    glcm = greycomatrix(np.array(gray_scale_img), [1], [0])
    contrast, homogeneity, energy, correlation = (
        greycoprops(glcm, prop)
        for prop in ('contrast', 'homogeneity', 'energy', 'correlation')
    )
    return contrast, homogeneity, energy, correlation

In [4]:
# Extract hue statistics, GLCM texture properties and SIFT descriptors for
# every apple training image.
for file in glob.glob(Apple_training_data):
    img = cv2.imread(file)  #read apple images
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it so cvtColor does not fail on a None input.
        print('skipping unreadable image:', file)
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  #convert from BGR to HSV
    # NOTE(review): img holds HSV data at this point, so COLOR_BGR2GRAY weights
    # the H, S and V channels as if they were B, G and R. Left unchanged here
    # because the orange, mango and test loops do the same and the features
    # must stay comparable; fix all four loops together.
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # NOTE(review): img_gray appears to be uint8 already, in which case `* 255`
    # wraps modulo 256 before the cast -- confirm, and change in all loops at once.
    contrast, homogeneity, energy, correlation= get_all_glcm_features((img_gray * 255).astype(np.uint8))
    kpts, des = sift.detectAndCompute(img_gray,None)
    if des is None or len(kpts) < 1:
        # No keypoints: store a single all-zero descriptor as a placeholder.
        des = np.zeros((1, sift.descriptorSize()), np.float32)
    Apple_des_list.append((file, des))
    h,s,v=cv2.split(img)  #Split the channels of each image
    # Feature row: hue mean, hue variance, hue skewness (over all pixels),
    # then the four GLCM properties.
    Apple_hue_arr.append([
        np.mean(h),
        np.var(h),
        skew(h.ravel()),
        contrast[0][0],
        homogeneity[0][0],
        energy[0][0],
        correlation[0][0],
    ])
print(len(Apple_hue_arr))

72


In [5]:
# Stack the descriptors of all apple images vertically in one numpy array.
# A single vstack over the whole list avoids re-copying the growing array on
# every iteration.
descriptors = np.vstack([descriptor for _, descriptor in Apple_des_list])

In [6]:
# kmeans needs floating point observations, so cast the descriptors first
descriptors_float = descriptors.astype(np.float64)

k = 60  # number of clusters (visual words) requested from k-means
# NOTE(review): this codebook is fitted on the apple descriptors only; the
# orange, mango and test cells each fit their own, so histogram bin i refers
# to a different visual word in each group.
# iter=1 runs k-means once; the second return value is the distortion.
voc, variance = kmeans(descriptors_float, k, iter=1)

In [7]:
# Bag-of-visual-words histogram: one row per apple image, one column per word.
# The row count comes from the descriptor list instead of a hard-coded 72, so
# the cell keeps working when the number of images changes.
Apple_features = np.zeros((len(Apple_des_list), k), "float32")
for i, (_, image_descriptors) in enumerate(Apple_des_list):
    # vq assigns every descriptor to its nearest codebook entry.
    words, distance = vq(image_descriptors, voc)
    # Count how often each word index occurs; minlength pads unused words with 0.
    Apple_features[i] = np.bincount(words, minlength=k)

In [8]:
# Put the hue/GLCM columns and the visual-word histogram side by side,
# one row per apple image, and keep the result as a plain nested list.
Apples_all_features = np.hstack((np.asarray(Apple_hue_arr), Apple_features)).tolist()

In [9]:
# Extract hue statistics, GLCM texture properties and SIFT descriptors for
# every orange training image.
for file in glob.glob(Orange_training_data):
    img = cv2.imread(file)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it so cvtColor does not fail on a None input.
        print('skipping unreadable image:', file)
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # NOTE(review): img holds HSV data at this point, so COLOR_BGR2GRAY weights
    # the H, S and V channels as if they were B, G and R. Left unchanged here
    # because the apple, mango and test loops do the same and the features
    # must stay comparable; fix all four loops together.
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # NOTE(review): img_gray appears to be uint8 already, in which case `* 255`
    # wraps modulo 256 before the cast -- confirm, and change in all loops at once.
    contrast, homogeneity, energy, correlation= get_all_glcm_features((img_gray * 255).astype(np.uint8))
    kpts, des = sift.detectAndCompute(img_gray,None)
    if des is None or len(kpts) < 1:
        # No keypoints: store a single all-zero descriptor as a placeholder.
        des = np.zeros((1, sift.descriptorSize()), np.float32)
    Orange_des_list.append((file, des))
    h,s,v=cv2.split(img)
    # Feature row: hue mean, hue variance, hue skewness (over all pixels),
    # then the four GLCM properties.
    Orange_hue_arr.append([
        np.mean(h),
        np.var(h),
        skew(h.ravel()),
        contrast[0][0],
        homogeneity[0][0],
        energy[0][0],
        correlation[0][0],
    ])

In [10]:
# Stack the descriptors of all orange images vertically in one numpy array.
# A single vstack over the whole list avoids re-copying the growing array on
# every iteration.
descriptors = np.vstack([descriptor for _, descriptor in Orange_des_list])

In [11]:
# kmeans needs floating point observations, so cast the descriptors first
descriptors_float = descriptors.astype(np.float64)

k = 60  # number of clusters (visual words) requested from k-means
# NOTE(review): this codebook is fitted on the orange descriptors only; the
# apple, mango and test cells each fit their own, so histogram bin i refers
# to a different visual word in each group.
# iter=1 runs k-means once; the second return value is the distortion.
voc, variance = kmeans(descriptors_float, k, iter=1)

In [12]:
# Bag-of-visual-words histogram: one row per orange image, one column per word.
# The row count comes from the descriptor list instead of a hard-coded 70, so
# the cell keeps working when the number of images changes.
Orange_features = np.zeros((len(Orange_des_list), k), "float32")
for i, (_, image_descriptors) in enumerate(Orange_des_list):
    # vq assigns every descriptor to its nearest codebook entry.
    words, distance = vq(image_descriptors, voc)
    # Count how often each word index occurs; minlength pads unused words with 0.
    Orange_features[i] = np.bincount(words, minlength=k)

In [13]:
# Put the hue/GLCM columns and the visual-word histogram side by side,
# one row per orange image, and keep the result as a plain nested list.
Oranges_all_features = np.hstack((np.asarray(Orange_hue_arr), Orange_features)).tolist()

In [14]:
# Extract hue statistics, GLCM texture properties and SIFT descriptors for
# every mango training image.
for file in glob.glob(Mango_training_data):
    img = cv2.imread(file)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it so cvtColor does not fail on a None input.
        print('skipping unreadable image:', file)
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # NOTE(review): img holds HSV data at this point, so COLOR_BGR2GRAY weights
    # the H, S and V channels as if they were B, G and R. Left unchanged here
    # because the apple, orange and test loops do the same and the features
    # must stay comparable; fix all four loops together.
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # NOTE(review): img_gray appears to be uint8 already, in which case `* 255`
    # wraps modulo 256 before the cast -- confirm, and change in all loops at once.
    contrast, homogeneity, energy, correlation= get_all_glcm_features((img_gray * 255).astype(np.uint8))
    kpts, des = sift.detectAndCompute(img_gray,None)
    if des is None or len(kpts) < 1:
        # No keypoints: store a single all-zero descriptor as a placeholder.
        des = np.zeros((1, sift.descriptorSize()), np.float32)
    Mango_des_list.append((file, des))
    h,s,v=cv2.split(img)
    # Feature row: hue mean, hue variance, hue skewness (over all pixels),
    # then the four GLCM properties.
    Mango_hue_arr.append([
        np.mean(h),
        np.var(h),
        skew(h.ravel()),
        contrast[0][0],
        homogeneity[0][0],
        energy[0][0],
        correlation[0][0],
    ])

In [15]:
# Stack the descriptors of all mango images vertically in one numpy array.
# A single vstack over the whole list avoids re-copying the growing array on
# every iteration.
descriptors = np.vstack([descriptor for _, descriptor in Mango_des_list])

In [16]:
# kmeans needs floating point observations, so cast the descriptors first
descriptors_float = descriptors.astype(np.float64)

k = 60  # number of clusters (visual words) requested from k-means
# NOTE(review): this codebook is fitted on the mango descriptors only; the
# apple, orange and test cells each fit their own, so histogram bin i refers
# to a different visual word in each group.
# iter=1 runs k-means once; the second return value is the distortion.
voc, variance = kmeans(descriptors_float, k, iter=1)

In [17]:

# Bag-of-visual-words histogram: one row per mango image, one column per word.
# The row count comes from the descriptor list instead of a hard-coded 78, so
# the cell keeps working when the number of images changes.
Mango_features = np.zeros((len(Mango_des_list), k), "float32")
for i, (_, image_descriptors) in enumerate(Mango_des_list):
    # vq assigns every descriptor to its nearest codebook entry.
    words, distance = vq(image_descriptors, voc)
    # Count how often each word index occurs; minlength pads unused words with 0.
    Mango_features[i] = np.bincount(words, minlength=k)

In [18]:
# Put the hue/GLCM columns and the visual-word histogram side by side,
# one row per mango image, and keep the result as a plain nested list.
Mangos_all_features = np.hstack((np.asarray(Mango_hue_arr), Mango_features)).tolist()

In [19]:
# Build one label per feature row, in the same apple / orange / mango order
# that the feature rows are joined in below.
a = ["Apple" for _ in Apples_all_features]
o = ["Orange" for _ in Oranges_all_features]
m = ["Mango" for _ in Mangos_all_features]
all_labels = [*a, *o, *m]
print(len(all_labels))
print(all_labels)

# Training matrix as a nested list: all apple rows, then orange, then mango.
all_features = [*Apples_all_features, *Oranges_all_features, *Mangos_all_features]
print(len(all_features))
print(all_features[0])


220
['Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange

# PCA scree plot: share of variance explained by each principal component.
# PCA centres its input but does not rescale it, and the columns here combine
# hue statistics, GLCM properties and visual-word counts, so the features are
# standardised first (as the name x_scaled_pca implies).
pca = PCA()
x_scaled_pca = pca.fit_transform(scale(all_features))

# Explained variance per component, as a percentage rounded to one decimal.
per_var = np.round(pca.explained_variance_ratio_*100,decimals=1)
# Component numbers as strings; not used by the plot, which hides x tick labels.
labels =[str(x) for x in range(1, len(per_var)+1)]

fig, ax = plt.subplots()
ax.bar(x=range(1, len(per_var)+1), height=per_var)
# Hide x ticks and their labels: there is one bar per component.
ax.tick_params(
    axis='x',
    which='both',
    bottom= False,
    top= False,
    labelbottom=False)
ax.set_ylabel('percentage of explained variance')
ax.set_xlabel('principal Components')
ax.set_title('scree plot')
plt.show()

In [23]:
# Candidate hyper-parameters for an RBF-kernel SVC.
param_grid = [
    dict(
        C=[0.5, 1, 10, 100],
        gamma=['scale', 1, 0.1, 0.01, 0.001, 0.0001],
        kernel=['rbf'],
    ),
]

# Exhaustive search over the grid, scored by accuracy with 25-fold
# cross-validation on the unscaled feature rows.
optimal_params = GridSearchCV(
    estimator=svm.SVC(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=25,
    verbose=0,
)
optimal_params.fit(all_features, all_labels)
print(optimal_params.best_params_)

{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}


In [24]:
import pickle
# NOTE(review): C=0.5 is used although the recorded grid-search output reports
# C=1 as the best value -- confirm this is intentional.
clf_svm_rbf =  svm.SVC(random_state=0,C=0.5,gamma='scale',kernel='rbf')
# NOTE(review): the split below is not used for fitting; the model is trained
# on every row, so X_test/y_test are not held out from it.
X_train, X_test, y_train, y_test = train_test_split(all_features,all_labels, test_size=0.1,random_state=0) 

#train the model
clf_svm_rbf.fit(all_features,all_labels)
# save the model to disk; the with-block closes the file handle once the dump
# is finished instead of leaving it open
filename = 'frut_recognation_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(clf_svm_rbf, model_file)

# Linear SVM on the first three (standardised) columns, i.e. the hue statistics.
# All rows are selected with ':' rather than a hard-coded 0:220 so the row
# count always matches len(all_labels).
all_features_scalled=scale(all_features)
clf_linear = svm.SVC(kernel='linear').fit(all_features_scalled[:, 0:3].tolist(),all_labels)

In [23]:
# Feature extraction for the test images: same hue/GLCM/SIFT steps as the
# training loops, followed by a bag-of-visual-words histogram.
Test_des_list=[]
Test_hue_arr=[]
for file in glob.glob(Test_data):
    img = cv2.imread(file)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it so cvtColor does not fail on a None input.
        print('skipping unreadable image:', file)
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # NOTE(review): img holds HSV data at this point, so COLOR_BGR2GRAY weights
    # the H, S and V channels as if they were B, G and R. Left unchanged here
    # because the training loops do the same and the features must stay
    # comparable; fix all four loops together.
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # NOTE(review): img_gray appears to be uint8 already, in which case `* 255`
    # wraps modulo 256 before the cast -- confirm, and change in all loops at once.
    contrast, homogeneity, energy, correlation= get_all_glcm_features((img_gray * 255).astype(np.uint8))
    kpts, des = sift.detectAndCompute(img_gray,None)
    if des is None or len(kpts) < 1:
        # No keypoints: store a single all-zero descriptor as a placeholder.
        des = np.zeros((1, sift.descriptorSize()), np.float32)
    Test_des_list.append((file, des))
    h,s,v=cv2.split(img)
    # Feature row: hue mean, hue variance, hue skewness (over all pixels),
    # then the four GLCM properties.
    Test_hue_arr.append([
        np.mean(h),
        np.var(h),
        skew(h.ravel()),
        contrast[0][0],
        homogeneity[0][0],
        energy[0][0],
        correlation[0][0],
    ])

# Stack all the descriptors vertically in a numpy array (one vstack call
# instead of re-copying the growing array for every image).
descriptors = np.vstack([descriptor for _, descriptor in Test_des_list])

#kmeans works only on float, so convert integers to float
descriptors_float = descriptors.astype(float)

# NOTE(review): a new codebook is fitted on the test descriptors here, while
# clf_svm_rbf was trained on histograms built from the per-class codebooks, so
# these histogram bins do not line up with the training columns. A single
# codebook fitted on the training descriptors and reused here is needed;
# that change spans the training cells as well.
k = 60  # number of clusters (visual words) requested from k-means
voc, variance = kmeans(descriptors_float, k, 1)

# Bag-of-visual-words histogram: one row per test image, one column per word.
Test_features = np.zeros((len(Test_des_list), k), "float32")
for i, (_, image_descriptors) in enumerate(Test_des_list):
    # vq assigns every descriptor to its nearest codebook entry.
    words, distance = vq(image_descriptors, voc)
    # Count how often each word index occurs; minlength pads unused words with 0.
    Test_features[i] = np.bincount(words, minlength=k)

# Hue/GLCM columns followed by the histogram columns, as a nested list.
Test_all_features=np.concatenate((Test_hue_arr,Test_features.tolist()),axis=1).tolist()

In [24]:
# Classify every test image with the RBF-kernel SVM and print the predicted labels
rbf_predictions = clf_svm_rbf.predict(Test_all_features)
print("rbf", rbf_predictions)


rbf ['Apple' 'Orange' 'Apple' 'Apple' 'Orange' 'Apple' 'Orange' 'Apple'
 'Mango' 'Apple' 'Orange' 'Mango' 'Mango' 'Mango' 'Mango' 'Mango' 'Apple']


In [1]:
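#Import scikit-learn metrics module for accuracy calculation
#from sklearn import metrics
# Model accuracy: how often is the classifier correct?
# (score the true labels against predictions; comparing y_test with itself always gives 1.0)
#print("Accuracy:",metrics.accuracy_score(y_test, clf_svm_rbf.predict(X_test)))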