In [6]:
import cv2
from sklearn import svm
from scipy.cluster.vq import kmeans, vq
import numpy as np
import glob
from scipy.stats import skew
from skimage.feature import greycomatrix, greycoprops
from sklearn.model_selection import GridSearchCV
import pickle


In [7]:
Apple_training_data='dataset\\Apples\\*'
Orange_training_data='dataset\\Oranges\\*'
Mango_training_data='dataset\\Mangoes\\*'

sift = cv2.SIFT_create()


In [8]:
def get_all_glcm_features(gray_scale_img):
    """
    Given a grayscale image with graylevels from 0 - 255, this function returns the contrast
    and the homogeneity features of the image with the help of GLCM
    """
    # Tip: Make sure you understand the input-output of everything you write, 
    # not doing that results in bugs that make you believe the lab is long
    
    #size of co-occ matrix = number of gray levels
    image_array = np.array(gray_scale_img)
    #print('first pixel= ', image_array[0][0])
    coocurrence_matrix = greycomatrix(image_array, [1], [0])
    contrast = greycoprops(coocurrence_matrix, 'contrast')
    homogeneity = greycoprops(coocurrence_matrix, 'homogeneity')
    #mean = greycoprops(coocurrence_matrix, 'mean')
    energy = greycoprops(coocurrence_matrix, 'energy')
    #entropy = greycoprops(coocurrence_matrix, 'entropy')
    #variance = greycoprops(coocurrence_matrix, 'variance')
    correlation = greycoprops(coocurrence_matrix, 'correlation')
    return contrast, homogeneity, energy, correlation

In [9]:
def extract_features(data_path):
    des_list=[]
    hue_arr=[]
    for file in glob.glob(data_path):    
        img = cv2.imread(file)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        contrast, homogeneity, energy, correlation= get_all_glcm_features((img_gray * 255).astype(np.uint8))
        kpts, des = sift.detectAndCompute(img_gray,None)
        if len(kpts) < 1:
            no_kpts = np.zeros((1, sift.descriptorSize()), np.float32)
            des_list.append((file, no_kpts))
        else:
            des_list.append((file, des)) 
        h,s,v=cv2.split(img)
        hue_mean=np.mean(h)  #calculate the mean of hue channel of each image
        hue_var=np.var(h) 
        flatten_h = list(np.concatenate(h).flat)
        hue_skewness=skew(flatten_h)
        image_hue_info=[]
        image_hue_info.append(hue_mean)
        image_hue_info.append(hue_var)
        image_hue_info.append(hue_skewness)
        #######################################################################
        image_hue_info.append(contrast[0][0])
        image_hue_info.append(homogeneity[0][0])
        image_hue_info.append(energy[0][0])
        image_hue_info.append(correlation[0][0])
        #########################################################################
        hue_arr.append(image_hue_info) 



        # Stack all the descriptors vertically in a numpy array
    descriptors = des_list[0][1]
    for file, descriptor in des_list[1:]:
        descriptors = np.vstack((descriptors, descriptor))


    #kmeans works only on float, so convert integers to float
    descriptors_float = descriptors.astype(float)

    k = 60  #k means awal 60 clusters
    voc, variance = kmeans(descriptors_float, k, 1)

    features = np.zeros((len(hue_arr), k), "float32")
    for i in range(len(hue_arr)):
        words, distance = vq(des_list[i][1],voc)
        for w in words:
            features[i][w] += 1


    #Test_features_scaled = scale(Test_features.tolist())
    #print(Test_features[0],Test_features_scaled[0])

    #Test_hue_arr_scaled = scale(Test_hue_arr)
    #print(Test_hue_arr[0],Test_hue_arr_scaled[0])

    all_features=np.concatenate((hue_arr,features.tolist()),axis=1).tolist()
    return(all_features)

In [10]:
#extract fruits features
Apples_all_features = extract_features(Apple_training_data)

In [11]:
Oranges_all_features = extract_features(Orange_training_data)

In [12]:
Mangos_all_features = extract_features(Mango_training_data)

In [28]:
#print(len(Apple_hue_arr),len(Orange_hue_arr),len(Mango_hue_arr))
a=["Apple"]*len(Apples_all_features)
o=["Orange"]*len(Oranges_all_features)
m=["Mango"]*len(Mangos_all_features)
all_labels=a+o+m
print(len(all_labels))
print(all_labels)

all_features= Apples_all_features + Oranges_all_features + Mangos_all_features
print(len(all_features))
print(all_features[0])


286
['Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Apple', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', 'Orange', '

#pca
pca = PCA()
x_scaled_pca = pca.fit_transform(all_features)

per_var = np.round(pca.explained_variance_ratio_*100,decimals=1)
labels =[str(x) for x in range(1, len(per_var)+1)]


plt.bar(x=range(1, len(per_var)+1), height=per_var)
plt.tick_params(
    axis='x',
    which='both',
    bottom= False,
    top= False,
    labelbottom=False)
plt.ylabel('percentage of explained variance')
plt.xlabel('principal Components')
plt.title('scree plot')
plt.show()

In [30]:
#x_train, x_test, y_train, y_test = train_test_split(all_features,all_labels, test_size=0.3,random_state=0) 
param_grid = [
    {
        'C': [0.5, 1, 10, 100],
        'gamma': ['scale',1,0.1,0.01,0.001,0.0001],
        'kernel': ['rbf']
    },
]

optimal_params = GridSearchCV(
    svm.SVC(),
    param_grid,
    cv=25,
    scoring='accuracy',
    verbose=0
)

#print(len(all_features_scalled),len(y_train))
#all_features_scaled = scale(all_features)

optimal_params.fit(all_features,all_labels)
svm_C = optimal_params.best_params_['C']
svm_gamma = optimal_params.best_params_['gamma']
print(svm_C,svm_gamma)

0.5 scale


In [31]:

clf_svm_rbf =  svm.SVC(random_state=0,C=svm_C,gamma=svm_gamma,kernel='rbf')

#train the model
clf_svm_rbf.fit(all_features,all_labels)
# save the model to disk
filename = 'final_model.sav'
pickle.dump(clf_svm_rbf, open(filename, 'wb'))

all_features_scalled=scale(all_features)
clf_linear = svm.SVC(kernel='linear').fit(all_features_scalled[0:220,0:3].tolist(),all_labels)

In [37]:
#Import scikit-learn metrics module for accuracy calculation
x_test = extract_features('dataset\\Test\\*')
y_test = ['Mango','Apple','Apple','Mango','Mango','Orange','Apple','Apple','Apple','Apple','Orange','Apple','Mango','Mango','Mango','Orange','Orange','Orange']
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy: how often is the classifier correct?
predicted = clf_svm_rbf.predict(x_test)
print(predicted)
print("Accuracy:",metrics.accuracy_score(predicted, y_test))



['Mango' 'Orange' 'Apple' 'Mango' 'Mango' 'Orange' 'Apple' 'Apple' 'Apple'
 'Apple' 'Apple' 'Apple' 'Mango' 'Mango' 'Mango' 'Orange' 'Orange'
 'Orange']
Accuracy: 0.8888888888888888
