# Fruit recognition using Fuzzy logic
## (using the scikit-learn decision tree classifier)

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import glob
import mahotas as mt

# NOTE(review): not referenced in the visible cells -- presumably a resize
# target for images; confirm before relying on it.
fixed_size = (80, 80)

# Shape extraction (Hu moments)

In [2]:
# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
    """Collapse the Hu moments of an image into a single shape feature.

    The image is converted from BGR to grayscale, its image moments are
    computed, and the flattened Hu-moment vector is averaged.

    Parameters
    ----------
    image
        Image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns
    -------
    The mean of the flattened Hu-moment vector (a scalar).
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hu_vector = cv2.HuMoments(cv2.moments(grayscale)).flatten()
    return np.mean(hu_vector)

# Haralick Texture Feature Vector Extraction

In [3]:
# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
    """Collapse the Haralick texture features of an image into one scalar.

    Parameters
    ----------
    image
        Image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns
    -------
    The mean of the Haralick feature vector, which is itself the
    axis-0 average of what ``mt.features.haralick`` returns.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Average along axis 0 first, then reduce the remaining vector to a scalar.
    texture_vector = mt.features.haralick(grayscale).mean(axis=0)
    return np.mean(texture_vector)

# Color Histogram

In [4]:
# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
    """Return the mean bin count over the per-channel colour histograms.

    A 256-bin histogram is computed for every channel of ``image``, the
    histograms are concatenated into one vector, and that vector is averaged.

    Parameters
    ----------
    image
        Image that ``cv2.split`` can separate into channels.
    mask
        Optional mask forwarded to ``cv2.calcHist``; ``None`` (the default)
        counts every pixel.

    Returns
    -------
    The mean of the concatenated histogram vector (a scalar).

    Notes
    -----
    NOTE(review): the bins of each histogram sum to the number of counted
    pixels, so for 8-bit images this mean equals ``pixel_count / 256`` and
    reflects image (or mask) size rather than colour content. Consider
    replacing it with a colour-sensitive statistic and retraining.
    """
    # One flattened histogram per channel; previously only the last
    # channel's histogram survived the loop and ``mask`` was ignored.
    channel_hists = [
        cv2.calcHist([chan], [0], mask, [256], [0, 256]).flatten()
        for chan in cv2.split(image)
    ]
    return np.mean(np.concatenate(channel_hists))

# Processing all Images (Hu Moments, Haralick Texture and Color Histogram)

In [5]:
# Raw string: the Windows backslashes must not be read as escape sequences.
datadir = r"D:\python\opencv\dataset\preprocessedData"  # Note: Add your dataset path.
CATEGORIES = ["apple", "banana", "mixed", "orange"]  # One sub-directory of datadir per category.

# Containers filled by the cells below:
#   training_data  - one [[shape, texture, color], class_index] entry per image
#   train_features - the feature vectors split out of training_data
#   train_labels   - the class indices split out of training_data
train_features = []
train_labels = []
training_data = []

def create_training_data():
    """Extract features for every image under ``datadir`` into ``training_data``.

    For each category in ``CATEGORIES`` the matching sub-directory of
    ``datadir`` is listed, every file is read with ``cv2.imread`` and the
    three feature descriptors are computed. One
    ``[[hu, haralick, histogram], class_index]`` entry is appended to the
    module-level ``training_data`` list per image.

    Files that cannot be read or processed are skipped, but each skip is
    reported instead of being silently swallowed.
    """
    for class_num, category in enumerate(CATEGORIES):  # class index == position in CATEGORIES
        path = os.path.join(datadir, category)  # directory holding this class
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            try:
                image = cv2.imread(img_path)
                if image is None:
                    # cv2.imread reports an unreadable file by returning None.
                    print("Skipping unreadable image:", img_path)
                    continue

                features = [
                    fd_hu_moments(image),
                    fd_haralick(image),
                    fd_histogram(image),
                ]
            except Exception as err:
                # Best-effort: one bad image must not abort the whole run,
                # but the failure is made visible.
                print("Skipping", img_path, "- feature extraction failed:", err)
                continue

            # append the feature vector and label
            training_data.append([features, class_num])


create_training_data()

In [6]:
# Number of images for which features were collected.
sample_count = len(training_data)
print(sample_count)

236


In [7]:
# training_data is ordered by class (all samples of one category, then the next).
# Shuffle it so that every class is represented during training,
# i.e. so that the 75% training split covers all fruit classes.

import random

In [8]:
# Shuffle in place so the class-ordered samples are mixed before splitting.
# NOTE(review): no seed is set in the visible cells, so the order (and the
# outputs shown below) will differ between runs.
random.shuffle(training_data)

In [9]:
# Spot-check the first ten samples to confirm the classes are now mixed.
for feature_vector, class_index in training_data[:10]:
    print(feature_vector)    # [shape, texture, color] feature values
    print(class_index)       # index into CATEGORIES

[0.00012010933678206954, 795.651224046627, 1665.625]
2
[0.00013773628551780128, 1826.0246102793765, 937.2656]
0
[0.00024203208654577883, 1337.3808539836405, 2765.625]
0
[0.00012743320159033425, 133.25810735254225, 21824.0]
1
[0.00011493205279988547, 839.8680407755453, 478.51562]
3
[0.00017967310007190024, 1620.535722188505, 4265.0]
3
[0.00013583043087315996, 2021.8475848742517, 4882.9688]
0
[0.00021856547613250733, 3467.5372142633496, 46211.895]
1
[0.00016120930982631908, 2920.992697446246, 940.625]
3
[0.00012182767331041552, 775.1245931536098, 4859.7656]
2


In [10]:
# Split every [features, label] sample into the parallel feature / label lists.
train_features.extend(sample[0] for sample in training_data)
train_labels.extend(sample[1] for sample in training_data)

print(train_features[0])    # feature vector of the first sample
print(train_labels[0])      # class index of the first sample

[0.00012010933678206954, 795.651224046627, 1665.625]
2


# Training data using Decision Tree

In [11]:
import sklearn.tree as tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
import graphviz

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the samples for evaluation; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    train_features,
    train_labels,
    test_size=0.25,
    random_state=1,
)

In [13]:
# One row per sample, three columns (shape, texture, color).
X_train = np.reshape(np.array(X_train), (-1, 3))

# fit() returns the estimator itself, so dtree and decision_tree_classifier
# refer to the same trained model.
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_train, y_train)
dtree = decision_tree_classifier
print(dtree)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')


In [14]:
# Display names for the plot: fn follows the feature column order,
# tn follows the class index order.
fn = ["shape", "texture", "color"]
tn = ["apple", "banana", "mixed", "orange"]

# out_file=None makes export_graphviz return the DOT source as a string.
export_options = dict(
    out_file=None,
    feature_names=fn,
    class_names=tn,
    filled=True,
    rounded=True,
    special_characters=True,
)
dot = tree.export_graphviz(dtree, **export_options)

# Render the DOT source to a PNG and open it in the default viewer.
graph = graphviz.Source(dot)
graph.format = 'png'
graph.render('decision_tree', view=True)


'decision_tree.png'

In [15]:
# Same (n_samples, 3) layout as the training matrix.
X_test = np.reshape(np.array(X_test), (-1, 3))

# Accuracy on the held-out split (displayed as the cell result).
decision_tree_classifier.score(X_test, y_test)

0.4915254237288136

# Testing a sample

In [16]:
fruit = ["apple", "banana", "mixed", "orange"]  # class index -> name, same order as CATEGORIES

# Raw string: the Windows backslashes must not be read as escape sequences.
# NOTE(review): this sample lives under the training dataset directory, so the
# prediction below is not a test on unseen data.
sample = r"D:\python\opencv\dataset\preprocessedData\orange\orange_5.jpg"  # Take a test sample from any one of the directories
image = cv2.imread(sample)
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read;
    # fail here with a clear message rather than inside cvtColor.
    raise FileNotFoundError("Could not read sample image: {}".format(sample))

# Same three descriptors, in the same order, as used for training.
feature1 = fd_hu_moments(image)
feature2 = fd_haralick(image)
feature3 = fd_histogram(image)
fts = [feature1, feature2, feature3]

In [17]:
# A single sample still has to be a 2-D (1, 3) matrix for predict().
features = np.reshape(np.array(fts), (-1, 3))

prediction = dtree.predict(features)

# prediction[0] is the class index; map it back to the fruit name.
print("The fruit is :", fruit[prediction[0]], sep="\n")

The fruit is :
orange
