In [4]:
import os
import numpy as np
from PIL import Image
from skimage.feature import hog
from skimage import exposure
import cv2

In [5]:
# Function to load image data and convert to numpy array
def convertImgToNumpyArr(image_path):
    try:
        img = Image.open(image_path)  # creating reference variable img to access image data
        img = img.resize((100, 100))  # resize image to 100x100 in case it is not
        img = np.array(img)  # converting image data from JPG to numpy array
        #img = img / 255.0  # normalizing RGB values (cv2 needs 0-255 range)
        return img
    except Exception as e:
        print(f"Error processing image {image_path}: {e}")  # error handling
        return None

In [6]:
# Function to compute HOG features of an image
def computeHOGFeatures(image):
    
    # conversion to grayscale
    grayImg = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    grayImg = grayImg / 255.0  # normalizing

    # computing HOG features
    fd, hogImg = hog(grayImg, pixels_per_cell=(8,8), orientations=9, cells_per_block=(2, 2), visualize=True) # 9 orientations, 8x8 pixels per cell, 2x2 cells per block

    # increasing the contrast of the image
    hogImg = exposure.rescale_intensity(hogImg, in_range=(0, 10))

    return fd, hogImg

In [7]:
# Function to compute colour histogram features for an image

def computeColourHist(image, bins=20):

    # conversion from RGB to HSV
    hsvImg = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    
    hsvImg = hsvImg / 255.0  # normalizing
    
    # extracting histograms for Hue, Saturation, and Value
    hueHist = np.histogram(hsvImg[:,:,0], bins=bins, range=(0, 1))[0]  
    satHist = np.histogram(hsvImg[:,:,1], bins=bins, range=(0, 1))[0]  
    valHist = np.histogram(hsvImg[:,:,2], bins=bins, range=(0, 1))[0]  
    
    # concatenating the histograms of Hue, Saturation, and Value and returning
    return np.concatenate((hueHist, satHist, valHist))

In [8]:
# Function to extract features and save them as numpy arrays
def saveImgFeaturesAsNumpyArr(src_path, save_path):
    print(f"Original Image Folder: {src_path}")
    os.makedirs(os.path.dirname(save_path), exist_ok=True)  # creating the save folder if it doesn't exist
    print(f"Saving Image Features Folder: {save_path}")
    
    # going through each fruit folder in the source path (train or test)
    for fruit_folder in os.listdir(src_path):
        fruit_folder_path = os.path.join(src_path, fruit_folder)
        
        if os.path.isdir(fruit_folder_path):  # checking if the path actually points to a folder
            print(f"Currently Processing Fruit Images of: {fruit_folder}")
            
            # going through each image file in the current folder
            for img in os.listdir(fruit_folder_path):
                img_path = os.path.join(fruit_folder_path, img)
                
                if img.endswith('.jpg'):  # checking if the file is an jpg image
                    img_as_numpy_arr = convertImgToNumpyArr(img_path)
                    if img_as_numpy_arr is None:  # handling image processing error
                        print(f"Error processing image {img_path}")
                    else:
                        # computing the feature vector (HOG + Colour Histogram)
                        hogFeatures, _ = computeHOGFeatures(img_as_numpy_arr)  # Only use the feature descriptor
                        hogFeatures = np.ravel(hogFeatures)  # Flatten HOG features if needed

                        colourHistFeatures = computeColourHist(img_as_numpy_arr)
                        colourHistFeatures = np.ravel(colourHistFeatures)  # Flatten color histogram features if needed
                        # print("HOG features shape:", hogFeatures.shape)
                        # print("Colour Histogram features shape:", colourHistFeatures.shape)

                        # joining both feature sets
                        featureVector = np.concatenate((hogFeatures, colourHistFeatures))
                        featureVector = featureVector.astype(np.float32) # converting to float32 for saving storage space
                        # print("Feature vector length:", len(featureVector))

                        # creating the respective save path for each feature vector
                        subfolder_path = os.path.relpath(fruit_folder_path, src_path)
                        save_file_name = f"{os.path.splitext(img)[0]}.npy"  # saving file with .npy extension
                        img_save_path = os.path.join(save_path, subfolder_path, save_file_name)
                        
                        # creating the save folder if it doesn't exist
                        os.makedirs(os.path.dirname(img_save_path), exist_ok=True)
                        np.save(img_save_path, featureVector)

In [1]:
# # Define paths for source and destination
# train_data_path = os.path.join('fruits-360', 'Training')
# features_save_path = os.path.join('fruits-360', 'Features', 'Training')

# # Run feature extraction on training data
# saveImgFeaturesAsNumpyArr(train_data_path, features_save_path)

In [22]:
# Define paths for source and destination
test_data_path = os.path.join('fruits-360', 'Test')
features_save_path = os.path.join('fruits-360', 'Features', 'Test')

# Run feature extraction on training data
saveImgFeaturesAsNumpyArr(test_data_path, features_save_path)

Original Image Folder: fruits-360\Test
Saving Image Features Folder: fruits-360\Features\Test
Currently Processing Fruit Images of: Apple 10
Currently Processing Fruit Images of: Apple 12
Currently Processing Fruit Images of: Apple 13
Currently Processing Fruit Images of: Apple 14
Currently Processing Fruit Images of: Apple 17
Currently Processing Fruit Images of: Apple 6
Currently Processing Fruit Images of: Apple 9
Currently Processing Fruit Images of: Apple Braeburn 1
Currently Processing Fruit Images of: Apple Core 1
Currently Processing Fruit Images of: Apple Crimson Snow 1
Currently Processing Fruit Images of: Apple Golden 1
Currently Processing Fruit Images of: Apple Golden 2
Currently Processing Fruit Images of: Apple Golden 3
Currently Processing Fruit Images of: Apple Granny Smith 1
Currently Processing Fruit Images of: Apple hit 1
Currently Processing Fruit Images of: Apple Pink Lady 1
Currently Processing Fruit Images of: Apple Red 1
Currently Processing Fruit Images of: Ap

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Define dataset paths
data_dir_train = "fruits-360/Features/Training"
data_dir_test = "fruits-360/Features/Test"

# Initialize lists to store data
features_train, labels_train = [], []
features_test, labels_test = [], []
fruit_classes = {}  # Mapping of fruit name to label

# Read training dataset
for label, fruit_name in enumerate(os.listdir(data_dir_train)):
    fruit_path = os.path.join(data_dir_train, fruit_name)
    
    if os.path.isdir(fruit_path):
        fruit_classes[fruit_name] = label  # Assign numeric label
        
        for file in os.listdir(fruit_path):
            if file.endswith(".npy"):  # Process only .npy files
                file_path = os.path.join(fruit_path, file)
                feature_data = np.load(file_path)
                
                features_train.append(feature_data)
                labels_train.append(label)

# Read testing dataset
for fruit_name, label in fruit_classes.items():  # Use the same labels as training
    fruit_path = os.path.join(data_dir_test, fruit_name)
    
    if os.path.isdir(fruit_path):
        for file in os.listdir(fruit_path):
            if file.endswith(".npy"):  # Process only .npy files
                file_path = os.path.join(fruit_path, file)
                feature_data = np.load(file_path)
                
                features_test.append(feature_data)
                labels_test.append(label)

# Convert lists to NumPy arrays
X_train, y_train = np.array(features_train), np.array(labels_train)
X_test, y_test = np.array(features_test), np.array(labels_test)

# Train Decision Tree Classifier
clf = DecisionTreeClassifier(criterion='gini', max_depth=None, random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Decision Tree Accuracy: {accuracy:.4f}")

Loading Apple 10:   0%|          | 0/699 [00:00<?, ?file/s]

Loading Apple 10: 100%|██████████| 699/699 [00:00<00:00, 4851.08file/s]
Loading Apple 12: 100%|██████████| 466/466 [00:03<00:00, 116.86file/s] 
Loading Apple 13: 100%|██████████| 699/699 [00:07<00:00, 92.77file/s] 
Loading Apple 14: 100%|██████████| 466/466 [00:05<00:00, 90.61file/s] 
Loading Apple 17: 100%|██████████| 610/610 [00:07<00:00, 86.59file/s] 
Loading Apple 6: 100%|██████████| 473/473 [00:05<00:00, 87.48file/s]
Loading Apple 9: 100%|██████████| 694/694 [00:07<00:00, 95.55file/s] 
Loading Apple Braeburn 1: 100%|██████████| 492/492 [00:05<00:00, 93.90file/s] 
Loading Apple Core 1: 100%|██████████| 157/157 [00:01<00:00, 86.48file/s]
Loading Apple Crimson Snow 1: 100%|██████████| 444/444 [00:04<00:00, 95.96file/s] 
Loading Apple Golden 1: 100%|██████████| 480/480 [00:04<00:00, 97.34file/s] 
Loading Apple Golden 2: 100%|██████████| 492/492 [00:04<00:00, 99.60file/s] 
Loading Apple Golden 3: 100%|██████████| 481/481 [00:05<00:00, 94.50file/s] 
Loading Apple Granny Smith 1: 100%|██

Decision Tree Accuracy: 0.9796
