In [1]:
import os
import re

import joblib
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from tqdm import tqdm  # for the progress bar

In [None]:
# Function to Load Feature Vectors and Labels
def loadFeaturesAndLabels(features_path):

    features = [] # list to store the feature vectors
    labels = [] # list to store the labels
    
    # checking if the path exists
    if not os.path.exists(features_path):
        raise ValueError(f"Path {features_path} does not exist.")
    # checking if the path is a directory
    if not os.path.isdir(features_path):
        raise ValueError(f"Path {features_path} is not a directory.")
    
    # going through through the folders and load the features
    for fruit_folder in tqdm(os.listdir(features_path), unit="folder", desc=f"Loading Features from {features_path}"):

        fruit_folder_path = os.path.join(features_path, fruit_folder) # creating complete path of the fruit folder
        
        if os.path.isdir(fruit_folder_path):  # checking if it's an valid path to a folder
            for featureVectorFile in os.listdir(fruit_folder_path):

                feature_file_path = os.path.join(fruit_folder_path, featureVectorFile) # creating complete path of the feature file
                
                if featureVectorFile.endswith('.npy'):  # checking if its a valid feature file
                    labels.append(fruit_folder[:-2].strip())  # the folder name is the label (not taking the numbers at the end)

                    featureVector = np.load(feature_file_path)
                    features.append(featureVector)
                    
                    
    return np.array(features), np.array(labels)

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Read the pre-computed feature vectors (colour histogram + LBP histogram) for both splits.
trainX, trainY = loadFeaturesAndLabels(
    'img_ColourHist_LBP_Hist_Features/img_ColourHist_LBP_Hist_Features/Training'
)
testX, testY = loadFeaturesAndLabels(
    'img_ColourHist_LBP_Hist_Features/img_ColourHist_LBP_Hist_Features/Testing'
)

# Standardise the features: the scaler is fitted on the training split only
# and then applied unchanged to both splits.
scaler = StandardScaler().fit(trainX)
trainX = scaler.transform(trainX)
testX = scaler.transform(testX)

print(f"Training Features Shape: {trainX.shape}")
print(f"Testing Features Shape: {testX.shape}")
print(f"Number of Labels: {np.unique(trainY).size}")

# Fit the forest; the fixed random_state keeps the run repeatable.
print("Training Random Forest Classifier...")
clf = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_depth=None,
    random_state=42,
).fit(trainX, trainY)

# Score the fitted model on the held-out split.
print("Predicting test data...")
y_pred = clf.predict(testX)

accuracy = accuracy_score(y_true=testY, y_pred=y_pred)
print(f"Random Forest Accuracy: {accuracy:.4f}")

Loading Features from img_ColourHist_LBP_Hist_Features/img_ColourHist_LBP_Hist_Features/Training: 100%|██████████| 160/160 [15:13<00:00,  5.71s/folder]
Loading Features from img_ColourHist_LBP_Hist_Features/img_ColourHist_LBP_Hist_Features/Testing: 100%|██████████| 159/159 [05:14<00:00,  1.98s/folder]


Training Features Shape: (79921, 70)
Testing Features Shape: (26668, 70)
Number of Labels: 132
Training Random Forest Classifier...
Predicting test data...
Random Forest Accuracy: 0.9910


In [7]:
from joblib import dump

# Persist the fitted classifier together with the scaler it was trained with;
# each entry is (object, target file, confirmation message).
for _obj, _path, _msg in (
    (clf, 'random_forest_model.pkl', "random forest model successfully saved to random_forest_model.pkl"),
    (scaler, 'scaler.pkl', "Scaler successfully saved to scaler.pkl"),
):
    dump(_obj, _path)
    print(_msg)

random forest model successfully saved to random_forest_model.pkl
Scaler successfully saved to scaler.pkl


In [8]:
from joblib import load
# Round-trip check: read the classifier back from the file written by the save cell.
loaded_model = load('random_forest_model.pkl')
print("Model loaded successfully.")
# Predicting with the loaded model
# (testX here is the already-standardised test matrix produced in the training cell)
y_pred_loaded = loaded_model.predict(testX)
# Accuracy of the reloaded model on the same test labels, for comparison with the in-memory model.
accuracy_loaded = accuracy_score(testY, y_pred_loaded)
print(f"Loaded Model Accuracy: {accuracy_loaded:.4f}")


Model loaded successfully.
Loaded Model Accuracy: 0.9910


In [2]:
from joblib import load
import numpy as np
import cv2
from skimage.feature import hog, local_binary_pattern as lbp
from PIL import Image

# Load model and scaler (the files written by the save cell above).
# NOTE: joblib.load unpickles its input, so only load files you created or trust.
clf = load('random_forest_model.pkl')  
scaler = load('scaler.pkl')

In [19]:
from skimage import exposure
# Function to load image data and convert to numpy array
def convertImgToNumpyArr(image_path):
    """Read an image file and return it as a 100x100 RGB numpy array.

    Parameters
    ----------
    image_path : str
        Path of the image to open with PIL.

    Returns
    -------
    numpy.ndarray or None
        Array of shape ``(100, 100, 3)`` holding the resized RGB pixels, or
        ``None`` when the image could not be read or decoded (the error is
        printed rather than raised).
    """
    try:
        # the context manager releases the file handle even if decoding fails
        with Image.open(image_path) as img:
            # PNG files are frequently RGBA, palette or greyscale; force three
            # RGB channels because the cv2.COLOR_RGB2* conversions applied
            # later expect RGB input
            img = img.convert("RGB")
            img = img.resize((100, 100))  # resize image to 100x100 in case it is not
            # pixel values are deliberately left unscaled (no division by 255):
            # cv2 needs the 0-255 range
            return np.array(img)
    except Exception as e:
        print(f"Error processing image {image_path}: {e}")  # error handling
        return None

# Function to compute colour histogram features for an image

def computeColourHist(image, bins=20):
    """Return the concatenated Hue, Saturation and Value histograms of an RGB image.

    The image is converted to HSV, divided by 255, and each of the three
    channels is binned into ``bins`` equal-width bins over ``[0, 1]``. The
    result is the raw bin counts in H, S, V order (``3 * bins`` values).

    NOTE(review): for 8-bit input OpenCV documents hue as 0-179, so dividing
    by 255 would leave the upper hue bins empty. The scaling is kept as is
    because inference features presumably have to match the ones the saved
    model was trained on - confirm against the training-time extractor.
    """
    # RGB -> HSV, then scale every channel by the same constant
    hsv_scaled = cv2.cvtColor(image, cv2.COLOR_RGB2HSV) / 255.0

    # one histogram per channel (0 = hue, 1 = saturation, 2 = value); keep counts only
    channel_hists = [
        np.histogram(hsv_scaled[:, :, channel], bins=bins, range=(0, 1))[0]
        for channel in range(3)
    ]

    return np.concatenate(channel_hists)

def computeLBP_Features(image):
    """Return the normalised 10-bin histogram of uniform LBP codes of an RGB image.

    The image is converted to greyscale, uniform local binary patterns are
    computed with 8 neighbours at radius 1, and the codes are counted in ten
    unit-wide bins covering 0..10. The counts are divided by their sum (plus a
    small epsilon that guards against division by zero).
    """
    grey_img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # uniform LBP over the 8 neighbours at distance 1
    codes = lbp(grey_img, P=8, R=1, method="uniform")

    # eleven edges 0, 1, ..., 10 -> ten bins
    edges = np.arange(0, 11)
    counts = np.histogram(codes.ravel(), bins=edges, range=(0, 10))[0]

    total = np.sum(counts) + 1e-6
    return counts / total

def extract_features(image_path):
    """Build the feature vector for a single image file.

    The image is loaded with ``convertImgToNumpyArr`` and described by its
    colour histogram (``computeColourHist``) followed by its LBP histogram
    (``computeLBP_Features``).

    Parameters
    ----------
    image_path : str
        Path of the image to describe.

    Returns
    -------
    numpy.ndarray
        1-D array: colour-histogram values first, then LBP-histogram values.

    Raises
    ------
    ValueError
        If the image could not be loaded.
    """
    img = convertImgToNumpyArr(image_path)
    if img is None:
        # the loader signals failure by returning None; stop here with a clear
        # message instead of failing later inside cv2 with an obscure error
        raise ValueError(f"Could not load image {image_path}.")

    ch_features = computeColourHist(img)
    lbp_features = computeLBP_Features(img)
    # Combine colour histogram + LBP histogram (not HOG). The order presumably
    # has to match the layout of the training features - confirm.
    return np.concatenate([ch_features, lbp_features])

In [None]:
# Path to new image
new_image_path = "apple2.png"  # Change this to your image path

# Extract features (colour histogram + LBP histogram, as built by extract_features)
features = extract_features(new_image_path).reshape(1, -1)  # Reshape to 2D array (1 sample, n features)

# Scale features (use the same scaler that was loaded from scaler.pkl!)
scaled_features = scaler.transform(features)

# Predict; predict() returns one label per row, so take the single entry
predicted_class = clf.predict(scaled_features)[0]
print(f"Predicted class: {predicted_class}")

Predicted class: Apple
