In [1]:
import os
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tqdm import tqdm  # for the progress bar
import joblib

In [2]:
# Function to Load Feature Vectors and Labels
def loadFeaturesAndLabels(features_path):

    features = [] # list to store the feature vectors
    labels = [] # list to store the labels
    
    # checking if the path exists
    if not os.path.exists(features_path):
        raise ValueError(f"Path {features_path} does not exist.")
    # checking if the path is a directory
    if not os.path.isdir(features_path):
        raise ValueError(f"Path {features_path} is not a directory.")
    
    # going through through the folders and load the features
    for fruit_folder in tqdm(os.listdir(features_path), unit="folder", desc=f"Loading Features from {features_path}"):

        fruit_folder_path = os.path.join(features_path, fruit_folder) # creating complete path of the fruit folder
        
        if os.path.isdir(fruit_folder_path):  # checking if it's an valid path to a folder
            for featureVectorFile in os.listdir(fruit_folder_path):

                feature_file_path = os.path.join(fruit_folder_path, featureVectorFile) # creating complete path of the feature file
                
                if featureVectorFile.endswith('.npy'):  # checking if its a valid feature file
                    labels.append(fruit_folder[:-2].strip())  # the folder name is the label (not taking the numbers at the end)

                    featureVector = np.load(feature_file_path)
                    features.append(featureVector)
                    
                    
    return np.array(features), np.array(labels)

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# loading the train and test feature data (only LBP histogram features)
trainX_CH, trainY_CH = loadFeaturesAndLabels('img_ColourHist_Features/img_ColourHist_Features/Training')
testX_CH, testY_CH = loadFeaturesAndLabels('img_ColourHist_Features/img_ColourHist_Features/Testing')

print(f"Training Features Shape: {trainX_CH.shape}")
print(f"Testing Features Shape: {testX_CH.shape}")
print(f"Number of Labels: {len(np.unique(trainY_CH))}")

print("Training Random Forest Classifier...")
clf = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=None, random_state=42)
clf.fit(trainX_CH, trainY_CH)

print("Predicting test data...")
y_pred = clf.predict(testX_CH)

accuracy = accuracy_score(testY_CH, y_pred)
print(f"Random Forest Accuracy: {accuracy:.4f}")

Loading Features from img_ColourHist_Features/img_ColourHist_Features/Training: 100%|██████████| 160/160 [15:03<00:00,  5.65s/folder]
Loading Features from img_ColourHist_Features/img_ColourHist_Features/Testing: 100%|██████████| 159/159 [05:15<00:00,  1.98s/folder]


Training Features Shape: (79921, 60)
Testing Features Shape: (26668, 60)
Number of Labels: 132
Training Random Forest Classifier...
Predicting test data...
Random Forest Accuracy: 0.9903


In [4]:
from joblib import dump

# Save the model to a .pkl file
dump(clf, 'random_forest_model2.pkl')
print(f"random forest model successfully saved to random_forest_model2.pkl")

random forest model successfully saved to random_forest_model2.pkl
