## **Import Libraries** ##

In [2]:
import os

import joblib
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from tqdm import tqdm  # for the progress bar

## **Loading Feature Vectors and Their Labels** ##

In [3]:
# Function to Load Feature Vectors and Labels
def loadFeaturesAndLabels(features_path):
    """
    Load every saved feature vector under `features_path` together with its label.

    The directory is expected to contain one sub-folder per fruit variety, each
    holding `.npy` feature files. The label of a sample is the name of its
    folder with any trailing number removed, so "Apple Red 1" and "Apple Red 2"
    both become "Apple Red". Entries that are not folders, and files that do not
    end in `.npy`, are ignored.

    Parameters
    ----------
    features_path : str
        Directory that contains the per-class sub-folders.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The loaded feature vectors and the matching labels, in the same order.
        Folders and files are visited in sorted order, so the result does not
        depend on the order in which the file system lists them.

    Raises
    ------
    ValueError
        If `features_path` does not exist or is not a directory.
    """
    # checking if the path exists
    if not os.path.exists(features_path):
        raise ValueError(f"Path {features_path} does not exist.")
    # checking if the path is a directory
    if not os.path.isdir(features_path):
        raise ValueError(f"Path {features_path} is not a directory.")

    features = []  # list to store the feature vectors
    labels = []  # list to store the labels

    # os.listdir returns entries in arbitrary order; sorting keeps the sample order reproducible
    fruit_folders = sorted(os.listdir(features_path))

    # going through the folders and loading the features
    for fruit_folder in tqdm(fruit_folders, unit="folder", desc=f"Loading Features from {features_path}"):

        fruit_folder_path = os.path.join(features_path, fruit_folder)  # complete path of the fruit folder
        if not os.path.isdir(fruit_folder_path):  # skipping anything that is not a folder
            continue

        # the folder name is the label: remove only a trailing number (if there is one),
        # and keep the full name when nothing would be left after stripping
        label = fruit_folder.rstrip('0123456789').strip() or fruit_folder

        for featureVectorFile in sorted(os.listdir(fruit_folder_path)):
            if not featureVectorFile.endswith('.npy'):  # only .npy files are feature files
                continue

            feature_file_path = os.path.join(fruit_folder_path, featureVectorFile)  # complete path of the feature file
            features.append(np.load(feature_file_path))
            labels.append(label)

    return np.array(features), np.array(labels)

## **SVM 1 (only Colour Histogram Features)** ##

In [3]:
# --- data: colour-histogram-only feature vectors for the training and testing splits ---
trainFeatures1, trainLabels1 = loadFeaturesAndLabels('../features/img_ColourHist_Features/Training')
testFeatures1, testLabels1 = loadFeaturesAndLabels('../features/img_ColourHist_Features/Testing')

# --- summary of what was loaded ---
print(f"Training Features Shape: {trainFeatures1.shape}")
print(f"Testing Features Shape: {testFeatures1.shape}")
print(f"Number of Labels: {len(np.unique(trainLabels1))}")

# --- model: linear-kernel SVM (fit() hands back the fitted estimator) ---
SVM_1 = SVC(kernel='linear').fit(trainFeatures1, trainLabels1)

# --- evaluation: predict the testing split and report the accuracy as a percentage ---
testPredictions1 = SVM_1.predict(testFeatures1)
accuracy1 = accuracy_score(y_true=testLabels1, y_pred=testPredictions1)
print(f"Test Accuracy: {accuracy1 * 100:.2f}%")


Loading Features from ../features/img_ColourHist_Features/Training: 100%|██████████| 160/160 [00:05<00:00, 28.10folder/s]
Loading Features from ../features/img_ColourHist_Features/Testing: 100%|██████████| 159/159 [00:01<00:00, 84.97folder/s]


Training Features Shape: (79921, 60)
Testing Features Shape: (26668, 60)
Number of Labels: 132
Test Accuracy: 98.05%


In [4]:
# saving SVM Model 1
model_filename1 = '../saved_models/SVM_Model_1.pkl'

# joblib.dump does not create missing parent folders, so make sure the target folder exists
os.makedirs(os.path.dirname(model_filename1), exist_ok=True)

joblib.dump(SVM_1, model_filename1)
print(f"Model 1 successfully saved to {model_filename1}")

Model 1 successfully saved to ../saved_models/SVM_Model_1.pkl


## **SVM 2 (Colour Histogram and HOG Features)** ##

In [5]:
# --- data: combined HOG + colour-histogram feature vectors for the training and testing splits ---
trainFeatures2, trainLabels2 = loadFeaturesAndLabels('../features/img_HOG_ColourHist_Features/Training')
testFeatures2, testLabels2 = loadFeaturesAndLabels('../features/img_HOG_ColourHist_Features/Testing')

# --- summary of what was loaded ---
print(f"Training Features Shape: {trainFeatures2.shape}")
print(f"Testing Features Shape: {testFeatures2.shape}")
print(f"Number of Labels: {len(np.unique(trainLabels2))}")

# --- model: linear-kernel SVM (fit() hands back the fitted estimator) ---
SVM_2 = SVC(kernel='linear').fit(trainFeatures2, trainLabels2)

# --- evaluation: predict the testing split and report the accuracy as a percentage ---
testPredictions2 = SVM_2.predict(testFeatures2)
accuracy2 = accuracy_score(y_true=testLabels2, y_pred=testPredictions2)
print(f"Test Accuracy: {accuracy2 * 100:.2f}%")

Loading Features from ../features/img_HOG_ColourHist_Features/Training: 100%|██████████| 160/160 [00:05<00:00, 30.08folder/s]
Loading Features from ../features/img_HOG_ColourHist_Features/Testing: 100%|██████████| 159/159 [00:01<00:00, 88.97folder/s]


Training Features Shape: (79921, 960)
Testing Features Shape: (26668, 960)
Number of Labels: 132
Test Accuracy: 98.05%


In [6]:
# saving SVM Model 2
model_filename2 = '../saved_models/SVM_Model_2.pkl'

# joblib.dump does not create missing parent folders, so make sure the target folder exists
os.makedirs(os.path.dirname(model_filename2), exist_ok=True)

joblib.dump(SVM_2, model_filename2)
print(f"Model 2 successfully saved to {model_filename2}")

Model 2 successfully saved to ../saved_models/SVM_Model_2.pkl


## **PCA + SVM (PCA on Colour Histogram and HOG Features)** ##

In [4]:
# loading the train and test feature data (HOG and Colour Histogram features)
# (loaded again here so this section can be run without the SVM 2 section)
trainFeatures3, trainLabelsPCA = loadFeaturesAndLabels('../features/img_HOG_ColourHist_Features/Training')
testFeatures3, testLabelsPCA = loadFeaturesAndLabels('../features/img_HOG_ColourHist_Features/Testing')

print(f"Training Features Shape: {trainFeatures3.shape}")
print(f"Testing Features Shape: {testFeatures3.shape}")
print(f"Number of Labels: {len(np.unique(trainLabelsPCA))}")

# applying PCA
# random_state is fixed so the projection is the same on every run, even when
# scikit-learn picks a randomized SVD solver for an input of this size
pca = PCA(n_components=60, random_state=42)
trainFeaturesPCA = pca.fit_transform(trainFeatures3)  # the projection is learned from the training split only
testFeaturesPCA = pca.transform(testFeatures3)  # the testing split is projected with the same fitted PCA

print(f"PCA Training Features Shape: {trainFeaturesPCA.shape}")
print(f"PCA Testing Features Shape: {testFeaturesPCA.shape}")

# training
PCA_SVM = SVC(kernel='linear')  # linear SVM classifier
PCA_SVM.fit(trainFeaturesPCA, trainLabelsPCA)

# predicting on test data
testPredictions3 = PCA_SVM.predict(testFeaturesPCA)

# finding and printing the accuracy
accuracy3 = accuracy_score(testLabelsPCA, testPredictions3)
print(f"Test Accuracy: {accuracy3 * 100:.2f}%")

Loading Features from ../features/img_HOG_ColourHist_Features/Training: 100%|██████████| 160/160 [00:04<00:00, 37.96folder/s]
Loading Features from ../features/img_HOG_ColourHist_Features/Testing: 100%|██████████| 159/159 [00:01<00:00, 140.78folder/s]


Training Features Shape: (79921, 960)
Testing Features Shape: (26668, 960)
Number of Labels: 132
PCA Training Features Shape: (79921, 60)
PCA Testing Features Shape: (26668, 60)
Test Accuracy: 98.05%


In [5]:
# saving PCA + SVM Model 1
model_filename3 = '../saved_models/PCA_SVM_Model_1.pkl'
pca_filename3 = '../saved_models/PCA_Transformer_1.pkl'

# joblib.dump does not create missing parent folders, so make sure the target folder exists
os.makedirs(os.path.dirname(model_filename3), exist_ok=True)

joblib.dump(PCA_SVM, model_filename3)
print(f"Model 3 successfully saved to {model_filename3}")

# the classifier was trained on PCA-projected features, so the fitted PCA is stored as well;
# without it the saved classifier cannot be applied to raw feature vectors after a restart
joblib.dump(pca, pca_filename3)
print(f"PCA transformer for Model 3 successfully saved to {pca_filename3}")

Model 3 successfully saved to ../saved_models/PCA_SVM_Model_1.pkl
