## **Import Libraries** ##

In [13]:
import os
import re

import joblib
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from tqdm import tqdm  # for the progress bar

## **Loading Feature Vectors and Their Labels** ##

In [14]:
# Function to Load Feature Vectors and Labels
def loadFeaturesAndLabels(features_path):
    """Load every ``.npy`` feature vector below ``features_path`` with its class label.

    ``features_path`` must be a directory containing one sub-folder per class
    variant; every ``*.npy`` file inside a sub-folder is loaded as one sample.
    The label of a sample is the name of its sub-folder with a trailing numeric
    suffix removed, so a folder named ``Apple 12`` yields the label ``Apple`` and
    a folder without a number keeps its full name.

    Folders and files are visited in sorted order so the returned arrays have
    the same row order on every machine (``os.listdir`` order is arbitrary).

    Parameters
    ----------
    features_path : str
        Directory that holds the per-class sub-folders.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(features, labels)`` where row ``i`` of ``features`` is the vector
        loaded for ``labels[i]``. Both arrays are empty when no ``.npy`` file
        is found.

    Raises
    ------
    ValueError
        If ``features_path`` does not exist or is not a directory.
    """
    features = []  # list to store the feature vectors
    labels = []  # list to store the labels

    # checking if the path exists
    if not os.path.exists(features_path):
        raise ValueError(f"Path {features_path} does not exist.")
    # checking if the path is a directory
    if not os.path.isdir(features_path):
        raise ValueError(f"Path {features_path} is not a directory.")

    # going through the folders (in a deterministic order) and loading the features
    for fruit_folder in tqdm(sorted(os.listdir(features_path)), unit="folder",
                             desc=f"Loading Features from {features_path}"):

        fruit_folder_path = os.path.join(features_path, fruit_folder)  # complete path of the fruit folder

        if not os.path.isdir(fruit_folder_path):  # stray files next to the class folders are ignored
            continue

        # The folder name is the label, without the number at its end.
        # A regex is used instead of a fixed-width slice so that multi-digit
        # suffixes ("Apple 10") and names without any suffix are handled too.
        label = re.sub(r'\s*\d+$', '', fruit_folder).strip()

        for featureVectorFile in sorted(os.listdir(fruit_folder_path)):
            if not featureVectorFile.endswith('.npy'):  # only .npy files are feature files
                continue

            feature_file_path = os.path.join(fruit_folder_path, featureVectorFile)
            features.append(np.load(feature_file_path))
            labels.append(label)

    return np.array(features), np.array(labels)

In [3]:
# colour-histogram-only feature vectors: one call per dataset split
trainFeatures, trainLabels = loadFeaturesAndLabels(
    features_path='../features/img_ColourHist_Features/Training'
)
testFeatures, testLabels = loadFeaturesAndLabels(
    features_path='../features/img_ColourHist_Features/Testing'
)

Loading Features from ../features/img_ColourHist_Features/Training: 100%|██████████| 160/160 [00:08<00:00, 18.81folder/s]
Loading Features from ../features/img_ColourHist_Features/Testing: 100%|██████████| 159/159 [00:03<00:00, 52.71folder/s]


In [4]:
# quick sanity report: (samples, feature dimension) per split and number of distinct classes
print(f"Training Features Shape: {trainFeatures.shape}")
print(f"Testing Features Shape: {testFeatures.shape}")
print(f"Number of Labels: {np.unique(trainLabels).size}")

Training Features Shape: (79921, 60)
Testing Features Shape: (26668, 60)
Number of Labels: 132


## **SVM 1 (Linear SVM, only Colour Histogram Features)** ##

In [5]:
# fit a linear-kernel SVM on the colour-histogram training vectors
SVM_1 = SVC(kernel='linear').fit(trainFeatures, trainLabels)

# classify the test vectors
testPredictions1 = SVM_1.predict(testFeatures)

# share of correctly classified test samples, printed as a percentage
accuracy1 = accuracy_score(y_true=testLabels, y_pred=testPredictions1)
print(f"Linear SVM Test Accuracy: {accuracy1 * 100:.2f}%")


Linear SVM Test Accuracy: 98.05%


In [6]:
# saving SVM Model 1
model_filename1 = '../saved_models/SVM_Model_1.pkl'
# joblib.dump writes to an existing location only, so make sure the folder is there
os.makedirs(os.path.dirname(model_filename1), exist_ok=True)
joblib.dump(SVM_1, model_filename1)
print(f"Model 1 successfully saved to {model_filename1}")

Model 1 successfully saved to ../saved_models/SVM_Model_1.pkl


## **SVM 2 (Polynomial SVM)** ##

In [7]:
# fit a polynomial-kernel SVM on the colour-histogram training vectors
SVM_2 = SVC(kernel='poly').fit(trainFeatures, trainLabels)

# classify the test vectors
testPredictions2 = SVM_2.predict(testFeatures)

# share of correctly classified test samples, printed as a percentage
accuracy2 = accuracy_score(y_true=testLabels, y_pred=testPredictions2)
print(f"Polynomial SVM Test Accuracy: {accuracy2 * 100:.2f}%")

Polynomial SVM Test Accuracy: 97.25%


In [8]:
# saving SVM Model 2
model_filename2 = '../saved_models/SVM_Model_2.pkl'
# joblib.dump writes to an existing location only, so make sure the folder is there
os.makedirs(os.path.dirname(model_filename2), exist_ok=True)
joblib.dump(SVM_2, model_filename2)
print(f"Model 2 successfully saved to {model_filename2}")

Model 2 successfully saved to ../saved_models/SVM_Model_2.pkl


## **SVM 3 (RBF SVM)** ##

In [9]:
# fit an RBF-kernel SVM on the colour-histogram training vectors
SVM_3 = SVC(kernel='rbf').fit(trainFeatures, trainLabels)

# classify the test vectors
testPredictions3 = SVM_3.predict(testFeatures)

# share of correctly classified test samples, printed as a percentage
accuracy3 = accuracy_score(y_true=testLabels, y_pred=testPredictions3)
print(f"RBF SVM Test Accuracy: {accuracy3 * 100:.2f}%")

RBF SVM Test Accuracy: 96.56%


In [10]:
# saving SVM Model 3
model_filename3 = '../saved_models/SVM_Model_3.pkl'
# joblib.dump writes to an existing location only, so make sure the folder is there
os.makedirs(os.path.dirname(model_filename3), exist_ok=True)
joblib.dump(SVM_3, model_filename3)
print(f"Model 3 successfully saved to {model_filename3}")

Model 3 successfully saved to ../saved_models/SVM_Model_3.pkl


## **PCA + Linear SVM 1 (PCA on Colour Histogram and HOG Features)** ##

In [11]:
# loading the train and test feature data (HOG and Colour Histogram features)
trainFeatures2, trainLabelsPCA = loadFeaturesAndLabels('../features/img_HOG_ColourHist_Features/Training')
testFeatures2, testLabelsPCA = loadFeaturesAndLabels('../features/img_HOG_ColourHist_Features/Testing')

print(f"Training Features Shape: {trainFeatures2.shape}")
print(f"Testing Features Shape: {testFeatures2.shape}")
print(f"Number of Labels: {len(np.unique(trainLabelsPCA))}")

# applying PCA: the projection is fitted on the training split only and then
# re-used unchanged on the test split.
# random_state is fixed so that the projection is reproducible between runs
# in case scikit-learn selects a randomized SVD solver for this input size.
pca = PCA(n_components=30, random_state=42)
trainFeaturesPCA = pca.fit_transform(trainFeatures2)
testFeaturesPCA = pca.transform(testFeatures2)

print(f"PCA Training Features Shape: {trainFeaturesPCA.shape}")
print(f"PCA Testing Features Shape: {testFeaturesPCA.shape}")

# training
PCA_SVM = SVC(kernel='linear')  # linear SVM classifier on the PCA-reduced vectors
PCA_SVM.fit(trainFeaturesPCA, trainLabelsPCA)

# predicting on test data
testPredictionsPCA = PCA_SVM.predict(testFeaturesPCA)

# finding and printing the accuracy
accuracyPCA = accuracy_score(testLabelsPCA, testPredictionsPCA)
print(f"PCA + Linear SVM Test Accuracy: {accuracyPCA * 100:.2f}%")

Loading Features from ../features/img_HOG_ColourHist_Features/Training: 100%|██████████| 160/160 [00:08<00:00, 18.62folder/s]
Loading Features from ../features/img_HOG_ColourHist_Features/Testing: 100%|██████████| 159/159 [00:02<00:00, 56.81folder/s]


Training Features Shape: (79921, 960)
Testing Features Shape: (26668, 960)
Number of Labels: 132
PCA Training Features Shape: (79921, 30)
PCA Testing Features Shape: (26668, 30)
PCA + Linear SVM Test Accuracy: 97.98%


In [12]:
# saving PCA + SVM Model 1
model_filename_PCA = '../saved_models/PCA_SVM_Model_1.pkl'
pca_filename_1 = '../saved_models/PCA_Transform_1.pkl'
# joblib.dump writes to an existing location only, so make sure the folder is there
os.makedirs(os.path.dirname(model_filename_PCA), exist_ok=True)

# The classifier was trained on PCA-projected vectors, so it is only usable on
# new data together with the fitted projection. `pca` is a notebook-level name
# that a later cell rebinds; this check catches the case where it no longer
# matches the classifier's input width (it cannot tell apart two projections
# of the same width, so run the notebook top to bottom).
assert pca.n_components_ == PCA_SVM.n_features_in_, (
    "`pca` does not match PCA_SVM - re-run the PCA + Linear SVM 1 cell before saving"
)

joblib.dump(PCA_SVM, model_filename_PCA)
joblib.dump(pca, pca_filename_1)
print(f"PCA + SVM Model successfully saved to {model_filename_PCA}")
print(f"Fitted PCA transform successfully saved to {pca_filename_1}")

PCA + SVM Model successfully saved to ../saved_models/PCA_SVM_Model_1.pkl


## **PCA + Linear SVM 2 (PCA on Colour Histogram and LBP Histogram Features)** ##

In [None]:
# loading the train and test feature data (LBP Histogram and Colour Histogram features)
trainFeatures3, trainLabelsPCA2 = loadFeaturesAndLabels('../features/img_ColourHist_LBP_Hist_Features/Training')
testFeatures3, testLabelsPCA2 = loadFeaturesAndLabels('../features/img_ColourHist_LBP_Hist_Features/Testing')

print(f"Training Features Shape: {trainFeatures3.shape}")
print(f"Testing Features Shape: {testFeatures3.shape}")
print(f"Number of Labels: {len(np.unique(trainLabelsPCA2))}")

# applying PCA: the projection is fitted on the training split only and then
# re-used unchanged on the test split.
# NOTE(review): this was n_components=70, which equals the dimensionality of
# these feature vectors (see the printed shapes) and therefore reduces nothing;
# 30 matches the shapes recorded in this cell's output - confirm the intended value.
# random_state is fixed so that the projection is reproducible between runs
# in case scikit-learn selects a randomized SVD solver for this input size.
pca = PCA(n_components=30, random_state=42)
trainFeaturesPCA2 = pca.fit_transform(trainFeatures3)
testFeaturesPCA2 = pca.transform(testFeatures3)

print(f"PCA Training Features Shape: {trainFeaturesPCA2.shape}")
print(f"PCA Testing Features Shape: {testFeaturesPCA2.shape}")

# training
PCA_SVM_2 = SVC(kernel='linear')  # linear SVM classifier on the PCA-reduced vectors
PCA_SVM_2.fit(trainFeaturesPCA2, trainLabelsPCA2)

# predicting on test data
testPredictionsPCA2 = PCA_SVM_2.predict(testFeaturesPCA2)

# finding and printing the accuracy
accuracyPCA2 = accuracy_score(testLabelsPCA2, testPredictionsPCA2)
print(f"PCA + Linear SVM 2 Test Accuracy: {accuracyPCA2 * 100:.2f}%")

Loading Features from ../features/img_ColourHist_LBP_Hist_Features/Training: 100%|██████████| 160/160 [00:08<00:00, 19.88folder/s]
Loading Features from ../features/img_ColourHist_LBP_Hist_Features/Testing: 100%|██████████| 159/159 [00:02<00:00, 59.53folder/s]


Training Features Shape: (79921, 70)
Testing Features Shape: (26668, 70)
Number of Labels: 132
PCA Training Features Shape: (79921, 30)
PCA Testing Features Shape: (26668, 30)
PCA + Linear SVM 2 Test Accuracy: 97.98%


In [None]:
# saving PCA + SVM Model 2
model_filename_PCA_2 = '../saved_models/PCA_SVM_Model_2.pkl'
pca_filename_2 = '../saved_models/PCA_Transform_2.pkl'
# joblib.dump writes to an existing location only, so make sure the folder is there
os.makedirs(os.path.dirname(model_filename_PCA_2), exist_ok=True)

# The classifier was trained on PCA-projected vectors, so it is only usable on
# new data together with the fitted projection. `pca` is a notebook-level name
# shared with the earlier PCA section; this check catches the case where it
# does not match the classifier's input width (it cannot tell apart two
# projections of the same width, so run the notebook top to bottom).
assert pca.n_components_ == PCA_SVM_2.n_features_in_, (
    "`pca` does not match PCA_SVM_2 - re-run the PCA + Linear SVM 2 cell before saving"
)

joblib.dump(PCA_SVM_2, model_filename_PCA_2)
joblib.dump(pca, pca_filename_2)
print(f"PCA + SVM Model successfully saved to {model_filename_PCA_2}")
print(f"Fitted PCA transform successfully saved to {pca_filename_2}")

PCA + SVM Model successfully saved to ../saved_models/PCA_SVM_Model_2.pkl
