## **Import Libraries** ##

In [1]:
import os

import joblib
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from tqdm import tqdm  # for the progress bar

## **Loading Feature Vectors and Their Labels** ##

In [3]:
# Function to Load Feature Vectors and Labels
def loadFeaturesAndLabels(features_path):
    """Load every ``.npy`` feature vector under ``features_path`` with its class label.

    ``features_path`` is scanned for sub-folders; every file ending in ``.npy``
    inside a sub-folder is loaded as one sample. Top-level entries that are not
    directories, and files without the ``.npy`` extension, are skipped.

    The label of a sample is its folder name with any trailing run of digits
    (and surrounding whitespace) removed, so ``"Apple 1"`` and ``"Apple 10"``
    both map to ``"Apple"`` while a folder without a numeric suffix keeps its
    full name.

    Folders and files are visited in sorted order so the row order of the
    returned arrays does not depend on the order ``os.listdir`` happens to use.

    Args:
        features_path: Path of the directory that contains the class sub-folders.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays; row ``i`` of
        ``features`` is the vector loaded for ``labels[i]``.

    Raises:
        ValueError: If ``features_path`` does not exist or is not a directory,
            or if a feature file's shape differs from the first one loaded.
    """
    # checking if the path exists
    if not os.path.exists(features_path):
        raise ValueError(f"Path {features_path} does not exist.")
    # checking if the path is a directory
    if not os.path.isdir(features_path):
        raise ValueError(f"Path {features_path} is not a directory.")

    features = []  # list to store the feature vectors
    labels = []  # list to store the labels
    expected_shape = None  # shape of the first vector loaded; all others must match

    # going through the folders (in sorted order) and loading the features
    for fruit_folder in tqdm(sorted(os.listdir(features_path)), unit="folder", desc=f"Loading Features from {features_path}"):

        fruit_folder_path = os.path.join(features_path, fruit_folder)  # complete path of the fruit folder

        if not os.path.isdir(fruit_folder_path):  # skip stray files next to the class folders
            continue

        # the folder name is the label, minus the numeric suffix (if there is one)
        label = fruit_folder.rstrip('0123456789').strip()

        for featureVectorFile in sorted(os.listdir(fruit_folder_path)):

            if not featureVectorFile.endswith('.npy'):  # only .npy files are feature files
                continue

            feature_file_path = os.path.join(fruit_folder_path, featureVectorFile)  # complete path of the feature file
            featureVector = np.load(feature_file_path)

            # fail early, naming the offending file, instead of letting np.array()
            # choke on (or silently box) vectors of different shapes
            if expected_shape is None:
                expected_shape = featureVector.shape
            elif featureVector.shape != expected_shape:
                raise ValueError(
                    f"Feature file {feature_file_path} has shape {featureVector.shape}, "
                    f"expected {expected_shape}."
                )

            features.append(featureVector)
            labels.append(label)

    return np.array(features), np.array(labels)

## **SVM (Linear, Poly, RBF) Using Colour Histogram Features** ##

### **Loading Features and Labels** ###

In [15]:
# colour histogram features only: load the Training split first, then the Testing split;
# each call returns a (features, labels) pair
(trainFeatures, trainLabels), (testFeatures, testLabels) = map(
    loadFeaturesAndLabels,
    (
        '../features/img_ColourHist_Features/Training',
        '../features/img_ColourHist_Features/Testing',
    ),
)

Loading Features from ../features/img_ColourHist_Features/Training: 100%|██████████| 160/160 [00:07<00:00, 20.17folder/s]
Loading Features from ../features/img_ColourHist_Features/Testing: 100%|██████████| 159/159 [00:02<00:00, 54.85folder/s]


In [5]:
# summary of what was loaded: array dimensions and the number of distinct training labels
print(
    f"Training Features Shape: {trainFeatures.shape}",
    f"Testing Features Shape: {testFeatures.shape}",
    f"Number of Labels: {len(np.unique(trainLabels))}",
    sep="\n",
)

Training Features Shape: (79921, 60)
Testing Features Shape: (26668, 60)
Number of Labels: 132


### **SVM 1 (Linear)** ###

In [5]:
# fit a linear-kernel SVM on the colour histogram training set
# (fit() returns the fitted estimator, so construction and training can be chained)
SVM_1 = SVC(kernel='linear').fit(trainFeatures, trainLabels)

# label every test vector with the trained model
testPredictions1 = SVM_1.predict(testFeatures)

# share of test samples whose predicted label equals the true label
accuracy1 = accuracy_score(y_true=testLabels, y_pred=testPredictions1)
print(f"Linear SVM Test Accuracy: {accuracy1 * 100:.2f}%")


Linear SVM Test Accuracy: 98.05%


In [6]:
# saving SVM Model 1
model_filename1 = '../saved_models/SVM_Model_1.pkl'
# joblib.dump does not create missing folders; make sure the target directory exists
# so the save does not fail on a fresh checkout after the model has been trained
os.makedirs(os.path.dirname(model_filename1), exist_ok=True)
joblib.dump(SVM_1, model_filename1)
print(f"Model 1 successfully saved to {model_filename1}")

Model 1 successfully saved to ../saved_models/SVM_Model_1.pkl


### **SVM 2 (Polynomial SVM)** ###

In [7]:
# fit a polynomial-kernel SVM (SVC defaults for every other setting) on the colour histogram training set
SVM_2 = SVC(kernel='poly').fit(trainFeatures, trainLabels)

# label every test vector with the trained model
testPredictions2 = SVM_2.predict(testFeatures)

# share of test samples whose predicted label equals the true label
accuracy2 = accuracy_score(y_true=testLabels, y_pred=testPredictions2)
print(f"Polynomial SVM Test Accuracy: {accuracy2 * 100:.2f}%")

Polynomial SVM Test Accuracy: 97.25%


In [8]:
# saving SVM Model 2
model_filename2 = '../saved_models/SVM_Model_2.pkl'
# joblib.dump does not create missing folders; make sure the target directory exists
# so the save does not fail on a fresh checkout after the model has been trained
os.makedirs(os.path.dirname(model_filename2), exist_ok=True)
joblib.dump(SVM_2, model_filename2)
print(f"Model 2 successfully saved to {model_filename2}")

Model 2 successfully saved to ../saved_models/SVM_Model_2.pkl


### **SVM 3 (RBF SVM)** ###

In [None]:
# fit an RBF-kernel SVM (SVC defaults for every other setting) on the colour histogram training set
SVM_3 = SVC(kernel='rbf').fit(trainFeatures, trainLabels)

# label every test vector with the trained model
testPredictions3 = SVM_3.predict(testFeatures)

# share of test samples whose predicted label equals the true label
accuracy3 = accuracy_score(y_true=testLabels, y_pred=testPredictions3)
print(f"RBF SVM Test Accuracy: {accuracy3 * 100:.2f}%")

RBF SVM Test Accuracy: 96.56%


In [10]:
# saving SVM Model 3
model_filename3 = '../saved_models/SVM_Model_3.pkl'
# joblib.dump does not create missing folders; make sure the target directory exists
# so the save does not fail on a fresh checkout after the model has been trained
os.makedirs(os.path.dirname(model_filename3), exist_ok=True)
joblib.dump(SVM_3, model_filename3)
print(f"Model 3 successfully saved to {model_filename3}")

Model 3 successfully saved to ../saved_models/SVM_Model_3.pkl


## **SVM (Linear) Using HOG Features** ##

### **Loading Features and Labels** ###

In [5]:
# HOG features only: load the Training split first, then the Testing split;
# each call returns a (features, labels) pair
(trainX_HOG, trainY_HOG), (testX_HOG, testY_HOG) = map(
    loadFeaturesAndLabels,
    (
        '../features/img_HOG_Features/Training',
        '../features/img_HOG_Features/Testing',
    ),
)

# summary of what was loaded: array dimensions and the number of distinct training labels
print(
    f"Training Features Shape: {trainX_HOG.shape}",
    f"Testing Features Shape: {testX_HOG.shape}",
    f"Number of Labels: {len(np.unique(trainY_HOG))}",
    sep="\n",
)

Loading Features from ../features/img_HOG_Features/Training: 100%|██████████| 160/160 [00:06<00:00, 22.96folder/s]
Loading Features from ../features/img_HOG_Features/Testing: 100%|██████████| 159/159 [00:02<00:00, 63.08folder/s]

Training Features Shape: (79921, 900)
Testing Features Shape: (26668, 900)
Number of Labels: 132





### **SVM 5 (Linear, HOG Features)** ###

In [6]:
# fit a linear-kernel SVM on the HOG training features
SVM_5 = SVC(kernel='linear').fit(trainX_HOG, trainY_HOG)

# the test set is classified batchSize rows at a time; the tqdm bar advances once per batch
batchSize = 10
totalBatches = int(np.ceil(len(testX_HOG) / batchSize))

testPred_HOG = []

print("Predicting HOG Test Data...")
# range(0, n, batchSize) yields one start offset per batch (totalBatches of them)
for startIdx in tqdm(range(0, len(testX_HOG), batchSize), desc="Predicting Batches", unit="batch"):
    # a slice that runs past the end of the array is clamped, so the final
    # (possibly shorter) batch needs no special handling
    testPred_HOG.extend(SVM_5.predict(testX_HOG[startIdx:startIdx + batchSize]))

testPred_HOG = np.array(testPred_HOG)

# share of test samples whose predicted label equals the true label
accuracyHOG = accuracy_score(y_true=testY_HOG, y_pred=testPred_HOG)
print(f"Linear SVM with HOG Test Accuracy: {accuracyHOG * 100:.2f}%")

Predicting HOG Test Data...


Predicting Batches: 100%|██████████| 2667/2667 [15:16<00:00,  2.91batch/s]

Linear SVM with HOG Test Accuracy: 91.65%





In [7]:
# saving SVM Model 5
model_filename5 = '../saved_models/SVM_Model_5.pkl'
# joblib.dump does not create missing folders; make sure the target directory exists
# so the save does not fail on a fresh checkout after the model has been trained
os.makedirs(os.path.dirname(model_filename5), exist_ok=True)
joblib.dump(SVM_5, model_filename5)
print(f"Model 5 successfully saved to {model_filename5}")

Model 5 successfully saved to ../saved_models/SVM_Model_5.pkl


## **SVM (Linear) Using LBP Histogram Features** ##

### **Loading Features and Labels** ###

In [None]:
# loading the train and test feature data (only LBP histogram features)
trainX_HOG, trainY_HOG = loadFeaturesAndLabels('../features/img_LBP_Hist_Features/Training')
testX_HOG, testY_HOG = loadFeaturesAndLabels('../features/img_LBP_Hist_Features/Testing')

print(f"Training Features Shape: {trainX_HOG.shape}")
print(f"Testing Features Shape: {testX_HOG.shape}")
print(f"Number of Labels: {len(np.unique(trainY_HOG))}")

Loading Features from ../features/img_LBP_Hist_Features/Training: 100%|██████████| 160/160 [00:08<00:00, 19.64folder/s]
Loading Features from ../features/img_LBP_Hist_Features/Testing: 100%|██████████| 159/159 [00:02<00:00, 58.28folder/s]


Training Features Shape: (79921, 10)
Testing Features Shape: (26668, 10)
Number of Labels: 132


### **SVM 4 (Linear, LBP Histogram Features)** ###

In [None]:
# fit a linear-kernel SVM on the LBP histogram training features.
# NOTE(review): this is "SVM 4", but it is bound to the name SVM_5 (replacing the HOG
# model of that name) and the "*_HOG" variables hold LBP data at this point. The save
# cell for model 4 dumps SVM_5, so any rename has to cover that cell as well.
SVM_5 = SVC(kernel='linear').fit(trainX_HOG, trainY_HOG)

# the test set is classified batchSize rows at a time; the tqdm bar advances once per batch
batchSize = 10
totalBatches = int(np.ceil(len(testX_HOG) / batchSize))

testPred_HOG = []

print("Predicting LBP Test Data...")
# range(0, n, batchSize) yields one start offset per batch (totalBatches of them)
for startIdx in tqdm(range(0, len(testX_HOG), batchSize), desc="Predicting Batches", unit="batch"):
    # a slice that runs past the end of the array is clamped, so the final
    # (possibly shorter) batch needs no special handling
    testPred_HOG.extend(SVM_5.predict(testX_HOG[startIdx:startIdx + batchSize]))

testPred_HOG = np.array(testPred_HOG)

# share of test samples whose predicted label equals the true label
accuracyHOG = accuracy_score(y_true=testY_HOG, y_pred=testPred_HOG)
print(f"Linear SVM with LBP Histogram Test Accuracy: {accuracyHOG * 100:.2f}%")

Predicting LBP Test Data...


Predicting Batches: 100%|██████████| 2667/2667 [07:35<00:00,  5.86batch/s]

Linear SVM with LBP Histogram Test Accuracy: 18.10%





In [None]:
# saving SVM Model 4
# NOTE(review): the LBP histogram model trained in the previous cell is bound to the
# name SVM_5, which is why SVM_5 is what gets written to the Model 4 file here.
model_filename4 = '../saved_models/SVM_Model_4.pkl'
# joblib.dump does not create missing folders; make sure the target directory exists
# so the save does not fail on a fresh checkout after the model has been trained
os.makedirs(os.path.dirname(model_filename4), exist_ok=True)
joblib.dump(SVM_5, model_filename4)
print(f"Model 4 successfully saved to {model_filename4}")

Model 4 successfully saved to ../saved_models/SVM_Model_4.pkl


## **SVM (Linear) Using 2 Feature Descriptors** ##

### **SVM 6 (Colour Histogram + HOG Features, Linear)** ###

In [4]:
# loading the train and test feature data (HOG and Colour Histogram features)
trainX, trainY = loadFeaturesAndLabels('../features/img_HOG_ColourHist_Features/Training')
testX, testY = loadFeaturesAndLabels('../features/img_HOG_ColourHist_Features/Testing')

# standardizing the features: the scaler is fitted on the training data only and the
# same fitted transform is then applied to the test data
scaler = StandardScaler()
trainX = scaler.fit_transform(trainX)
testX = scaler.transform(testX)

print(f"Training Features Shape: {trainX.shape}")
print(f"Testing Features Shape: {testX.shape}")
print(f"Number of Labels: {len(np.unique(trainY))}")

# training
SVM_6 = SVC(kernel='linear')  # linear SVM classifier
SVM_6.fit(trainX, trainY)

# predicting on test data, batchSize rows at a time (the tqdm bar advances once per batch)
print("Predicting Test Data...")
batchSize = 10
totalBatches = int(np.ceil(len(testX) / batchSize))

testPred6 = []

for i in tqdm(range(totalBatches), desc="Predicting Batches", unit="batch"):
    # calculating the start and end index for the current batch
    startIdx = i * batchSize
    endIdx = min((i + 1) * batchSize, len(testX))
    # predicting the current batch
    batchPreds = SVM_6.predict(testX[startIdx:endIdx])
    testPred6.extend(batchPreds)

testPred6 = np.array(testPred6)

# finding and printing the accuracy
accuracy = accuracy_score(testY, testPred6)
print(f"Linear SVM on HOG and Colour Histogram Test Accuracy: {accuracy * 100:.2f}%")

Loading Features from ../features/img_HOG_ColourHist_Features/Training: 100%|██████████| 160/160 [00:06<00:00, 23.53folder/s]
Loading Features from ../features/img_HOG_ColourHist_Features/Testing: 100%|██████████| 159/159 [00:02<00:00, 60.38folder/s]


Training Features Shape: (79921, 960)
Testing Features Shape: (26668, 960)
Number of Labels: 132
Predicting Test Data...


Predicting Batches: 100%|██████████| 2667/2667 [18:52<00:00,  2.36batch/s]

Linear SVM on HOG and Colour Histogram Test Accuracy: 96.70%





In [6]:
# saving SVM Model 6 together with the scaler its training data was standardized with
model_filename_6 = '../saved_models/SVM_Model_6.pkl'
scaler_filename_6 = '../saved_models/SVM_Model_6_Scaler.pkl'
# joblib.dump does not create missing folders; make sure the target directory exists
os.makedirs(os.path.dirname(model_filename_6), exist_ok=True)
joblib.dump(SVM_6, model_filename_6)
# SVM_6 was fitted on StandardScaler-transformed features, so the saved model can only
# be applied to new data if the same fitted scaler is applied first. `scaler` must still
# be the one fitted in the SVM 6 training cell: run this cell before the SVM 7 cell,
# which rebinds `scaler`.
joblib.dump(scaler, scaler_filename_6)
print(f"SVM Model 6 successfully saved to {model_filename_6}")
print(f"Scaler for SVM Model 6 successfully saved to {scaler_filename_6}")

SVM Model 6 successfully sved to ../saved_models/SVM_Model_6.pkl


### **SVM 7 (Colour Histogram + LBP Histogram Features, Linear)** ###

In [7]:
# loading the train and test feature data (LBP Histogram and Colour Histogram features)
trainX, trainY = loadFeaturesAndLabels('../features/img_ColourHist_LBP_Hist_Features/Training')
testX, testY = loadFeaturesAndLabels('../features/img_ColourHist_LBP_Hist_Features/Testing')

# standardizing the features: the scaler is fitted on the training data only and the
# same fitted transform is then applied to the test data
scaler = StandardScaler()
trainX = scaler.fit_transform(trainX)
testX = scaler.transform(testX)

print(f"Training Features Shape: {trainX.shape}")
print(f"Testing Features Shape: {testX.shape}")
print(f"Number of Labels: {len(np.unique(trainY))}")

# training
SVM_7 = SVC(kernel='linear')  # linear SVM classifier
SVM_7.fit(trainX, trainY)

# predicting on test data
testPredictions7 = SVM_7.predict(testX)

# finding and printing the accuracy
accuracy7 = accuracy_score(testY, testPredictions7)
print(f"Linear SVM on LBP + Colour Histogram Test Accuracy: {accuracy7 * 100:.2f}%")

Loading Features from ../features/img_ColourHist_LBP_Hist_Features/Training: 100%|██████████| 160/160 [00:10<00:00, 15.94folder/s]
Loading Features from ../features/img_ColourHist_LBP_Hist_Features/Testing: 100%|██████████| 159/159 [00:02<00:00, 54.91folder/s]


Training Features Shape: (79921, 70)
Testing Features Shape: (26668, 70)
Number of Labels: 132
Linear SVM on LBP + Colour Histogram Test Accuracy: 98.87%


In [11]:
# saving SVM Model 7 together with the scaler its training data was standardized with
model_filename_7 = '../saved_models/SVM_Model_7.pkl'
scaler_filename_7 = '../saved_models/SVM_Model_7_Scaler.pkl'
# joblib.dump does not create missing folders; make sure the target directory exists
os.makedirs(os.path.dirname(model_filename_7), exist_ok=True)
joblib.dump(SVM_7, model_filename_7)
# SVM_7 was fitted on StandardScaler-transformed features, so the saved model can only
# be applied to new data if the same fitted scaler is applied first. `scaler` must still
# be the one fitted in the SVM 7 training cell when this cell runs.
joblib.dump(scaler, scaler_filename_7)
print(f"SVM Model 7 successfully saved to {model_filename_7}")
print(f"Scaler for SVM Model 7 successfully saved to {scaler_filename_7}")

SVM Model 7 successfully saved to ../saved_models/SVM_Model_7.pkl
