# **Analysing the Best LDA + SVM Model** #

## **Import Libraries** ##

In [1]:
import os
from tqdm import tqdm  # for the progress bar

import numpy as np
from sklearn.metrics import accuracy_score

import joblib

## **Loading Test Features and Labels** ##

In [2]:
# Function to Load Feature Vectors and Labels
def loadFeaturesAndLabels(features_path):
    """Load every saved feature vector under `features_path` together with its label.

    `features_path` is scanned for sub-folders; every `.npy` file found directly
    inside a sub-folder is loaded as one feature vector. The label of a vector is
    the name of its folder with any trailing numeric suffix removed, so that
    e.g. "Apple Golden 1" and "Apple Golden 2" both map to "Apple Golden".

    Args:
        features_path: path of the directory that contains the per-class folders.

    Returns:
        A tuple `(features, labels)` of numpy arrays built from the loaded
        vectors and their labels; entry i of `labels` belongs to entry i of
        `features`. Folders and files are visited in sorted order.

    Raises:
        ValueError: if `features_path` does not exist or is not a directory.
    """

    features = [] # list to store the feature vectors
    labels = [] # list to store the labels

    # checking if the path exists
    if not os.path.exists(features_path):
        raise ValueError(f"Path {features_path} does not exist.")
    # checking if the path is a directory
    if not os.path.isdir(features_path):
        raise ValueError(f"Path {features_path} is not a directory.")

    # os.listdir gives entries in arbitrary order, so sort to make the sample order reproducible
    fruit_folders = sorted(os.listdir(features_path))

    # going through the folders and loading the features
    for fruit_folder in tqdm(fruit_folders, unit="folder", desc=f"Loading Features from {features_path}"):

        fruit_folder_path = os.path.join(features_path, fruit_folder) # creating complete path of the fruit folder

        if not os.path.isdir(fruit_folder_path):  # stray files next to the class folders are ignored
            continue

        # The folder name is the label without the numbers at the end.
        # Only a trailing run of digits is removed (instead of always cutting two
        # characters), so names without a suffix or with a longer suffix stay intact.
        # For the usual "<name> <digit>" folders the result is unchanged.
        label = fruit_folder.rstrip('0123456789').strip()
        if not label:  # folder name consisted only of digits: keep it rather than use an empty label
            label = fruit_folder.strip()

        for featureVectorFile in sorted(os.listdir(fruit_folder_path)):

            if not featureVectorFile.endswith('.npy'):  # checking if its a valid feature file
                continue

            feature_file_path = os.path.join(fruit_folder_path, featureVectorFile) # creating complete path of the feature file

            # load first, then record the label, so both lists always grow together
            features.append(np.load(feature_file_path))
            labels.append(label)

    return np.array(features), np.array(labels)

In [3]:
# loading the feature vectors and labels stored under the Testing directory
testFeatures, testLabels = loadFeaturesAndLabels('../features/img_HOG_ColourHist_Features/Testing')
# shape of the loaded feature array
print(f"Testing Features Shape: {testFeatures.shape}")
# number of distinct label values among the loaded samples
print(f"Number of Labels: {len(np.unique(testLabels))}")

Loading Features from ../features/img_HOG_ColourHist_Features/Testing: 100%|██████████| 160/160 [00:03<00:00, 52.07folder/s]


Testing Features Shape: (26668, 960)
Number of Labels: 131


## **Loading LDA Model 3** ##

In [None]:
# loading LDA Model 3
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source
lda_model_filename = '../saved_models/LDA_Model_3.pkl'
LDA_Model = joblib.load(lda_model_filename)

# number of features in LDA Model 3
# NOTE(review): this reports the column count of `scalings_`; confirm that it equals
# the number of components `LDA_Model.transform` actually produces
print(f"Number of Features in LDA: {LDA_Model.scalings_.shape[1]}")

# predicting the test labels directly with LDA Model 3 (LDA used as the classifier)
testPredictions_LDA = LDA_Model.predict(testFeatures)

# finding the accuracy: fraction of test samples whose prediction equals the true label
accuracy = accuracy_score(testLabels, testPredictions_LDA)
print(f"LDA Classifier Accuracy: {accuracy*100:.2f}%")

Number of Features in LDA: 131
LDA Classifier Accuracy: 94.64%


## **Analyzing LDA + SVM Model 3** ##

In [None]:
# loading LDA + SVM Model 3
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source
lda_svm_model_filename = '../saved_models/LDA_SVM_Model_3.pkl'
LDA_SVM_Model = joblib.load(lda_svm_model_filename)

# transforming the features using LDA Model 3 for LDA + SVM Model 3
# (LDA_Model is the model loaded in the LDA Model 3 cell; that cell has to run first)
testFeaturesLDA = LDA_Model.transform(testFeatures)
print(f"Number of Test Samples: {testFeaturesLDA.shape[0]}")
print(f"Number of Features in LDA + SVM Model: {testFeaturesLDA.shape[1]}")

# predicting using LDA + SVM Model 3 on the LDA-transformed test features
testPredictions_LDA_SVM = LDA_SVM_Model.predict(testFeaturesLDA)

# finding the accuracy: fraction of test samples whose prediction equals the true label
accuracy = accuracy_score(testLabels, testPredictions_LDA_SVM)
print(f"Best LDA + SVM Classifier Test Accuracy: {accuracy*100:.2f}%")

LDA + SVM Classifier Accuracy: 98.90%
