In [14]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical


In [4]:
# Load features and labels
features = np.load('../feature extraction/image_features.npy')  # shape: (num_images, 500)
labels = np.load('../feature extraction/image_labels.npy')        # shape: (num_images,)

# Normalize the features with StandardScaler
scaler = StandardScaler()
features = scaler.fit_transform(features)

# Assuming labels are integers from 0 to (num_categories-1).
num_classes = len(np.unique(labels))

# Convert labels to one-hot encoding
labels_categorical = to_categorical(labels, num_classes=num_classes)

In [None]:
import numpy as np
from scipy.stats import norm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical

# Load features and labels
features = np.load('../feature extraction/image_features.npy')  # shape: (num_images, 500)
labels = np.load('../feature extraction/image_labels.npy')      # shape: (num_images,)

# Normalize the features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Convert labels to one-hot encoding if needed later
num_classes = len(np.unique(labels))
labels_categorical = to_categorical(labels, num_classes=num_classes)

# PCA Implementation
def compute_pca(X, variance_retained=0.95):
    X_mean = X - np.mean(X, axis=0)
    covariance_matrix = np.cov(X_mean, rowvar=False)
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)
    sorted_indices = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[sorted_indices]
    eigenvectors = eigenvectors[:, sorted_indices]
    cumulative_variance = np.cumsum(eigenvalues) / np.sum(eigenvalues)
    num_components = np.argmax(cumulative_variance >= variance_retained) + 1
    principal_components = eigenvectors[:, :num_components]
    X_pca = np.dot(X_mean, principal_components)
    return X_pca, principal_components

# Naïve Bayes Implementation
class NaiveBayes:
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.means = {}
        self.stds = {}
        self.priors = {}
        for cls in self.classes:
            X_cls = X[y == cls]
            self.means[cls] = np.mean(X_cls, axis=0)
            self.stds[cls] = np.std(X_cls, axis=0) + 1e-6  # Avoid division by zero
            self.priors[cls] = X_cls.shape[0] / X.shape[0]
    
    def predict(self, X):
        predictions = []
        for x in X:
            posteriors = []
            for cls in self.classes:
                prior = np.log(self.priors[cls])
                likelihood = np.sum(norm.logpdf(x, self.means[cls], self.stds[cls]))
                posteriors.append(prior + likelihood)
            predictions.append(self.classes[np.argmax(posteriors)])
        return np.array(predictions)

# Apply PCA
features_pca, _ = compute_pca(features_scaled, variance_retained=0.95)

# Split into train-test sets (80:20)
X_train, X_test, y_train, y_test = train_test_split(features_pca, labels, test_size=0.2, random_state=42, stratify=labels)

# Train and evaluate Naïve Bayes model
nb_model = NaiveBayes()
nb_model.fit(X_train, y_train)

y_train_pred = nb_model.predict(X_train)
y_test_pred = nb_model.predict(X_test)




Train Accuracy: 0.9240
Test Accuracy: 0.5875


In [13]:
from sklearn.metrics import accuracy_score, classification_report
# Calculate accuracy and classification report
train_accuracy = accuracy_score(y_train, y_train_pred)
X_test_accuracy = accuracy_score(y_test, y_test_pred)
test_report = classification_report(y_test, y_test_pred, output_dict=True)

print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {X_test_accuracy:.4f}")

Train Accuracy: 0.9240
Test Accuracy: 0.5875
