# ==========================================
# 1. IMPORTS & CONFIGURATION
# ==========================================

In [1]:
import os
from multiprocessing import Pool, cpu_count

import cv2
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from skimage.feature import hog, local_binary_pattern
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from tensorflow import keras
from tensorflow.keras import layers
from tqdm import tqdm

## Binary Classification Using Handcrafted Features and ML Classifiers

In [2]:
# Dataset location and preprocessing settings shared by the cells below
DATASET_PATH = "../../dataset"  # root folder; one sub-folder per entry in CATEGORIES
CATEGORIES = ["with_mask", "without_mask"]  # list index is used as the integer class label
IMAGE_SIZE = (128, 128)  # every image is resized to this before feature extraction
VALID_EXTENSIONS = {'.jpg', '.jpeg', '.png'}  # image file extensions treated as valid
# Alternative output folder name, currently disabled:
# FEATURES_FOLDER = "testing_features"
FEATURES_FOLDER = "enhanced_features"  # cached feature arrays (X.npy / y.npy) are stored here
EPOCHS = 10  # NOTE(review): not used in the cells shown here; presumably for a later training step - confirm
BATCH_SIZE = 32  # NOTE(review): not used in the cells shown here; presumably for a later training step - confirm

# Create the output folder if it does not exist yet
os.makedirs(FEATURES_FOLDER, exist_ok=True)

# ==========================================
# 2. FEATURE EXTRACTION (HOG, COLOR HISTOGRAM, LBP, EDGE HISTOGRAM)
# ==========================================

In [None]:
# def extract_hog_features(img_path):
#     """ Extract HOG features from an image """
#     try:
#         img = cv2.imread(img_path)
#         if img is None:
#             return None
#         img = cv2.resize(img, IMAGE_SIZE)
#         hog_features = [hog(channel, pixels_per_cell=(8, 8), cells_per_block=(2, 2), block_norm="L2-Hys", visualize=False)
#                         for channel in cv2.split(img)]
#         return np.hstack(hog_features)
#     except:
#         return None

def extract_features(img_path, *, target_length=24300):
    """Extract a combined HOG / color-histogram / LBP / edge feature vector.

    The image is read with OpenCV, resized to ``IMAGE_SIZE`` and described by
    four descriptors that are concatenated in this order:

    1. HOG, computed separately for each color channel;
    2. an 8-bin histogram of each HSV channel (raw counts, not normalized);
    3. a normalized histogram of uniform LBP codes (P=8, R=1);
    4. a normalized 8-bin histogram of the Canny edge map.

    NOTE(review): with ``IMAGE_SIZE = (128, 128)`` and scikit-image's default
    of 9 orientations, the three HOG vectors alone hold
    3 * 15 * 15 * 2 * 2 * 9 = 24300 values. The default ``target_length``
    therefore keeps only the HOG part and cuts off descriptors 2-4. Pass
    ``target_length=None`` (and re-extract any cached features) to keep them.

    Args:
        img_path: Path of the image file to read.
        target_length: Length the vector is truncated or zero-padded to.
            Defaults to 24300, the length this function has always returned.
            ``None`` returns the full concatenated vector unchanged.

    Returns:
        A 1-D ``numpy.ndarray`` of features, or ``None`` when the image
        cannot be read or processed.
    """
    lbp_points = 8  # number of LBP neighbors (P)

    # Only image-related failures are treated as "skip this file"; programming
    # errors (e.g. a missing import) must surface instead of silently turning
    # every image into None.
    try:
        img = cv2.imread(img_path)
        if img is None:
            return None
        img = cv2.resize(img, IMAGE_SIZE)

        # HOG features, one vector per color channel
        hog_features = np.hstack([
            hog(channel, pixels_per_cell=(8, 8), cells_per_block=(2, 2),
                block_norm="L2-Hys", visualize=False)
            for channel in cv2.split(img)
        ])

        # Color histogram features (HSV, 8 bins per channel)
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        hist_features = np.concatenate([
            cv2.calcHist([hsv], [i], None, [8], [0, 256]).flatten()
            for i in range(3)
        ])

        # Local Binary Patterns (LBP)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        lbp = local_binary_pattern(gray, P=lbp_points, R=1, method="uniform")
        # Uniform LBP yields P + 2 distinct codes (0 .. P + 1). Use one bin per
        # code; the former edges 0..9 gave 9 bins and merged codes 8 and 9.
        lbp_hist, _ = np.histogram(
            lbp.ravel(),
            bins=np.arange(0, lbp_points + 3),
            range=(0, lbp_points + 2),
        )
        lbp_hist = lbp_hist.astype("float")
        lbp_hist /= (lbp_hist.sum() + 1e-7)  # epsilon guards against division by zero

        # Edge histogram
        edges = cv2.Canny(gray, 100, 200)
        edge_hist, _ = np.histogram(edges, bins=8, range=(0, 256))
        edge_hist = edge_hist.astype("float")
        edge_hist /= (edge_hist.sum() + 1e-7)  # epsilon guards against division by zero

        # Concatenate all features
        final_features = np.hstack([hog_features, hist_features, lbp_hist, edge_hist])
    except (cv2.error, ValueError) as exc:
        print(f"Skipping {img_path}: {exc}")
        return None

    # Optionally force a fixed length (see the NOTE in the docstring)
    if target_length is not None:
        size = final_features.shape[0]
        if size > target_length:
            final_features = final_features[:target_length]
        elif size < target_length:
            final_features = np.pad(final_features, (0, target_length - size))

    return final_features


def process_image(args):
    """Worker entry point: turn one ``(image path, label)`` pair into features.

    Args:
        args: A 2-tuple ``(img_path, label)``; packed into one argument so the
            function can be mapped over by a multiprocessing pool.

    Returns:
        ``(features, label)`` when extraction succeeds, otherwise
        ``(None, None)``.
    """
    path, target = args
    extracted = extract_features(path)
    if extracted is None:
        return None, None
    return extracted, target

def load_dataset_parallel():
    """Load the dataset and extract features for every image in parallel.

    Each sub-folder of ``DATASET_PATH`` named in ``CATEGORIES`` is scanned for
    files whose extension is in ``VALID_EXTENSIONS`` (case-insensitive). The
    files are handed to a process pool that runs ``process_image`` on each.
    Images whose extraction fails are dropped.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` holds one feature vector per
        successfully processed image and ``y`` the matching integer label
        (the index of the image's category in ``CATEGORIES``).
    """
    image_paths_labels = []
    for label, cat in enumerate(CATEGORIES):
        cat_dir = os.path.join(DATASET_PATH, cat)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and any seeded split made from it) reproducible.
        for fname in sorted(os.listdir(cat_dir)):
            # Skip non-image entries up front instead of sending them to a worker.
            if os.path.splitext(fname)[1].lower() in VALID_EXTENSIONS:
                image_paths_labels.append((os.path.join(cat_dir, fname), label))

    # Leave one core free for the main process, but always use at least one worker.
    with Pool(max(cpu_count() - 1, 1)) as pool:
        results = list(tqdm(pool.imap(process_image, image_paths_labels),
                            total=len(image_paths_labels)))

    X_hog, y_hog = [], []
    for features, label in results:
        if features is not None:
            X_hog.append(features)
            y_hog.append(label)

    return np.array(X_hog), np.array(y_hog)

# Run feature extraction only if the cached arrays are missing.
# Both files are required: a cache with X.npy but no y.npy is unusable.
features_file = os.path.join(FEATURES_FOLDER, "X.npy")
labels_file = os.path.join(FEATURES_FOLDER, "y.npy")

if os.path.exists(features_file) and os.path.exists(labels_file):
    print("Loading pre-extracted features...")
    X_hog = np.load(features_file)
    y_hog = np.load(labels_file)
else:
    print("Extracting features...")
    X_hog, y_hog = load_dataset_parallel()
    # Never cache an empty result: it would be silently reloaded on every
    # later run and hide the underlying extraction failure.
    if len(X_hog) == 0:
        raise RuntimeError(
            f"No features could be extracted from {DATASET_PATH!r}; nothing was saved."
        )
    np.save(features_file, X_hog)
    np.save(labels_file, y_hog)
    print(f"Feature extraction completed. Saved {X_hog.shape}.")

# Split dataset
# X_train, X_test, y_train, y_test = train_test_split(X_hog, y_hog, test_size=0.2, random_state=42)

  0%|          | 0/4094 [00:00<?, ?it/s]

In [4]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_hog,
    y_hog,
    test_size=0.2,
    random_state=42,
)

# ==========================================
# 3. CLASSIFICATION
# ==========================================

In [None]:
# Fit a linear-kernel SVM on the training split and score it on the held-out split
svm_model = SVC(kernel="linear").fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
svm_acc = accuracy_score(y_test, svm_pred)
svm_cm = confusion_matrix(y_test, svm_pred)

# Text summary: overall accuracy followed by the per-class report
print(f"SVM Accuracy: {svm_acc:.4f}")
print("\nSVM Classification Report:")
print(classification_report(y_test, svm_pred))

# Confusion matrix heatmap (y axis: actual class, x axis: predicted class)
sns.heatmap(svm_cm, annot=True, fmt="d", cmap="Blues").set(
    title="SVM Confusion Matrix",
    xlabel="Predicted",
    ylabel="Actual",
)
plt.show()