In [23]:
import os
import cv2
import numpy as np
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
import torch


## Data loading

In [None]:
np.random.seed(42)

mask_dir = "dataset/with_mask"
no_mask_dir = "dataset/without_mask"

def augment_image(img):
    augmented_imgs = []
    augmented_imgs.append(img)
    augmented_imgs.append(cv2.flip(img, 1))  # horizontal flip
    augmented_imgs.append(cv2.flip(img, 0))  # vertical flip
    return augmented_imgs

def extract_hog_features(img, img_size=(128,128)):
    resized_img = cv2.resize(img, img_size)
    # Extract HOG features
    hog_features = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
                       cells_per_block=(2, 2), block_norm='L2-Hys', visualize=False)

    return hog_features

def load_data_with_augmentation(image_dir, label):
    features = []
    labels = []
    
    for img_name in os.listdir(image_dir):
        img_path = os.path.join(image_dir, img_name)
        # Read image in grayscale
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue  
        
        # Augment the image (original, horizontal flip, vertical flip)
        aug_imgs = augment_image(img)
        
        # Extract HOG features for each augmented version
        for aug_img in aug_imgs:
            hog_feat = extract_hog_features(aug_img)
            features.append(hog_feat)
            labels.append(label)
    
    return features, labels

features_mask, labels_mask = load_data_with_augmentation(mask_dir, label=1)
features_no_mask, labels_no_mask = load_data_with_augmentation(no_mask_dir, label=0)



## Data splitting

In [None]:
# Combine and shuffle the data
features = features_mask + features_no_mask
labels = labels_mask + labels_no_mask

# Shuffle the combined dataset
combined = list(zip(features, labels))
np.random.shuffle(combined)
features, labels = zip(*combined)

X = np.array(features)
y = np.array(labels)

print("Total samples:", len(y))
print("Feature vector size:", X.shape[1])

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

Total samples: 12285
Feature vector size: 8100


## XGBoost Classifier

In [15]:
# ----------------------------
# XGBoost Classifier
# ----------------------------
print("\nTraining XGBoost Classifier...")
xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
xgb_clf.fit(X_train, y_train)
y_pred_xgb = xgb_clf.predict(X_test)
acc_xgb = accuracy_score(y_test, y_pred_xgb)
print("XGBoost Accuracy: {:.2f}%".format(acc_xgb*100))
print("XGBoost Classification Report:\n", classification_report(y_test, y_pred_xgb))


Training XGBoost Classifier...


Parameters: { "use_label_encoder" } are not used.



XGBoost Accuracy: 90.76%
XGBoost Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.89      0.90      1158
           1       0.90      0.93      0.91      1299

    accuracy                           0.91      2457
   macro avg       0.91      0.91      0.91      2457
weighted avg       0.91      0.91      0.91      2457



In [16]:
# Compute confusion matrix for XGBoost predictions
cm_xgb = confusion_matrix(y_test, y_pred_xgb)
print("Confusion Matrix for XGBoost:\n", cm_xgb)

Confusion Matrix for XGBoost:
 [[1026  132]
 [  95 1204]]


## SVM Classifier

In [17]:
# ----------------------------
# SVM Classifier
# ----------------------------
print("\nTraining SVM Classifier...")
svm_clf = SVC(kernel='rbf', probability=True, random_state=42)
svm_clf.fit(X_train, y_train)
y_pred_svm = svm_clf.predict(X_test)
acc_svm = accuracy_score(y_test, y_pred_svm)
print("SVM Accuracy: {:.2f}%".format(acc_svm*100))
print("SVM Classification Report:\n", classification_report(y_test, y_pred_svm))


Training SVM Classifier...
SVM Accuracy: 92.51%
SVM Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.91      0.92      1158
           1       0.92      0.94      0.93      1299

    accuracy                           0.93      2457
   macro avg       0.93      0.92      0.92      2457
weighted avg       0.93      0.93      0.93      2457



In [18]:
# Compute confusion matrix for SVM predictions
cm_svm = confusion_matrix(y_test, y_pred_svm)
print("Confusion Matrix for SVM:\n", cm_svm)

Confusion Matrix for SVM:
 [[1052  106]
 [  78 1221]]


## Neural Network Classifier

In [19]:
# ----------------------------
# Neural Network Classifier (MLP)
# ----------------------------
print("\nTraining Neural Network (MLP) Classifier...")
mlp_clf = MLPClassifier(hidden_layer_sizes=(100,), activation='relu', solver='adam', 
                        max_iter=200, random_state=42)
mlp_clf.fit(X_train, y_train)
y_pred_mlp = mlp_clf.predict(X_test)
acc_mlp = accuracy_score(y_test, y_pred_mlp)
print("MLP Accuracy: {:.2f}%".format(acc_mlp*100))
print("MLP Classification Report:\n", classification_report(y_test, y_pred_mlp))


Training Neural Network (MLP) Classifier...
MLP Accuracy: 89.05%
MLP Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.88      0.88      1158
           1       0.89      0.90      0.90      1299

    accuracy                           0.89      2457
   macro avg       0.89      0.89      0.89      2457
weighted avg       0.89      0.89      0.89      2457



In [20]:
# Compute confusion matrix for MLP predictions
cm_mlp = confusion_matrix(y_test, y_pred_mlp)
print("Confusion Matrix for MLP:\n", cm_mlp)

Confusion Matrix for MLP:
 [[1015  143]
 [ 126 1173]]


## Performance Metrics

In [24]:
# ----------------------------
# Summary: Compare accuracies
# ----------------------------
print("\nSummary of Classification Accuracies:")
print("SVM: {:.2f}%".format(acc_svm*100))
print("MLP: {:.2f}%".format(acc_mlp*100))
print("XGBoost: {:.2f}%".format(acc_xgb*100))


Summary of Classification Accuracies:
SVM: 92.51%
MLP: 89.05%
XGBoost: 90.76%


## Saving models

In [25]:
# Save the trained models as .pth files
torch.save(svm_clf, 'svm_model.pth')
torch.save(mlp_clf, 'mlp_model.pth')
torch.save(xgb_clf, 'xgb_model.pth')