In [8]:
import cv2
import numpy as np
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from skimage.feature import hog
from sklearn.utils import shuffle
import os
from sklearn.model_selection import train_test_split




Part a
_________________________________________________________________________________________________________________________________

Models we use: XGBoost and Neural Network.

In [9]:
input_folder = '../dataset' 
# Input folder that must contain 2 folder, one with masked face images and another non-masked
# Here we assume that name of the 2 sub-directories are 'with_mask' and 'without_mask'

For each image we create a feature vector that reflects texture, Histogram of Gradients (HoG), Scale-invaiant fetaures, etc.

In [11]:
# Resize all images to a fixed size
IMAGE_SIZE = (64, 64)  # Ensures consistent feature vector length

def extract_texture_features(image, num_bins=32):
    """Extracts grayscale histogram-based texture features."""
    image = cv2.resize(image, IMAGE_SIZE)  # Ensure fixed size
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hist = cv2.calcHist([gray], [0], None, [num_bins], [0, 256])
    hist = cv2.normalize(hist, hist).flatten()
    return hist

def extract_hog_features(image):
    """Extracts HOG features with a fixed-length descriptor."""
    image = cv2.resize(image, IMAGE_SIZE)  # Ensure fixed size
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    features = hog(
        gray, 
        orientations=9, 
        pixels_per_cell=(8, 8), 
        cells_per_block=(2, 2), 
        block_norm='L2-Hys'
    )
    
    return features

def extract_shape_features(image, num_bins=32):
    """Extracts edge-based shape features using Canny and histogram."""
    image = cv2.resize(image, IMAGE_SIZE)  # Ensure fixed size
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 100, 200)
    
    edge_hist = cv2.calcHist([edges], [0], None, [num_bins], [0, 256])
    edge_hist = cv2.normalize(edge_hist, edge_hist).flatten()
    return edge_hist

def extract_color_features(image):
    """Extracts color histogram features in the HSV space."""
    image = cv2.resize(image, IMAGE_SIZE)  # Ensure fixed size
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    
    hist = cv2.calcHist([hsv], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()
    return hist

def extract_keypoint_features(image, max_features=128):
    """Extracts ORB keypoint-based features with a fixed length."""
    image = cv2.resize(image, IMAGE_SIZE)  # Ensure fixed size
    orb = cv2.ORB_create()
    kp, des = orb.detectAndCompute(image, None)

    if des is None:
        return np.zeros(max_features)  # No keypoints, return zero vector

    des = des.flatten()  # Flatten descriptor array
    return des[:max_features] if len(des) >= max_features else np.pad(des, (0, max_features - len(des)))

def extract_features(image):
    """Combines all feature extractions into a single feature vector."""
    texture_features = extract_texture_features(image)
    shape_features = extract_shape_features(image)
    color_features = extract_color_features(image)
    hog_features = extract_hog_features(image)
    keypoint_features = extract_keypoint_features(image)

    return np.hstack((texture_features, shape_features, color_features, hog_features, keypoint_features))

#image = cv2.imread('dataset/with_mask/0_0_≈˙◊¢ 2020-02-23 132115.png')
#image = cv2.resize(image, (128, 128))  
#features = extract_features(image)  # Extract handcrafted features
#print(f'{type(features)} , {np.shape(features)}')


In [12]:
# Define dataset path
DATASET_PATH = input_folder
CATEGORIES = ["with_mask", "without_mask"]  # these must be name of folders inside DATASET_PATH

# Function to load images and extract features
def load_dataset():
    X, y = [], []

    for category in CATEGORIES:
        label = 1 if category == "with_mask" else 0  # Assign label (1 = mask, 0 = no mask)
        folder_path = os.path.join(DATASET_PATH, category)

        for filename in os.listdir(folder_path):
            img_path = os.path.join(folder_path, filename)
            image = cv2.imread(img_path)

            if image is None:
                continue  # Skip unreadable images

            image = cv2.resize(image, (128, 128))  # Resize for consistency
            features = extract_features(image)  # Extract handcrafted features

            X.append(features)
            y.append(label)

    return np.array(X), np.array(y)

# Load dataset
X, y = load_dataset()

X, y = shuffle(X, y, random_state=42)  # Ensures random order but reproducibility
# Split into training & testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Print dataset info
print(f"Total samples: {len(X)} | Train: {len(X_train)} | Test: {len(X_test)}")




Total samples: 4095 | Train: 3276 | Test: 819


Now we train an ensumble model, precisely XGBoost.

In [10]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split of data already done
# Convertng to DMatrix (XGBoost’s optimized data structure)
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

params = {
    "objective": "binary:logistic",  # Binary classification
    "eval_metric": "logloss",        # Log loss as evaluation metric
    "eta": 0.1,                       # Learning rate
    "max_depth": 10,                    # Tree depth
    "subsample": 0.8,                  # Use 80% of data per tree
    "colsample_bytree": 0.8,           # Use 80% of features per tree
    "seed": 42                         # Reproducibility
}

# Training the  model
num_rounds = 200  # number of boosting rounds
bst = xgb.train(params, dtrain, num_rounds)


# predictions on test split
y_pred_prob = bst.predict(dtest)

# Converting probabilities to binary labels (0 or 1)
y_pred = [1 if prob > 0.5 else 0 for prob in y_pred_prob]


accuracy = accuracy_score(y_test, y_pred)
print(f"XGBoost Test Accuracy: {accuracy:.4f}")



XGBoost Test Accuracy: 0.9414


Now we train and test on a Neural Network.

In [17]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score

# Splitting data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

def create_model():
    model = Sequential([
        Dense(256, activation='relu', input_shape=(X_train.shape[1],)),  # Input layer
        Dropout(0.3),  # Dropout for regularization
        Dense(128, activation='relu'),  # Hidden layer
        Dropout(0.3),
        Dense(64, activation='relu'),  # Hidden layer
        Dense(1, activation='sigmoid')  # Output layer (Binary Classification)
    ])
    model.compile(optimizer=Adam(learning_rate=0.0005), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# k-Fold Cross Validation
k = 5
kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
validation_accuracies = []

for train_index, val_index in kf.split(X_train, y_train):
    X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
    y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]
    
    # Create a new model for each fold
    model = create_model()
    
    # Train model
    model.fit(X_train_fold, y_train_fold, epochs=30, batch_size=32, verbose=0, validation_data=(X_val_fold, y_val_fold))
    
    # Evaluate on validation set
    val_loss, val_accuracy = model.evaluate(X_val_fold, y_val_fold, verbose=0)
    validation_accuracies.append(val_accuracy)

# Average validation accuracy across folds
avg_val_accuracy = np.mean(validation_accuracies)
print(f"Average Validation Accuracy (k={k}): {avg_val_accuracy:.4f}")

# Training final model on the full training data
final_model = create_model()
final_model.fit(X_train, y_train, epochs=30, batch_size=32, verbose=1)

# Evaluate on test set
test_loss, test_accuracy = final_model.evaluate(X_test, y_test, verbose=1)
print(f"Test Accuracy: {test_accuracy:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Average Validation Accuracy (k=5): 0.8971
Epoch 1/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6773 - loss: 0.9061
Epoch 2/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8489 - loss: 0.4527
Epoch 3/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8788 - loss: 0.3358
Epoch 4/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8989 - loss: 0.2817
Epoch 5/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8964 - loss: 0.2864
Epoch 6/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9167 - loss: 0.2128
Epoch 7/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9194 - loss: 0.2105
Epoch 8/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9236 - loss: 0.183