In [17]:
import os
import cv2
import dlib
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from imblearn.under_sampling import RandomUnderSampler
from albumentations import Compose, HorizontalFlip, RandomBrightnessContrast, Rotate, Resize, ShiftScaleRotate

# Paths
train_path = "the-data/data/train_converted/"
val_path = "the-data/data/val_converted/"

# Dlib models
face_detector = dlib.get_frontal_face_detector()
face_rec_model = dlib.face_recognition_model_v1("dat/dlib_face_recognition_resnet_model_v1.dat")

# Augmentation pipeline
augmentations = Compose([
    HorizontalFlip(p=0.5),
    RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
    Rotate(limit=15, p=0.5),
    Resize(150, 150, p=1.0),
    ShiftScaleRotate(p=0.3),
])

# Function to apply augmentations
def augment_image(image):
    augmented = augmentations(image=image)
    return augmented['image']

# Function to extract features from face
def extract_features(image, face_rect):
    # Load the shape predictor
    shape_predictor = dlib.shape_predictor("dat/shape_predictor_68_face_landmarks.dat")
    
    # Detect facial landmarks
    shape = shape_predictor(image, face_rect)
    
    # Convert grayscale image to RGB
    if len(image.shape) == 2:  # Grayscale
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

    # Compute the face descriptor
    face_descriptor = face_rec_model.compute_face_descriptor(image, shape)
    return np.array(face_descriptor)

# Process dataset and extract features
def process_dataset(dataset_path, augment=False):
    features = []
    labels = []
    for root, dirs, files in os.walk(dataset_path):
        label = os.path.basename(root)  # Subdirectory name as label
        for file in files:
            file_path = os.path.join(root, file)
            image = cv2.imread(file_path)
            if image is None:
                print(f"Skipping invalid file: {file_path}")
                continue
            
            # Convert to grayscale
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # Detect faces
            faces = face_detector(gray)

            for face in faces:
                # Extract features
                feature = extract_features(gray, face)
                features.append(feature)
                labels.append(label)

            # Apply augmentations if enabled
            if augment:
                augmented_image = augment_image(image)
                augmented_gray = cv2.cvtColor(augmented_image, cv2.COLOR_BGR2GRAY)
                augmented_faces = face_detector(augmented_gray)
                
                for aug_face in augmented_faces:
                    aug_feature = extract_features(augmented_gray, aug_face)
                    features.append(aug_feature)
                    labels.append(label)
    
    return np.array(features), np.array(labels)

# Load and preprocess dataset
print("Processing training data...")
X_train, y_train = process_dataset(train_path, augment=True)

print("Processing validation data...")
X_val, y_val = process_dataset(val_path, augment=False)

# Encode labels to integers by fitting on both training and validation labels
label_encoder = LabelEncoder()
all_labels = np.concatenate((y_train, y_val))  # Combine both training and validation labels
label_encoder.fit(all_labels)

y_train = label_encoder.transform(y_train)
y_val = label_encoder.transform(y_val)

# Apply undersampling on the training data to balance the classes
undersample = RandomUnderSampler(sampling_strategy='auto', random_state=42)
X_train_resampled, y_train_resampled = undersample.fit_resample(X_train, y_train)

# Train the model with undersampled data
print("Training classifier...")
clf = SVC(kernel="linear", class_weight='balanced', C=10, gamma='scale')
clf.fit(X_train_resampled, y_train_resampled)

# Evaluate the model using cross-validation
print("Evaluating model with cross-validation...")
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []

for train_index, val_index in kf.split(X_train_resampled, y_train_resampled):
    X_train_fold, X_val_fold = X_train_resampled[train_index], X_train_resampled[val_index]
    y_train_fold, y_val_fold = y_train_resampled[train_index], y_train_resampled[val_index]
    
    clf.fit(X_train_fold, y_train_fold)
    y_pred = clf.predict(X_val_fold)
    accuracies.append(accuracy_score(y_val_fold, y_pred))

# Print cross-validation accuracy
cross_val_accuracy = np.mean(accuracies) * 100
print(f"Cross-Validation Accuracy: {cross_val_accuracy:.2f}%")

# Evaluate the model on the validation set
print("Evaluating model on validation data...")
y_pred = clf.predict(X_val)
validation_accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {validation_accuracy * 100:.2f}%")


Processing training data...
Processing validation data...
Training classifier...
Evaluating model with cross-validation...


ValueError: n_splits=5 cannot be greater than the number of members in each class.

penambahan data augmentasi, fitur ekstraksi, hyperparameter tuning, k-fold cross-validation

In [14]:
# # hasil validasi : 92.59%, cross-validasi : 93.70%

# import os
# import cv2
# import dlib
# import numpy as np
# from sklearn.svm import SVC
# from sklearn.metrics import accuracy_score
# from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
# from sklearn.preprocessing import LabelEncoder
# from albumentations import Compose, HorizontalFlip, RandomBrightnessContrast, Rotate, Resize, ShiftScaleRotate

# # Paths
# train_path = "the-data/data/train_converted/"
# val_path = "the-data/data/val_converted/"

# # Dlib models
# face_detector = dlib.get_frontal_face_detector()
# face_rec_model = dlib.face_recognition_model_v1("dat/dlib_face_recognition_resnet_model_v1.dat")
# shape_predictor = dlib.shape_predictor("dat/shape_predictor_68_face_landmarks.dat")

# # Augmentation pipeline
# augmentations = Compose([
#     HorizontalFlip(p=0.5),
#     RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
#     Rotate(limit=15, p=0.5),
#     Resize(150, 150, p=1.0),
#     ShiftScaleRotate(p=0.3),
# ])

# # Function to apply augmentations
# def augment_image(image):
#     augmented = augmentations(image=image)
#     return augmented['image']

# # Function to extract features from face
# def extract_features(image, face_rect):
#     # Detect facial landmarks
#     shape = shape_predictor(image, face_rect)
#     # Convert grayscale image to RGB if needed
#     if len(image.shape) == 2:  # Grayscale
#         image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
#     # Compute the face descriptor
#     face_descriptor = face_rec_model.compute_face_descriptor(image, shape)
#     return np.array(face_descriptor)

# # Process dataset and extract features
# def process_dataset(dataset_path, augment=False):
#     features = []
#     labels = []

#     for root, dirs, files in os.walk(dataset_path):
#         label = os.path.basename(root)  # Subdirectory name as label
#         for file in files:
#             file_path = os.path.join(root, file)
#             image = cv2.imread(file_path)
#             if image is None:
#                 print(f"Skipping invalid file: {file_path}")
#                 continue

#             # Convert to grayscale
#             gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

#             # Detect faces
#             faces = face_detector(gray)
#             for face in faces:
#                 # Extract features
#                 feature = extract_features(gray, face)
#                 features.append(feature)
#                 labels.append(label)

#                 # Apply augmentations if enabled
#                 if augment:
#                     augmented_image = augment_image(image)
#                     augmented_gray = cv2.cvtColor(augmented_image, cv2.COLOR_BGR2GRAY)
#                     augmented_faces = face_detector(augmented_gray)
#                     for aug_face in augmented_faces:
#                         aug_feature = extract_features(augmented_gray, aug_face)
#                         features.append(aug_feature)
#                         labels.append(label)

#     return np.array(features), np.array(labels)

# # Process dataset
# print("Processing training and validation data...")
# X_train, y_train = process_dataset(train_path, augment=True)
# X_val, y_val = process_dataset(val_path, augment=False)

# # Combine labels for encoding
# all_labels = np.concatenate((y_train, y_val))

# # Encode labels
# label_encoder = LabelEncoder()
# label_encoder.fit(all_labels)
# y_train = label_encoder.transform(y_train)
# y_val = label_encoder.transform(y_val)

# # Hyperparameter tuning using GridSearchCV
# param_grid = {
#     'C': [0.1, 1, 10],
#     'kernel': ['linear', 'rbf'],
#     'gamma': ['scale', 0.1, 0.01]
# }
# clf = GridSearchCV(SVC(probability=True), param_grid, cv=5, scoring='accuracy')

# print("Training classifier with cross-validation...")
# clf.fit(X_train, y_train)

# # Best parameters and cross-validation scores
# print(f"Best Parameters: {clf.best_params_}")
# print(f"Cross-Validation Accuracy: {clf.best_score_ * 100:.2f}%")

# # Evaluate on validation set
# print("Evaluating model on validation data...")
# y_pred = clf.predict(X_val)
# accuracy = accuracy_score(y_val, y_pred)
# print(f"Validation Accuracy: {accuracy * 100:.2f}%")


Processing training and validation data...
Training classifier with cross-validation...




Best Parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'linear'}
Cross-Validation Accuracy: 93.70%
Evaluating model on validation data...
Validation Accuracy: 92.59%


menambahkan blur backgrond dan membuat kotak di wajah(?)

In [11]:
# hasil : 30.86%

# import os
# import cv2
# import dlib
# import numpy as np
# from sklearn.svm import SVC
# from sklearn.metrics import accuracy_score
# from sklearn.model_selection import train_test_split
# from albumentations import Compose, HorizontalFlip, RandomBrightnessContrast, Rotate, Resize, ShiftScaleRotate, GaussNoise

# # Paths
# train_path = "the-data/data/train_converted/"
# val_path = "the-data/data/val_converted/"

# # Dlib models
# face_detector = dlib.get_frontal_face_detector()
# face_rec_model = dlib.face_recognition_model_v1("dat/dlib_face_recognition_resnet_model_v1.dat")
# shape_predictor = dlib.shape_predictor("dat/shape_predictor_68_face_landmarks.dat")

# # Augmentation pipeline
# augmentations = Compose([
#     HorizontalFlip(p=0.5),
#     RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
#     Rotate(limit=15, p=0.5),
#     Resize(150, 150, p=1.0),
#     ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.3),
#     GaussNoise(var_limit=(10, 50), p=0.3)
# ])

# # Function to apply augmentations
# def augment_image(image):
#     augmented = augmentations(image=image)
#     return augmented['image']

# # Function to blur background
# def blur_background(image, face_rects):
#     mask = np.zeros_like(image)  # Create a black mask
#     for rect in face_rects:
#         x, y, w, h = rect.left(), rect.top(), rect.width(), rect.height()
#         cv2.rectangle(mask, (x, y), (x + w, y + h), (255, 255, 255), -1)  # Mask the face region

#     blurred = cv2.GaussianBlur(image, (99, 99), 30)  # Apply Gaussian blur
#     blurred_background = np.where(mask == 255, image, blurred)  # Combine blurred background with original face region
#     return blurred_background

# # Function to extract features from face
# def extract_features(image, face_rect):
#     # Detect facial landmarks
#     shape = shape_predictor(image, face_rect)
#     # Convert grayscale image to RGB
#     if len(image.shape) == 2:  # Grayscale
#         image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
#     # Compute the face descriptor
#     face_descriptor = face_rec_model.compute_face_descriptor(image, shape)
#     return np.array(face_descriptor)

# # Process dataset and extract features
# def process_dataset(dataset_path, augment=False):
#     features = []
#     labels = []

#     for root, dirs, files in os.walk(dataset_path):
#         label = os.path.basename(root)  # Subdirectory name as label
#         for file in files:
#             file_path = os.path.join(root, file)
#             image = cv2.imread(file_path)
#             if image is None:
#                 print(f"Skipping invalid file: {file_path}")
#                 continue

#             # Convert to grayscale
#             gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

#             # Detect faces
#             faces = face_detector(gray)
#             if faces:
#                 # Blur background
#                 blurred_image = blur_background(image, faces)
                
#                 for face in faces:
#                     # Extract features
#                     feature = extract_features(gray, face)
#                     features.append(feature)
#                     labels.append(label)

#                     # Apply augmentations if enabled
#                     if augment:
#                         augmented_image = augment_image(image=blurred_image)
#                         augmented_gray = cv2.cvtColor(augmented_image, cv2.COLOR_BGR2GRAY)
#                         augmented_faces = face_detector(augmented_gray)
#                         for aug_face in augmented_faces:
#                             aug_feature = extract_features(augmented_gray, aug_face)
#                             features.append(aug_feature)
#                             labels.append(label)

#     return np.array(features), np.array(labels)

# # Train the model
# print("Processing training data...")
# X_train, y_train = process_dataset(train_path, augment=True)

# print("Processing validation data...")
# X_val, y_val = process_dataset(val_path, augment=False)

# print("Training classifier...")
# clf = SVC(kernel="linear", probability=True)
# clf.fit(X_train, y_train)

# # Evaluate the model
# print("Evaluating model...")
# y_pred = clf.predict(X_val)
# accuracy = accuracy_score(y_val, y_pred)
# print(f"Validation Accuracy: {accuracy * 100:.2f}%")


Processing training data...
Processing validation data...
Training classifier...
Evaluating model...
Validation Accuracy: 30.86%


penambahan augmentasi shiftScaleRotate

In [9]:
# # hasil adalah 33.95%

# import os
# import cv2
# import dlib
# import numpy as np
# from sklearn.svm import SVC
# from sklearn.metrics import accuracy_score
# from sklearn.model_selection import train_test_split
# from albumentations import Compose, HorizontalFlip, RandomBrightnessContrast, Rotate, Resize, ShiftScaleRotate
# from albumentations import GaussNoise

# # Paths
# train_path = "the-data/data/train_converted/"
# val_path = "the-data/data/val_converted/"

# # Dlib models
# face_detector = dlib.get_frontal_face_detector()
# face_rec_model = dlib.face_recognition_model_v1("dat/dlib_face_recognition_resnet_model_v1.dat")


# # Augmentation pipeline
# augmentations = Compose([
#     HorizontalFlip(p=0.5),
#     RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
#     Rotate(limit=15, p=0.5),
#     Resize(150, 150, p=1.0),
#     ShiftScaleRotate(p=0.3),  
# ])


# # Function to apply augmentations
# def augment_image(image):
#     augmented = augmentations(image=image)
#     return augmented['image']

# # Function to extract features from face
# def extract_features(image, face_rect):
#     # Load the shape predictor
#     shape_predictor = dlib.shape_predictor("dat/shape_predictor_68_face_landmarks.dat")
#     # Detect facial landmarks
#     shape = shape_predictor(image, face_rect)
#     # Convert grayscale image to RGB
#     if len(image.shape) == 2:  # Grayscale
#         image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
#     # Compute the face descriptor
#     face_descriptor = face_rec_model.compute_face_descriptor(image, shape)
#     return np.array(face_descriptor)


# # Process dataset and extract features
# def process_dataset(dataset_path, augment=False):
#     features = []
#     labels = []

#     for root, dirs, files in os.walk(dataset_path):
#         label = os.path.basename(root)  # Subdirectory name as label
#         for file in files:
#             file_path = os.path.join(root, file)
#             image = cv2.imread(file_path)
#             if image is None:
#                 print(f"Skipping invalid file: {file_path}")
#                 continue

#             # Convert to grayscale
#             gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

#             # Detect faces
#             faces = face_detector(gray)
#             for face in faces:
#                 # Extract features
#                 feature = extract_features(gray, face)
#                 features.append(feature)
#                 labels.append(label)

#                 # Apply augmentations if enabled
#                 if augment:
#                     augmented_image = augment_image(image)
#                     augmented_gray = cv2.cvtColor(augmented_image, cv2.COLOR_BGR2GRAY)
#                     augmented_faces = face_detector(augmented_gray)
#                     for aug_face in augmented_faces:
#                         aug_feature = extract_features(augmented_gray, aug_face)
#                         features.append(aug_feature)
#                         labels.append(label)

#     return np.array(features), np.array(labels)

# # Train the model
# print("Processing training data...")
# X_train, y_train = process_dataset(train_path, augment=True)

# print("Processing validation data...")
# X_val, y_val = process_dataset(val_path, augment=False)

# print("Training classifier...")
# clf = SVC(kernel="linear", probability=True)
# clf.fit(X_train, y_train)

# # Evaluate the model
# print("Evaluating model...")
# y_pred = clf.predict(X_val)
# accuracy = accuracy_score(y_val, y_pred)
# print(f"Validation Accuracy: {accuracy * 100:.2f}%")


In [7]:
# hasil adalah 33.33%

# import os
# import cv2
# import dlib
# import numpy as np
# from sklearn.svm import SVC
# from sklearn.metrics import accuracy_score
# from sklearn.model_selection import train_test_split
# from albumentations import Compose, HorizontalFlip, RandomBrightnessContrast, Rotate, Resize
# from albumentations import GaussNoise

# # Paths
# train_path = "the-data/data/train_converted/"
# val_path = "the-data/data/val_converted/"

# # Dlib models
# face_detector = dlib.get_frontal_face_detector()
# face_rec_model = dlib.face_recognition_model_v1("dat/dlib_face_recognition_resnet_model_v1.dat")


# # Augmentation pipeline
# augmentations = Compose([
#     HorizontalFlip(p=0.5),
#     RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
#     Rotate(limit=15, p=0.5),
#     Resize(150, 150, p=1.0)
# ])


# # Function to apply augmentations
# def augment_image(image):
#     augmented = augmentations(image=image)
#     return augmented['image']

# # Function to extract features from face
# def extract_features(image, face_rect):
#     # Load the shape predictor
#     shape_predictor = dlib.shape_predictor("dat/shape_predictor_68_face_landmarks.dat")
#     # Detect facial landmarks
#     shape = shape_predictor(image, face_rect)
#     # Convert grayscale image to RGB
#     if len(image.shape) == 2:  # Grayscale
#         image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
#     # Compute the face descriptor
#     face_descriptor = face_rec_model.compute_face_descriptor(image, shape)
#     return np.array(face_descriptor)


# # Process dataset and extract features
# def process_dataset(dataset_path, augment=False):
#     features = []
#     labels = []

#     for root, dirs, files in os.walk(dataset_path):
#         label = os.path.basename(root)  # Subdirectory name as label
#         for file in files:
#             file_path = os.path.join(root, file)
#             image = cv2.imread(file_path)
#             if image is None:
#                 print(f"Skipping invalid file: {file_path}")
#                 continue

#             # Convert to grayscale
#             gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

#             # Detect faces
#             faces = face_detector(gray)
#             for face in faces:
#                 # Extract features
#                 feature = extract_features(gray, face)
#                 features.append(feature)
#                 labels.append(label)

#                 # Apply augmentations if enabled
#                 if augment:
#                     augmented_image = augment_image(image)
#                     augmented_gray = cv2.cvtColor(augmented_image, cv2.COLOR_BGR2GRAY)
#                     augmented_faces = face_detector(augmented_gray)
#                     for aug_face in augmented_faces:
#                         aug_feature = extract_features(augmented_gray, aug_face)
#                         features.append(aug_feature)
#                         labels.append(label)

#     return np.array(features), np.array(labels)

# # Train the model
# print("Processing training data...")
# X_train, y_train = process_dataset(train_path, augment=True)

# print("Processing validation data...")
# X_val, y_val = process_dataset(val_path, augment=False)

# print("Training classifier...")
# clf = SVC(kernel="linear", probability=True)
# clf.fit(X_train, y_train)

# # Evaluate the model
# print("Evaluating model...")
# y_pred = clf.predict(X_val)
# accuracy = accuracy_score(y_val, y_pred)
# print(f"Validation Accuracy: {accuracy * 100:.2f}%")
