In [9]:
import os
import dlib
import numpy as np
import cv2
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import joblib  # Import joblib for saving and loading the model
import albumentations as A
from albumentations.core.composition import Compose

# Locations of the pretrained dlib model files (edit to match your setup)
predictor_path = 'dat/shape_predictor_68_face_landmarks.dat'
face_recognition_model_path = 'dat/dlib_face_recognition_resnet_model_v1.dat'

# dlib components: frontal face detector, landmark shape predictor and the
# face recognition model that is used to compute face descriptors
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)
face_recognition_model = dlib.face_recognition_model_v1(face_recognition_model_path)

# Augmentation pipeline; every transform carries its own probability `p`
augmentations = Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Blur(blur_limit=3, p=0.3),
    ]
)

# Function to apply augmentation
def augment_image(img):
    """Pass `img` through the module-level `augmentations` pipeline.

    Returns the value stored under the 'image' key of the pipeline result.
    """
    return augmentations(image=img)['image']

# Function to extract features
def extract_features(image_path, augment=False):
    """Compute a face descriptor for the first face detected in an image.

    Args:
        image_path: Path of the image file, read with `cv2.imread`.
        augment: When true, the RGB image is passed through `augment_image`
            before face detection.

    Returns:
        A NumPy array built from the descriptor of the first detected face,
        or None when the file cannot be read or no face is detected.
    """
    img = cv2.imread(image_path)
    if img is None:
        # An unreadable or missing file comes back as None; without this
        # guard the colour conversion below would raise instead of letting
        # the caller skip the file.
        print(f"Image at {image_path} could not be read.")
        return None

    # Detection and descriptor computation are done on the RGB version
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    if augment:
        img_rgb = augment_image(img_rgb)  # Apply augmentations if augment=True

    detections = detector(img_rgb)

    if len(detections) == 0:
        return None  # No face detected

    # Only the first detection is used, even when several faces are found
    shape = predictor(img_rgb, detections[0])
    face_descriptor = face_recognition_model.compute_face_descriptor(img_rgb, shape)
    return np.array(face_descriptor)

# Function to load images and labels
def load_data(data_dir, augment=False):
    """Extract face features for every image under `data_dir/male` and `data_dir/female`.

    Args:
        data_dir: Directory that contains a 'male' and a 'female' sub-directory.
        augment: Forwarded to `extract_features` for every image.

    Returns:
        Tuple `(features, labels)` of NumPy arrays. Images for which
        `extract_features` returns None are skipped, so both arrays only
        describe images that produced a feature vector; each label is the
        name of the sub-directory the image came from.
    """
    image_extensions = ('.jpg', '.png')  # Adjust as needed
    features = []
    labels = []
    total_images = 0  # Number of candidate image files seen

    print(f"Loading data from {data_dir}...")
    for gender in ['male', 'female']:
        gender_dir = os.path.join(data_dir, gender)
        print(f"Processing {gender} directory: {gender_dir}")
        for filename in os.listdir(gender_dir):
            # Compare case-insensitively so files such as 'IMG.JPG' are not
            # silently left out of the data set.
            if not filename.lower().endswith(image_extensions):
                continue

            total_images += 1
            image_path = os.path.join(gender_dir, filename)
            feature = extract_features(image_path, augment=augment)
            if feature is not None:
                features.append(feature)
                labels.append(gender)
                print(f"Extracted features from {image_path}")
            else:
                print(f"No face detected in {image_path}")

    print("Data loading complete.")
    # Report processed and usable counts separately: skipped images are part
    # of the first number but not of the returned arrays.
    print(f"Total images processed: {total_images}")
    print(f"Images with extracted features: {len(features)}")
    return np.array(features), np.array(labels)

# ---- Feature extraction ----
# Training features come from augmented images, validation features do not.
X_train, y_train = load_data('data-for-any/convert/train', augment=True)
print(f"Training features shape: {X_train.shape}")
print(f"Training labels shape: {y_train.shape}")

X_val, y_val = load_data('data-for-any/convert/val', augment=False)
print(f"Validation features shape: {X_val.shape}")
print(f"Validation labels shape: {y_val.shape}")

# ---- Training ----
# The linear SVM is fitted directly on the string labels ('male' / 'female').
classifier = SVC(kernel='linear', probability=True)
print("Training the classifier...")
classifier.fit(X_train, y_train)
print("Training complete.")

# ---- Persist the fitted model ----
model_filename = 'gender_classifier_model_no_encode.pkl'
joblib.dump(classifier, model_filename)
print(f"Model saved as {model_filename}")

# ---- Evaluation on the validation set ----
print("Making predictions on validation data...")
y_pred = classifier.predict(X_val)

# Overall accuracy, then accuracy restricted to each true class
accuracy = accuracy_score(y_val, y_pred)
male_accuracy = accuracy_score(y_val[y_val == 'male'], y_pred[y_val == 'male'])
female_accuracy = accuracy_score(y_val[y_val == 'female'], y_pred[y_val == 'female'])

print(f'Male Accuracy: {male_accuracy * 100:.2f}%')
print(f'Female Accuracy: {female_accuracy * 100:.2f}%')
print(f'Validation Accuracy: {accuracy * 100:.2f}%')

# ---- Reload the saved model ----
loaded_model = joblib.load(model_filename)
print("Model loaded successfully for future predictions.")

Loading data from data-for-any/convert/train...
Processing male directory: data-for-any/convert/train/male
No face detected in data-for-any/convert/train/male/aug_1_S315-01-t10_02.jpg
Extracted features from data-for-any/convert/train/male/aug_2_S400-01-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_10_S351-01-t10_01.jpg
Extracted features from data-for-any/convert/train/male/S316-08-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_5_S295-01-t10_02.jpg
Extracted features from data-for-any/convert/train/male/aug_7_S406-02-t10_01.jpg
Extracted features from data-for-any/convert/train/male/S419-07-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_0_S407-03-t10_02.jpg
Extracted features from data-for-any/convert/train/male/aug_7_S400-01-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_2_S388-04-t10_01.jpg
Extracted features from data-for-any/convert/train/male/aug_9_S390-02-t10_01.jpg
Extracted features from data-for-

In [9]:
# Calculate training accuracy.
# The classifier trained in this notebook is fitted on the raw string labels,
# so its predictions are compared with y_train / y_val directly. No encoded
# label arrays are defined anywhere in the notebook, so referring to them
# fails on a fresh kernel.
y_train_pred = classifier.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Make predictions on validation data
print("Making predictions on validation data...")
y_pred = classifier.predict(X_val)

# Calculate accuracy for each gender (restricted to samples of that true class)
male_accuracy = accuracy_score(y_val[y_val == 'male'], y_pred[y_val == 'male'])
female_accuracy = accuracy_score(y_val[y_val == 'female'], y_pred[y_val == 'female'])

# Calculate overall validation accuracy
val_accuracy = accuracy_score(y_val, y_pred)

# Print accuracy scores in a separate block
print("Accuracy metrics:")
print(f'Male Accuracy: {male_accuracy * 100:.2f}%')
print(f'Female Accuracy: {female_accuracy * 100:.2f}%')
print(f'Validation Accuracy: {val_accuracy * 100:.2f}%')

# Print training accuracy (as requested)
print(f'Training Accuracy: {train_accuracy * 100:.2f}%')


Making predictions on validation data...
Accuracy metrics:
Male Accuracy: 98.28%
Female Accuracy: 97.22%
Validation Accuracy: 98.20%
Training Accuracy: 99.28%


In [10]:
import os
import dlib
import numpy as np
import cv2
import joblib  # For loading the model
from sklearn.preprocessing import LabelEncoder

# Locations of the pretrained dlib model files (edit to match your setup)
predictor_path = 'dat/shape_predictor_68_face_landmarks.dat'
face_recognition_model_path = 'dat/dlib_face_recognition_resnet_model_v1.dat'

# dlib components: face detector, landmark shape predictor, face recognition model
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)
face_recognition_model = dlib.face_recognition_model_v1(face_recognition_model_path)

# Trained gender classifier restored from disk.
# NOTE(review): the training cells in this notebook save
# 'gender_classifier_model_no_encode.pkl' - confirm that the file named here
# is the model that is meant to be used.
model_filename = 'gender_classifier_model.pkl'
classifier = joblib.load(model_filename)

# Encoder used to turn the classifier's predictions back into class names.
# NOTE(review): assumes the loaded classifier was trained on labels encoded
# with the same two classes - confirm against the training run.
label_encoder = LabelEncoder().fit(['male', 'female'])

# Function to extract features from a single image
def extract_features(image_path):
    """Compute one face descriptor per face detected in an image.

    Args:
        image_path: Path of the image file, read with `cv2.imread`.

    Returns:
        A NumPy array with one descriptor row per detected face, or None when
        the file cannot be read or no face is detected.
    """
    img = cv2.imread(image_path)
    if img is None:
        # An unreadable or missing file comes back as None; bail out here so
        # the colour conversion below does not raise and the caller can skip it.
        print(f"Image at {image_path} could not be read.")
        return None

    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    detections = detector(img_rgb)

    if len(detections) == 0:
        return None  # No face detected

    # One descriptor per detected face (an image may contain several faces)
    features = []
    for detection in detections:
        shape = predictor(img_rgb, detection)
        face_descriptor = face_recognition_model.compute_face_descriptor(img_rgb, shape)
        features.append(np.array(face_descriptor))
    return np.array(features)

# Function to predict gender for test images
def predict_gender(test_data_dir):
    """Predict a gender label for every image file directly inside `test_data_dir`.

    Files are selected by extension (case-insensitive). For each image with at
    least one detected face, only the first face's features are classified and
    the prediction is mapped back to a label through `label_encoder`.

    Returns:
        List of `(filename, predicted_label)` tuples; images for which
        `extract_features` returns None are reported and left out.
    """
    print(f"Predicting gender for images in {test_data_dir}...")

    # File suffixes that are treated as images
    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff')
    predictions = []

    for filename in os.listdir(test_data_dir):
        if not filename.lower().endswith(valid_extensions):
            continue

        image_path = os.path.join(test_data_dir, filename)
        extracted_features = extract_features(image_path)
        if extracted_features is None:
            print(f"No face detected in {image_path}")
            continue

        # Classify the first detected face only
        first_face = extracted_features[0]
        prediction = classifier.predict([first_face])
        predicted_gender = label_encoder.inverse_transform(prediction)[0]
        predictions.append((filename, predicted_gender))
        print(f"{filename}: Predicted gender: {predicted_gender}")

    return predictions

# Directory that holds the images to classify (replace with your actual path)
test_data_directory = 'sample-data'
predictions = predict_gender(test_data_directory)

# Recap of every (filename, predicted label) pair that was collected
print("\nFinal Predictions:")
for filename, predicted_gender in predictions:
    print(f"{filename}: {predicted_gender}")


Predicting gender for images in sample-data...
adit.jpg: Predicted gender: male
dimas-2.jpg: Predicted gender: male
dimas-6.jpg: Predicted gender: male
faris-2.jpg: Predicted gender: male
Faris-8.jpg: Predicted gender: male
rapi.jpg: Predicted gender: male
uknown-4.jpg: Predicted gender: female
uknown-3.jpg: Predicted gender: female
uknown-1.jpeg: Predicted gender: female
uknown-2.jpg: Predicted gender: female

Final Predictions:
adit.jpg: male
dimas-2.jpg: male
dimas-6.jpg: male
faris-2.jpg: male
Faris-8.jpg: male
rapi.jpg: male
uknown-4.jpg: female
uknown-3.jpg: female
uknown-1.jpeg: female
uknown-2.jpg: female


In [14]:
import os
import dlib
import cv2
import numpy as np

# Locations of the pretrained dlib model files (edit to match your setup)
predictor_path = 'dat/shape_predictor_68_face_landmarks.dat'
face_recognition_model_path = 'dat/dlib_face_recognition_resnet_model_v1.dat'

# Frontal face detector shared by the functions in this cell
detector = dlib.get_frontal_face_detector()

# Define the function to detect faces and draw rectangles
def detect_faces_and_draw_boxes(image_path):
    """Read an image, detect faces on it and draw a rectangle around each one.

    The detector runs on an RGB conversion of the image, while the rectangles
    are drawn on the image as returned by `cv2.imread`, which is also the
    value returned. The result of `cv2.imread` is not checked here.
    """
    img = cv2.imread(image_path)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    box_color = (0, 255, 0)  # Green rectangle
    box_thickness = 2
    for face in detector(img_rgb):
        top_left = (face.left(), face.top())
        bottom_right = (face.right(), face.bottom())
        cv2.rectangle(img, top_left, bottom_right, box_color, box_thickness)

    return img

# Function to process all images in the test directory
def process_test_images(test_data_dir, output_dir):
    """Run face detection on every image in `test_data_dir` and save annotated copies.

    Files are selected by extension (case-insensitive). Each result is written
    to `output_dir` under the original file name prefixed with 'detected_';
    `output_dir` is created first when it does not exist.
    """
    print(f"Processing images in {test_data_dir}...")
    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff')

    # Make sure there is a directory to write the results into
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print(f"Created output directory: {output_dir}")

    image_names = [
        name for name in os.listdir(test_data_dir)
        if name.lower().endswith(valid_extensions)
    ]
    for filename in image_names:
        source_path = os.path.join(test_data_dir, filename)
        annotated = detect_faces_and_draw_boxes(source_path)

        # Annotated copy keeps the original name, prefixed with 'detected_'
        output_path = os.path.join(output_dir, f"detected_{filename}")
        cv2.imwrite(output_path, annotated)
        print(f"Processed {filename}, saved to {output_path}")

# Input directory with the test images (replace with your actual path) and
# the directory that receives the annotated copies
test_data_directory = 'sample-data'
output_directory = 'sample-data/detection'
process_test_images(test_data_directory, output_directory)


Processing images in sample-data...
Processed adit.jpg, saved to sample-data/detection/detected_adit.jpg
Processed dimas-2.jpg, saved to sample-data/detection/detected_dimas-2.jpg
Processed dimas-6.jpg, saved to sample-data/detection/detected_dimas-6.jpg
Processed faris-2.jpg, saved to sample-data/detection/detected_faris-2.jpg
Processed Faris-8.jpg, saved to sample-data/detection/detected_Faris-8.jpg
Processed rapi.jpg, saved to sample-data/detection/detected_rapi.jpg
Processed uknown-4.jpg, saved to sample-data/detection/detected_uknown-4.jpg
Processed uknown-3.jpg, saved to sample-data/detection/detected_uknown-3.jpg
Processed uknown-1.jpeg, saved to sample-data/detection/detected_uknown-1.jpeg
Processed uknown-2.jpg, saved to sample-data/detection/detected_uknown-2.jpg


In [3]:
import os
import dlib
import numpy as np
import cv2
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import joblib  # Import joblib for saving and loading the model
import albumentations as A
from albumentations.core.composition import Compose

# Pretrained dlib model files (edit the paths to match your setup)
predictor_path = 'dat/shape_predictor_68_face_landmarks.dat'
face_recognition_model_path = 'dat/dlib_face_recognition_resnet_model_v1.dat'

# Face detector, landmark shape predictor and the face recognition model
# from which face descriptors are computed
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)
face_recognition_model = dlib.face_recognition_model_v1(face_recognition_model_path)

# Augmentation pipeline; each transform is configured with its own probability `p`
augmentations = Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Blur(blur_limit=3, p=0.3),
    ]
)

# Function to apply augmentation
def augment_image(img):
    """Apply the module-level `augmentations` pipeline to `img` and return the resulting image."""
    result = augmentations(image=img)
    return result['image']

# Function to extract features
def extract_features(image_path, augment=False):
    """Compute a face descriptor for the first face detected in an image.

    Args:
        image_path: Path of the image file, read with `cv2.imread`.
        augment: When true, the RGB image goes through `augment_image`
            before face detection.

    Returns:
        NumPy array built from the first detected face's descriptor, or None
        when the image could not be read or no face was detected.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"Image at {image_path} could not be read.")
        return None

    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_rgb = augment_image(img_rgb) if augment else img_rgb

    faces = detector(img_rgb)
    if len(faces) == 0:
        return None  # No face detected

    # Only the first detection is described
    landmarks = predictor(img_rgb, faces[0])
    return np.array(face_recognition_model.compute_face_descriptor(img_rgb, landmarks))

# Function to load images and labels
def load_data(data_dir, augment=False):
    """Extract face features for the '.jpg' / '.png' files under `data_dir/male` and `data_dir/female`.

    Args:
        data_dir: Directory that contains a 'male' and a 'female' sub-directory.
        augment: Forwarded to `extract_features` for every image.

    Returns:
        Tuple `(features, labels, unreadable_images)`: two NumPy arrays that
        cover the images which produced a feature vector, plus the number of
        images for which `extract_features` returned None.
    """
    features, labels = [], []
    total_images = 0       # every candidate image file seen
    unreadable_images = 0  # candidates without a feature vector

    print(f"Loading data from {data_dir}...")
    for gender in ('male', 'female'):
        gender_dir = os.path.join(data_dir, gender)
        print(f"Processing {gender} directory: {gender_dir}")
        for filename in os.listdir(gender_dir):
            if not filename.endswith(('.jpg', '.png')):  # Adjust as needed
                continue

            total_images += 1
            image_path = os.path.join(gender_dir, filename)
            feature = extract_features(image_path, augment=augment)
            if feature is None:
                unreadable_images += 1
                print(f"No face detected in {image_path}, counted as unreadable.")
                continue

            features.append(feature)
            labels.append(gender)
            print(f"Extracted features from {image_path}")

    print("Data loading complete.")
    print(f"Total images processed: {total_images}")
    print(f"Unreadable images: {unreadable_images}")
    return np.array(features), np.array(labels), unreadable_images

# Load training data with augmentation
X_train, y_train, train_unreadable = load_data('data-for-any/convert/train', augment=True)  # Enable augmentation for training data
print(f"Training features shape: {X_train.shape}")
print(f"Training labels shape: {y_train.shape}")

# Load validation data without augmentation, so the validation metrics are
# measured on the images as stored and do not vary with random transforms
X_val, y_val, val_unreadable = load_data('data-for-any/convert/val', augment=False)  # No augmentation for validation data
print(f"Validation features shape: {X_val.shape}")
print(f"Validation labels shape: {y_val.shape}")

# Train the classifier
classifier = SVC(kernel='linear', probability=True)
print("Training the classifier...")
classifier.fit(X_train, y_train)  # Use raw labels instead of encoded labels
print("Training complete.")

# Save the trained model
model_filename = 'gender_classifier_model_no_encode.pkl'
joblib.dump(classifier, model_filename)
print(f"Model saved as {model_filename}")

# Make predictions on validation data
print("Making predictions on validation data...")
y_pred = classifier.predict(X_val)

# Calculate accuracy for each gender (restricted to samples of that true class)
male_accuracy = accuracy_score(y_val[y_val == 'male'], y_pred[y_val == 'male'])
female_accuracy = accuracy_score(y_val[y_val == 'female'], y_pred[y_val == 'female'])

# Calculate overall validation accuracy
accuracy = accuracy_score(y_val, y_pred)

# Custom validation accuracy: the measured accuracy (in percent) reduced by
# the unreadable-to-readable image ratio (in percent). Both inputs come from
# this run instead of hard-coded constants.
readable_validation_images = len(y_val)
total_validation_images = readable_validation_images + val_unreadable
unreadable_images_ratio = (val_unreadable / readable_validation_images) * 100 if readable_validation_images > 0 else 0
val_accuracy = accuracy * 100 - unreadable_images_ratio

# Print accuracy scores
print(f'Male Accuracy: {male_accuracy * 100:.2f}%')
print(f'Female Accuracy: {female_accuracy * 100:.2f}%')
print(f'Validation Accuracy (standard): {accuracy * 100:.2f}%')
print(f'Custom Validation Accuracy: {val_accuracy:.2f}%')

# Load the model for future predictions
loaded_model = joblib.load(model_filename)
print("Model loaded successfully for future predictions.")


Loading data from data-for-any/convert/train...
Processing male directory: data-for-any/convert/train/male
No face detected in data-for-any/convert/train/male/aug_1_S315-01-t10_02.jpg, counted as unreadable.
Extracted features from data-for-any/convert/train/male/aug_2_S400-01-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_10_S351-01-t10_01.jpg, counted as unreadable.
Extracted features from data-for-any/convert/train/male/S316-08-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_5_S295-01-t10_02.jpg, counted as unreadable.
Extracted features from data-for-any/convert/train/male/aug_7_S406-02-t10_01.jpg
Extracted features from data-for-any/convert/train/male/S419-07-t10_01.jpg
Extracted features from data-for-any/convert/train/male/aug_0_S407-03-t10_02.jpg
Extracted features from data-for-any/convert/train/male/aug_7_S400-01-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_2_S388-04-t10_01.jpg, counted as unreadable.
Extracted feat

In [1]:
import os
import dlib
import numpy as np
import cv2
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import joblib  # Import joblib for saving and loading the model
import albumentations as A
from albumentations.core.composition import Compose

# Where the pretrained dlib model files live (edit to match your setup)
predictor_path = 'dat/shape_predictor_68_face_landmarks.dat'
face_recognition_model_path = 'dat/dlib_face_recognition_resnet_model_v1.dat'

# dlib objects used by extract_features: face detector, landmark shape
# predictor and the face recognition model
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)
face_recognition_model = dlib.face_recognition_model_v1(face_recognition_model_path)

# Augmentation pipeline (flip, rotate, brightness/contrast, blur), each step
# with its own probability `p`
augmentations = Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Blur(blur_limit=3, p=0.3),
    ]
)

# Function to apply augmentation
def augment_image(img):
    """Return `img` after it has been run through the module-level `augmentations` pipeline."""
    return augmentations(image=img)['image']

# Function to extract features
def extract_features(image_path, augment=False):
    """Return the face descriptor of the first face found in the image at `image_path`.

    The image is read with OpenCV, converted to RGB and, when `augment` is
    true, passed through `augment_image` before detection.

    Returns:
        NumPy array built from the descriptor, or None when the file could
        not be read or no face was detected.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        print(f"Image at {image_path} could not be read.")
        return None

    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    if augment:
        rgb = augment_image(rgb)

    found = detector(rgb)
    if len(found) == 0:
        return None  # No face detected

    # Landmarks are computed for the first detection only
    first_face_shape = predictor(rgb, found[0])
    descriptor = face_recognition_model.compute_face_descriptor(rgb, first_face_shape)
    return np.array(descriptor)

# Function to load images and labels
def load_data(data_dir, augment=False):
    """Build feature and label arrays from the 'male' and 'female' sub-directories of `data_dir`.

    Only files whose names end in '.jpg' or '.png' are considered. Every such
    file is handed to `extract_features` (with `augment` forwarded); files
    for which it returns None are counted as unreadable and left out.

    Returns:
        Tuple `(features, labels, unreadable_images)`.
    """
    features = []
    labels = []
    total_images = 0
    unreadable_images = 0

    print(f"Loading data from {data_dir}...")
    for gender in ['male', 'female']:
        gender_dir = os.path.join(data_dir, gender)
        print(f"Processing {gender} directory: {gender_dir}")

        # Candidate images of this class, in directory-listing order
        candidates = [
            name for name in os.listdir(gender_dir)
            if name.endswith('.jpg') or name.endswith('.png')  # Adjust as needed
        ]
        for filename in candidates:
            total_images += 1
            image_path = os.path.join(gender_dir, filename)
            feature = extract_features(image_path, augment=augment)
            if feature is not None:
                features.append(feature)
                labels.append(gender)
                print(f"Extracted features from {image_path}")
            else:
                unreadable_images += 1
                print(f"No face detected in {image_path}, counted as unreadable.")

    print("Data loading complete.")
    print(f"Total images processed: {total_images}")
    print(f"Unreadable images: {unreadable_images}")
    return np.array(features), np.array(labels), unreadable_images

# Load training data with augmentation
X_train, y_train, train_unreadable = load_data('data-for-any/convert/train', augment=True)  # Enable augmentation for training data
print(f"Training features shape: {X_train.shape}")
print(f"Training labels shape: {y_train.shape}")

# Load validation data without augmentation, so the validation metrics are
# measured on the images as stored and do not vary with random transforms
X_val, y_val, val_unreadable = load_data('data-for-any/convert/val', augment=False)  # No augmentation for validation data
print(f"Validation features shape: {X_val.shape}")
print(f"Validation labels shape: {y_val.shape}")

# Train the classifier
classifier = SVC(kernel='linear', probability=True)
print("Training the classifier...")
classifier.fit(X_train, y_train)  # Use raw labels instead of encoded labels
print("Training complete.")

# Save the trained model
model_filename = 'gender_classifier_model_no_encode.pkl'
joblib.dump(classifier, model_filename)
print(f"Model saved as {model_filename}")

# Make predictions on validation data
print("Making predictions on validation data...")
y_pred = classifier.predict(X_val)

# Calculate accuracy for each gender (restricted to samples of that true class)
male_accuracy = accuracy_score(y_val[y_val == 'male'], y_pred[y_val == 'male'])
female_accuracy = accuracy_score(y_val[y_val == 'female'], y_pred[y_val == 'female'])

# Calculate overall validation accuracy
accuracy = accuracy_score(y_val, y_pred)

# Custom validation accuracy: the measured accuracy (in percent) reduced by
# the unreadable-to-readable image ratio (in percent). Both inputs come from
# this run instead of hard-coded constants.
readable_validation_images = len(y_val)
total_validation_images = readable_validation_images + val_unreadable
unreadable_images_ratio = (val_unreadable / readable_validation_images) * 100 if readable_validation_images > 0 else 0
val_accuracy = accuracy * 100 - unreadable_images_ratio

# Print accuracy scores. The measured accuracy is reported under
# 'Validation Accuracy'; the penalised figure is labelled separately so it
# cannot be mistaken for the measured one.
print(f'Male Accuracy: {male_accuracy * 100:.2f}%')
print(f'Female Accuracy: {female_accuracy * 100:.2f}%')
print(f'Validation Accuracy: {accuracy * 100:.2f}%')
print(f'Custom Validation Accuracy: {val_accuracy:.2f}%')

# Load the model for future predictions
loaded_model = joblib.load(model_filename)
print("Model loaded successfully for future predictions.")


  check_for_updates()


Loading data from data-for-any/convert/train...
Processing male directory: data-for-any/convert/train/male
No face detected in data-for-any/convert/train/male/aug_1_S315-01-t10_02.jpg, counted as unreadable.
Extracted features from data-for-any/convert/train/male/aug_2_S400-01-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_10_S351-01-t10_01.jpg, counted as unreadable.
Extracted features from data-for-any/convert/train/male/S316-08-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_5_S295-01-t10_02.jpg, counted as unreadable.
Extracted features from data-for-any/convert/train/male/aug_7_S406-02-t10_01.jpg
Extracted features from data-for-any/convert/train/male/S419-07-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_0_S407-03-t10_02.jpg, counted as unreadable.
Extracted features from data-for-any/convert/train/male/aug_7_S400-01-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_2_S388-04-t10_01.jpg, counted as unread

In [4]:
# Load training data with augmentation.
# The starred target absorbs any extra return values (some load_data
# definitions in this notebook also return an unreadable-image count), so the
# unpacking works with both the two-value and the three-value variants.
X_train, y_train, *_train_extra = load_data('data-for-any/convert/train', augment=True)  # Enable augmentation for training data
print(f"Training: {len(y_train)} photos")  # Show number of training images

# Load validation data without augmentation
X_val, y_val, *_val_extra = load_data('data-for-any/convert/val', augment=False)  # No augmentation for validation data
print(f"Validation: {len(y_val)} photos")  # Show number of validation images


Loading data from data-for-any/convert/train...
Processing male directory: data-for-any/convert/train/male
No face detected in data-for-any/convert/train/male/aug_1_S315-01-t10_02.jpg
Extracted features from data-for-any/convert/train/male/aug_2_S400-01-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_10_S351-01-t10_01.jpg
Extracted features from data-for-any/convert/train/male/S316-08-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_5_S295-01-t10_02.jpg
Extracted features from data-for-any/convert/train/male/aug_7_S406-02-t10_01.jpg
Extracted features from data-for-any/convert/train/male/S419-07-t10_01.jpg
Extracted features from data-for-any/convert/train/male/aug_0_S407-03-t10_02.jpg
Extracted features from data-for-any/convert/train/male/aug_7_S400-01-t10_01.jpg
Extracted features from data-for-any/convert/train/male/aug_2_S388-04-t10_01.jpg
Extracted features from data-for-any/convert/train/male/aug_9_S390-02-t10_01.jpg
Extracted features from d

In [6]:
import os
import numpy as np

# Function to load images and labels
def load_data(data_dir, augment=False, target_count=None):
    """Extract face features for the '.jpg' / '.png' files under `data_dir/male` and `data_dir/female`.

    Args:
        data_dir: Directory that contains a 'male' and a 'female' sub-directory.
        augment: Forwarded to `extract_features` for every image.
        target_count: Optional expected number of successfully loaded images.
            When given and not reached, a warning is printed; the returned
            data is not changed by it.

    Returns:
        Tuple `(features, labels)` of NumPy arrays covering only the images
        for which `extract_features` returned a feature vector.
    """
    features = []
    labels = []
    total_images = 0  # Images that produced a feature vector
    unreadable_images = 0  # Images for which extract_features returned None

    print(f"Loading data from {data_dir}...")
    for gender in ['male', 'female']:
        gender_dir = os.path.join(data_dir, gender)
        print(f"Processing {gender} directory: {gender_dir}")
        for filename in os.listdir(gender_dir):
            if filename.endswith('.jpg') or filename.endswith('.png'):  # Adjust as needed
                image_path = os.path.join(gender_dir, filename)
                feature = extract_features(image_path, augment=augment)

                if feature is not None:
                    features.append(feature)
                    labels.append(gender)
                    total_images += 1
                    print(f"Extracted features from {image_path}")
                else:
                    unreadable_images += 1
                    print(f"No face detected in {image_path}, but counting it as an unreadable image.")

    print("Data loading complete.")
    print(f"Total images processed: {total_images + unreadable_images}")
    print(f"Successfully loaded images: {total_images}")
    print(f"Unreadable images: {unreadable_images}")

    if target_count is not None and total_images < target_count:
        # Only a report: nothing is added to the returned arrays, so the
        # message must not claim otherwise.
        print(
            f"Warning: loaded {total_images} images, fewer than the expected {target_count}; "
            f"the {unreadable_images} unreadable images are not part of the returned data."
        )

    return np.array(features), np.array(labels)

# Training set: features are extracted from augmented images
X_train, y_train = load_data('data-for-any/convert/train', augment=True)
print(f"Training: {len(y_train)} photos")

# Validation set: no augmentation; 534 is passed as the expected number of
# loaded images, which only triggers a warning inside load_data when not reached
X_val, y_val = load_data('data-for-any/convert/val', augment=False, target_count=534)
print(f"Validation: {len(y_val)} photos")


Loading data from data-for-any/convert/train...
Processing male directory: data-for-any/convert/train/male
No face detected in data-for-any/convert/train/male/aug_1_S315-01-t10_02.jpg, but counting it as an unreadable image.
Extracted features from data-for-any/convert/train/male/aug_2_S400-01-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_10_S351-01-t10_01.jpg, but counting it as an unreadable image.
Extracted features from data-for-any/convert/train/male/S316-08-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_5_S295-01-t10_02.jpg, but counting it as an unreadable image.
Extracted features from data-for-any/convert/train/male/aug_7_S406-02-t10_01.jpg
Extracted features from data-for-any/convert/train/male/S419-07-t10_01.jpg
No face detected in data-for-any/convert/train/male/aug_0_S407-03-t10_02.jpg, but counting it as an unreadable image.
Extracted features from data-for-any/convert/train/male/aug_7_S400-01-t10_01.jpg
No face detected in data-fo