In [1]:
import os
import cv2
import numpy as np

# Function to apply random transformations to an image
def random_transform(img):
    rows, cols = img.shape

    # Random rotation
    angle = np.random.uniform(-30, 30)
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    img = cv2.warpAffine(img, M, (cols, rows), borderMode=cv2.BORDER_REFLECT)

    # Random translation
    tx = np.random.uniform(-0.3 * cols, 0.3 * cols)
    ty = np.random.uniform(-0.3 * rows, 0.3 * rows)
    M = np.float32([[1, 0, tx], [0, 1, ty]])
    img = cv2.warpAffine(img, M, (cols, rows), borderMode=cv2.BORDER_REFLECT)

    return img

# Function to load and preprocess training data
def load_training_data(data_folder, target_size=(224, 224)):
    images = []
    labels = []
    label_map = {}
    current_label = 0

    for person_name in os.listdir(data_folder):
        person_folder = os.path.join(data_folder, person_name)
        if os.path.isdir(person_folder):
            label_map[current_label] = person_name
            for filename in os.listdir(person_folder):
                img_path = os.path.join(person_folder, filename)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is not None:
                    img_resized = cv2.resize(img, target_size)
                    augmented_imgs = [img_resized] + [
                        random_transform(img_resized) for _ in range(5)
                    ]
                    images.extend(augmented_imgs)
                    labels.extend([current_label] * len(augmented_imgs))
            current_label += 1

    return np.array(images), np.array(labels), label_map



# Path to the cropped faces folder
data_folder = "../data/faces3"

# Load training data
images, labels, label_map = load_training_data(data_folder)

print(f"Loaded {len(images)} images for training.")
print("Label Map:", label_map)

# Ensure images are in uint8 format (required for OpenCV)
images = images.astype('uint8')

# Train the LBPHFaceRecognizer model
recognizer = cv2.face.LBPHFaceRecognizer_create(radius=2, neighbors=8, grid_x=8, grid_y=8)

print("Training the LBPHFaceRecognizer model...")
recognizer.train(images, np.array(labels))

# Save the trained model
model_path = "../data/lbph_model.yml"
recognizer.write(model_path)
print(f"Model saved to {model_path}")

Loaded 3666 images for training.
Label Map: {0: 'matthias', 1: 'lasse', 2: 'akif', 3: 'bart', 4: 'florian', 5: 'daiane', 6: 'konrad', 7: 'senne', 8: 'michiel', 9: 'seppe', 10: 'youssef', 11: 'raul', 12: 'alper', 13: 'nelli'}
Training the LBPHFaceRecognizer model...
Model saved to ../data/lbph_model.yml


In [2]:

def predict_faces_on_folder(model_path, test_faces_folder, label_map):
    """
    Predict the labels of all faces in a folder using the trained LBPH model.
    Args:
        model_path: Path to the saved LBPH model.
        test_faces_folder: Path to the folder containing test face images.
        label_map: Dictionary mapping integer labels to person names.
    Returns:
        results: List of dictionaries with filename, predicted label, and confidence.
    """
    # Load the trained model
    recognizer = cv2.face.LBPHFaceRecognizer_create()
    recognizer.read(model_path)

    results = []

    for filename in os.listdir(test_faces_folder):
        test_image_path = os.path.join(test_faces_folder, filename)
        
        # Load and preprocess the test image
        
        test_img = cv2.imread(test_image_path, cv2.IMREAD_GRAYSCALE)
        if test_img is None:
            print(f"Failed to read {test_image_path}, skipping.")
            continue

        test_img_resized = cv2.resize(test_img, (224, 224))  # Resize to match training size

        # Predict the label
        label, confidence = recognizer.predict(test_img_resized)
        person_name = label_map[label]
        results.append({
            "filename": filename,
            "predicted_label": person_name,
            "confidence": confidence
        })

    return results

In [3]:
# Paths
model_path = "../data/lbph_model.yml"
test_faces_folder = "../data/faces4_test"
# label_map = {0: "senne", 1: "youssef", 2: "akif"}  # Update with your actual label map

# Predict on all test faces
results = predict_faces_on_folder(model_path, test_faces_folder, label_map)

In [4]:
# Sort results by filename
results = sorted(results, key=lambda x: x['filename'])

# Print results
print("Test Results:")
for result in results:
    print(f"Filename: {result['filename']}, Predicted: {result['predicted_label']}, Confidence: {result['confidence']}")

Test Results:
Filename: 0037_face_0.jpg, Predicted: senne, Confidence: 58.275749855816414
Filename: 0039_face_0.jpg, Predicted: senne, Confidence: 73.45496818371741
Filename: 0039_face_1.jpg, Predicted: youssef, Confidence: 58.0842313745586
Filename: 0039_face_2.jpg, Predicted: matthias, Confidence: 60.14213389760546
Filename: 0040_face_0.jpg, Predicted: seppe, Confidence: 69.08402443532239
Filename: 0040_face_1.jpg, Predicted: alper, Confidence: 51.54332270470053
Filename: 0040_face_3.jpg, Predicted: seppe, Confidence: 70.90705423599509
Filename: 0043_face_0.jpg, Predicted: seppe, Confidence: 93.61705197459358
Filename: 0043_face_1.jpg, Predicted: michiel, Confidence: 73.16671000477042
Filename: 0045_face_0.jpg, Predicted: alper, Confidence: 70.67723281493001
Filename: 0051_face_0.jpg, Predicted: youssef, Confidence: 85.3638853885874
Filename: 0058_face_0.jpg, Predicted: bart, Confidence: 66.0778096481179
Filename: 0062_face_0.jpg, Predicted: konrad, Confidence: 79.23217776571477
File

In [5]:
def group_results_by_image(results):
    """
    Group individual face predictions into a list of predicted labels for each image.
    Args:
        results: List of dictionaries with filename, predicted label, and confidence.
                 Example: [{"filename": "0125_face_0.jpg", "predicted_label": "senne", ...}, ...]
    Returns:
        grouped_results: List of dictionaries with image filename and grouped predicted labels.
                         Example: [{"filename": "0125.jpg", "predicted_labels": ["senne", "matthias"]}]
    """
    grouped_data = {}

    for result in results:
        # Extract the original image filename (before "_face_X")
        base_filename = result["filename"].split("_face")[0]

        # Add the predicted label to the grouped data
        if base_filename not in grouped_data:
            grouped_data[base_filename] = []
        if result["predicted_label"] != "unknown":  # Skip "unknown" predictions if needed
            grouped_data[base_filename].append(result["predicted_label"].lower())  # Convert to lowercase

    # Convert grouped data to the desired format
    grouped_results = [{"filename": filename, "predicted_labels": labels} for filename, labels in grouped_data.items()]
    return grouped_results

# Example usage
grouped_results = group_results_by_image(results)

# Print grouped results
print(grouped_results)


[{'filename': '0037', 'predicted_labels': ['senne']}, {'filename': '0039', 'predicted_labels': ['senne', 'youssef', 'matthias']}, {'filename': '0040', 'predicted_labels': ['seppe', 'alper', 'seppe']}, {'filename': '0043', 'predicted_labels': ['seppe', 'michiel']}, {'filename': '0045', 'predicted_labels': ['alper']}, {'filename': '0051', 'predicted_labels': ['youssef']}, {'filename': '0058', 'predicted_labels': ['bart']}, {'filename': '0062', 'predicted_labels': ['konrad']}, {'filename': '0066', 'predicted_labels': ['seppe', 'matthias']}, {'filename': '0078', 'predicted_labels': ['youssef']}, {'filename': '0103', 'predicted_labels': ['nelli']}, {'filename': '0125', 'predicted_labels': ['raul']}, {'filename': '0139', 'predicted_labels': ['youssef']}, {'filename': '0160', 'predicted_labels': ['youssef']}, {'filename': '0161', 'predicted_labels': ['florian']}, {'filename': '0166', 'predicted_labels': ['akif']}, {'filename': '0168', 'predicted_labels': ['konrad']}, {'filename': '0174', 'pre

In [6]:
import os
import pandas as pd

def create_submission_csv_from_grouped_results(grouped_results, output_csv_path, test_images_folder):
    """
    Create a Kaggle submission CSV file from grouped prediction results.
    Args:
        grouped_results: List of dictionaries with image filename and grouped labels.
        output_csv_path: Path to save the submission CSV file.
        test_images_folder: Folder containing all test images to ensure no image is missing.
    """
    # Get all filenames in the test images folder without extensions
    all_filenames = sorted(
        [os.path.splitext(filename)[0] for filename in os.listdir(test_images_folder) 
         if filename.endswith(('.jpg', '.jpeg', '.png'))]
    )
    
    # Map grouped results by filename for quick lookup
    grouped_dict = {group["filename"].split('.')[0]: ";".join(group["predicted_labels"]) for group in grouped_results}

    # Create submission data, ensuring every file has an entry
    submission_data = []
    for filename in all_filenames:
        label_name = grouped_dict.get(filename, "nothing")  # Default to "nothing" if no labels
        submission_data.append({"image": filename, "label_name": label_name})

    # Save to a CSV
    submission_df = pd.DataFrame(submission_data)
    submission_df.to_csv(output_csv_path, index=False)
    print(f"Submission file saved to {output_csv_path}")

# Example usage
test_images_folder = "../data/images/test_images/cleaned_images"
output_csv_path = "../submission.csv"
create_submission_csv_from_grouped_results(grouped_results, output_csv_path, test_images_folder)

Submission file saved to ../submission.csv
